root/cron/parse_builds_3_sites.py

Revision 17:e12c0730b53f, 1.3 kB (checked in by Dan Blankenberg <dan@bx.psu.edu>, 2 years ago)

Update cron scripts to reflect the fact that ElementTree is now an egg.

Line 
1 #!/usr/bin/env python
2 """
3 Connects to sites and determines which builds are available at each.
4 """
5
6 import sys
7 import urllib
8 import pkg_resources; pkg_resources.require( "elementtree" )
9 from elementtree import ElementTree
10
# Base CGI URLs of the sites to query. Paths are appended to these strings
# directly, so each entry keeps its trailing slash.
sites = [
    "http://genome.ucsc.edu/cgi-bin/",
    "http://archaea.ucsc.edu/cgi-bin/",
    "http://genome-test.cse.ucsc.edu/cgi-bin/",
]

# Short label for each entry in ``sites``, matched by position.
names = [
    "main",
    "archaea",
    "test",
]
19
def main():
    """Yield the builds advertised by each configured site.

    For every base URL in the module-level ``sites`` list (paired by
    position with ``names``), fetch ``<base>das/dsn``, parse the response
    as XML and collect the ``id`` attribute of the ``SOURCE`` child of each
    top-level element.

    A site that cannot be reached, or whose response cannot be parsed as
    XML, is reported with a ``#``-prefixed line on standard output and
    skipped; the remaining sites are still processed.  Entries without a
    ``SOURCE`` child or without an ``id`` attribute are ignored.

    Yields:
        list: ``[name, trackurl, builds]`` where ``name`` is the matching
        entry from ``names``, ``trackurl`` is ``<base>hgTracks?`` and
        ``builds`` is a list of the unique build ids in the order they
        first appear in the response.
    """
    for name, base_url in zip(names, sites):
        site = base_url + "das/dsn"
        trackurl = base_url + "hgTracks?"

        # Catch Exception instead of using a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit can still stop the cron job.
        try:
            page = urllib.urlopen(site)
        except Exception:
            # Single parenthesised argument: prints the same text whether
            # ``print`` is a statement or a function.
            print("#Unable to connect to " + site)
            continue

        # Always release the connection, even if reading fails.
        try:
            text = page.read()
        finally:
            page.close()

        try:
            tree = ElementTree.fromstring(text)
        except Exception:
            print("#Invalid xml passed back from " + site)
            continue
        print("#Harvested from " + site)

        builds = []
        seen = set()
        for dsn in tree:
            source = dsn.find("SOURCE")
            # Compare with None explicitly: an Element that has no children
            # evaluates as false, so ``if not source`` would be wrong.
            if source is None:
                continue
            build = source.attrib.get("id")
            if build is None or build in seen:
                continue
            # Keep only the first occurrence so the output order is stable.
            seen.add(build)
            builds.append(build)

        yield [name, trackurl, builds]
46
if __name__ == "__main__":
    # Emit one tab-separated record per site: name, track URL, and the
    # comma-separated list of builds.
    for label, trackurl, builds in main():
        print("\t".join([label, trackurl, ",".join(builds)]))
Note: See TracBrowser for help on using the browser.