root/cron/parse_builds.py

Revision 17:e12c0730b53f, 1.5 kB (checked in by Dan Blankenberg <dan@bx.psu.edu>, 2 years ago)

Update cron scripts to reflect the fact that ElementTree is now an egg.

Line 
1 #!/usr/bin/env python
2
3 """
4 Connects to the URL specified and outputs builds available at that
5 DSN in tabular format.  UCSC test gateway is used as default.
6 build   description
7 """
8
9 import sys
10 import urllib
11 import pkg_resources; pkg_resources.require( "elementtree" )
12 from elementtree import ElementTree
13
14 URL = "http://genome-test.cse.ucsc.edu/cgi-bin/das/dsn"
15
def _collapse_repeated_words(text):
    """Return text with every run of identical adjacent words reduced to one.

    Words are delimited by single spaces, the same separator used to join
    the result back together.
    """
    kept = []
    for word in text.split(" "):
        # Compare against the last word kept so runs of any length collapse
        # and the list is never mutated while being indexed.
        if not kept or word != kept[-1]:
            kept.append(word)
    return " ".join(kept)


def getbuilds(url):
    """Yield a [build, description] list for each entry of a DAS DSN listing.

    The document at ``url`` is fetched and parsed as XML.  For every child
    of the root element, the ``id`` attribute of its SOURCE child is the
    build and the text of its DESCRIPTION child, with the trailing
    "Genome at UCSC" wording and repeated adjacent words removed, is the
    description.

    This is a generator: nothing is fetched or printed until iteration
    starts.  Before the first pair is yielded, a "#Harvested from" comment
    line and the "?\\tunspecified (?)" placeholder row are printed to
    stdout.

    If the URL cannot be read or the response is not well-formed XML, a
    "#" comment line and the placeholder row are printed and the process
    exits with status 1 (SystemExit).
    """
    # urlopen lives in urllib on Python 2 and urllib.request on Python 3;
    # resolve it locally so the function does not depend on either layout.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    try:
        page = urlopen(url)
        try:
            text = page.read()
        finally:
            # Release the connection whether or not the read succeeded.
            page.close()
    except Exception:
        # Exception (not a bare except) lets SystemExit and
        # KeyboardInterrupt propagate.
        print("#Unable to open " + url)
        print("?\tunspecified (?)")
        sys.exit(1)

    try:
        tree = ElementTree.fromstring(text)
    except Exception:
        print("#Invalid xml passed back from " + url)
        print("?\tunspecified (?)")
        sys.exit(1)

    # Report the URL actually queried rather than a fixed one.
    print("#Harvested from " + url)
    print("?\tunspecified (?)")
    for dsn in tree:
        build = dsn.find("SOURCE").attrib['id']
        description = dsn.find("DESCRIPTION").text
        # Strip both spellings of the site suffix.
        description = description.replace(" - Genome at UCSC", "")
        description = description.replace(" Genome at UCSC", "")
        yield [build, _collapse_repeated_words(description)]
47
if __name__ == "__main__":
    # An optional first command-line argument rebinds the module-level URL.
    cli_args = sys.argv[1:]
    if cli_args:
        URL = cli_args[0]
    # Emit one tab-separated row per build: the id, then "description (id)".
    for build_id, label in getbuilds(URL):
        print("".join([build_id, "\t", label, " (", build_id, ")"]))
53
Note: See TracBrowser for help on using the browser.