35c0685ff195fd0332ba86828c6230a34eaaaacb
mmaddren
  Thu Apr 19 16:44:47 2012 -0700
added track viewing tools for GEO so that venkat can use it
diff --git python/programs/viewTrack/viewTrack python/programs/viewTrack/viewTrack
new file mode 100755
index 0000000..ba460ea
--- /dev/null
+++ python/programs/viewTrack/viewTrack
@@ -0,0 +1,225 @@
+#!/usr/bin/env python2.7
+
+import sys, os, shutil, argparse, urllib2, re
+from ucscgenomics import ra, soft, track, styles, geo
+
+def filesize(val):
+    """Format a byte count as a short string in binary units, e.g. '1.5KB'.
+
+    Exact powers of 1024 roll over to the larger unit (1024 -> '1.0KB');
+    values below 1024 are returned unscaled with a 'B' suffix.
+    """
+    for exponent, suffix in ((4, 'TB'), (3, 'GB'), (2, 'MB'), (1, 'KB')):
+        unit = 1024 ** exponent
+        if val >= unit:
+            return str(round(float(val) / unit, 2)) + suffix
+    return str(val) + 'B'
+
+
+def printTrackRefLine():  # column header row; the widths mirror the padding trackLine() applies
+    print ''.join(label.ljust(width) for label, width in (('Track', 25), ('GEO Status', 12), ('# Files', 10), ('Data Type', 16))) + 'Releases'
+        
+def trackLine(args, t):
+    """Build the one-line, color-coded GEO status summary for track t.
+
+    args -- parsed command line; args.geo enables the comparison against the
+            GEO submission stored in t.geo (skipped when t.geo is None).
+    t    -- track object; name, alphaMetaDb, releaseObjects and geo are read.
+
+    The line holds the padded track name, the GEO series accession (or
+    'Unsubmitted'), submitted/total experiment counts, the data type, one
+    release index per release (green when release.onPublic, else red) and,
+    for tracks with a series accession, the GEO comparison text. The name is
+    styled green while the wantSubmit flag is still set at the end.
+
+    Returns the formatted line. A KeyError raised anywhere while building it
+    is reported as a red track name followed by 'Error'.
+    """
+    nametext = t.name.ljust(25)
+    try:
+        metaDb = t.alphaMetaDb
+        experiments = metaDb.experiments
+
+        # An experiment is submitted once any of its stanzas has an accession.
+        sub = 0
+        tot = 0
+        for expId in experiments.iterkeys():
+            if any('geoSampleAccession' in stanza for stanza in experiments[expId]):
+                sub += 1
+            tot += 1
+
+        wantSubmit = 1
+        subtext = ('%d/%d' % (sub, tot)).ljust(10)
+        if sub == 0:
+            subtext = styles.style(subtext, 'red')
+        elif sub < tot:
+            subtext = styles.style(subtext, 'yellow')
+        else:
+            subtext = styles.style(subtext, 'green')
+            wantSubmit = 0
+
+        # FIXME: stand-in for the real release check. Tracks with an even
+        # experiment count are treated as unreleased purely for testing; the
+        # release state should instead be read from
+        # /cluster/home/mmaddren/kent/src/hg/makeDb/trackDb/human/hg19/trackDb.wgEncode.ra
+        if tot % 2 == 0:
+            wantSubmit = 0
+
+        dataType = metaDb.dataType
+        if dataType is None:
+            # 'no expIds' was unreachable before (it tested datatype == '').
+            label = 'no expIds' if len(experiments) == 0 else 'Not Found'
+            datatype = styles.style(label.ljust(16), 'red')
+        elif dataType.type == 'MicroArray':
+            datatype = styles.style(dataType.name.ljust(16), 'yellow')
+        elif dataType.valid:
+            datatype = styles.style(dataType.name.ljust(16), 'green')
+        elif dataType.shouldSubmit:
+            datatype = styles.style(dataType.name.ljust(16), 'red')
+        else:
+            datatype = styles.style(dataType.name.ljust(16), 'blue')
+            wantSubmit = 0
+
+        if wantSubmit:
+            nametext = styles.style(nametext, 'green')
+
+        geouptodate = ''
+        if args.geo and t.geo is not None:
+            geouptodate = _geoComparison(t)
+
+        releases = ''
+        for release in t.releaseObjects:
+            color = 'green' if release.onPublic else 'red'
+            releases += styles.style(str(release.index), color)
+
+        compositeStanza = metaDb.compositeStanza
+        if 'geoSeriesAccession' in compositeStanza:
+            series = styles.style(compositeStanza['geoSeriesAccession'].ljust(12), 'green')
+            return nametext + series + subtext + datatype + releases + ' ' + geouptodate
+        return nametext + styles.style('Unsubmitted'.ljust(12), 'blue') + subtext + datatype + releases
+    except KeyError:
+        return styles.style(nametext, 'red') + styles.style('Error     ', 'red')
+
+
+def _geoComparison(t):
+    """Summarize how the experiments of t line up with its GEO submission.
+
+    Each experiment is looked up in t.geo.accessions by its title and counted
+    once: 'local' when GEO has no entry for the title, 'mismatch' when a
+    stanza's geoSampleAccession differs from GEO's, 'on geo' when GEO has the
+    title but a stanza lacks an accession, and 'match' otherwise.
+    """
+    accessions = t.geo.accessions
+    experiments = t.alphaMetaDb.experiments
+    local = offsite = mismatch = matched = 0
+    for expId in experiments.iterkeys():
+        exp = experiments[expId]
+        if exp.title not in accessions:
+            local += 1
+        elif any('geoSampleAccession' in s and s['geoSampleAccession'] != accessions[exp.title] for s in exp):
+            mismatch += 1
+        elif any('geoSampleAccession' not in s for s in exp):
+            offsite += 1
+        else:
+            matched += 1
+    return '%d GSMs, %d expIds, %d match, %d local, %d on geo, %d mismatch' % (len(accessions), len(experiments), matched, local, offsite, mismatch)
+        
+def expIdLine(args, expId, t):
+    """Return the summary line for one experiment of track t.
+
+    args  -- parsed command line (not used here).
+    expId -- experiment object: iterating it yields metadata stanzas, and it
+             exposes .name and .title.
+    t     -- track whose .files maps file names to objects with a .size.
+
+    Only file names present in t.files contribute to the count and the size.
+    """
+    samplefiles = 0
+    samplesize = 0
+    for stanza in expId:
+        # fileName is split on commas, so one stanza may name several files.
+        for fname in stanza['fileName'].split(','):
+            if fname in t.files:
+                samplesize += t.files[fname].size
+                samplefiles += 1
+
+    title = styles.style('Inconsistent', 'red') if expId.title is None else expId.title
+    return expId.name.rjust(12) + ' ' + title + ' [%s]' % filesize(samplesize) + ' - %d files' % samplefiles
+        
+def displayAll(args, tracks):
+    """Print the header row, then one status line per track sorted by name.
+
+    args   -- parsed command line, handed through to trackLine.
+    tracks -- collection whose itervalues() yields the track objects.
+    """
+    # Format every line before printing anything; the lines are keyed by
+    # track name so they can be emitted in sorted order afterwards.
+    lines = dict((t.name, trackLine(args, t)) for t in tracks.itervalues())
+    printTrackRefLine()
+    for name in sorted(lines):
+        print lines[name]
+        
+def displayTrack(args, t, expIds=None):
+    """Print t's status line, then one line per experiment in expIds (default: all, in numeric order)."""
+    print trackLine(args, t)
+    experiments = t.alphaMetaDb.experiments
+    if expIds is None:
+        expIds = sorted(map(int, experiments.keys()))
+    for expId in expIds:
+        # Experiments are keyed by the string form of the id, while callers may pass ints.
+        experiment = experiments[str(expId)]
+        print expIdLine(args, experiment, t)
+    
+def main():
+    """Parse the command line and print GEO status for every track, or for one composite and its experiments."""
+    parser = argparse.ArgumentParser(description = 'Provides information about tracks and their state in relation to GEO')
+    parser.add_argument('-g', '--geo', action='store_true', default=False, help='Shows additional information crawled from the GEO submission page. WARNING: this takes significantly longer, so if used without a composite, this could take a few minutes')
+    parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/')  # NOTE: parsed, but not yet passed on to TrackCollection
+    parser.add_argument('database', help='The database, typically hg19 or mm9')
+    parser.add_argument('composite', nargs='?', help='The composite name, wgEncodeCshlLongRnaSeq for instance')
+    parser.add_argument('expIds', nargs='*', help='Any number of expIds separated by spaces, you can also specify a range by using a hyphen, "140 150 160-170" for instance, or leave blank to specify the entire file')
+    if len(sys.argv) == 1:
+        parser.print_usage()
+        return
+    args = parser.parse_args(sys.argv[1:])
+    tracks = track.TrackCollection(args.database)
+
+    if args.composite is None:
+        if args.geo:
+            for t in tracks.itervalues():
+                try:
+                    _attachGeo(t)
+                except KeyError:
+                    pass  # t.geo stays None, so trackLine skips the GEO comparison
+        displayAll(args, tracks)
+        return
+    t = tracks[args.composite]
+    if args.geo:
+        _attachGeo(t)  # required on every path: trackLine reads t.geo whenever --geo is set
+    if not args.expIds:
+        displayTrack(args, t)
+        return
+    # Expand 'N' and 'N-M' arguments; experiments are keyed by the id as a string.
+    ids = []
+    for spec in args.expIds:
+        if '-' in spec:
+            start, end = spec.split('-', 1)
+            ids.extend(str(n) for n in range(int(start), int(end) + 1))
+        else:
+            ids.append(str(int(spec)))
+    displayTrack(args, t, ids)
+
+
+def _attachGeo(t):
+    """Set t.geo to the GEO submission named by the composite stanza, or to None when it has no series accession."""
+    t.geo = None
+    stanza = t.alphaMetaDb.compositeStanza
+    if 'geoSeriesAccession' in stanza:
+        t.geo = geo.Submission(stanza['geoSeriesAccession'])
+                    
+                    
+                    
+                    
+if __name__ == '__main__':  # run main() only when executed as a script, not when imported
+    main()                   
+                    
\ No newline at end of file