84807912f92036a1d36b282121e757868cd4e4bb
mmaddren
  Tue Dec 18 13:02:11 2012 -0800
Updated libraries and some tools to make some things clearer, as per code review v276 final.
diff --git python/programs/viewTrack/viewTrack python/programs/viewTrack/viewTrack
index 111768b..d23c6a9 100755
--- python/programs/viewTrack/viewTrack
+++ python/programs/viewTrack/viewTrack
@@ -7,49 +7,58 @@
 from ucscGb.externalData.geo import submission
 
 def filesize(val):
     if val > 1099511627776:
         return str(round(float(val) / 1099511627776, 2)) + 'TB'
     if val > 1073741824:
         return str(round(float(val) / 1073741824, 2)) + 'GB'
     if val > 1048576:
         return str(round(float(val) / 1048576, 2)) + 'MB'
     if val > 1024:
         return str(round(float(val) / 1024, 2)) + 'KB'
     else:
         return str(val) + 'B'
 
 
-def printTrackRefLine():
+def printTrackRefLine(args):
+    print 'Total expIds: ' + str(args.totalsubmitted) + '/' + str(args.totalexps) + ' out of tracks: ' + str(args.submittedtracks) + '/' + str(args.totaltracks)
     print 'Track'.ljust(25) + 'GEO Status'.ljust(12) + '# Files'.ljust(10) + 'Data Type'.ljust(16) + 'Releases'
         
 def trackLine(args, t):
     nametext = t.name.ljust(25)
     try:
         sub = 0
         tot = 0
         wantSubmit = 1
         
         for exp in t.alphaMetaDb.experiments.iterkeys():
             submitted = 0
+            samples = 0
             for stanza in t.alphaMetaDb.experiments[exp]:
+                if 'objStatus' in stanza:
+                    continue
+                if 'Aln' not in stanza['metaObject']:
                 if 'geoSampleAccession' in stanza:
-                    submitted = 1
-                    break
-            if submitted == 1:
+                        submitted += 1
+                    samples += 1
+            if (not args.pessimistic and submitted > 0) or (submitted == samples and submitted != 0):
                 sub += 1
+                args.totalsubmitted += 1
+            if samples > 0:
             tot += 1
+                args.totalexps += 1
+                
         
         #for stanza in t.alphaMetaDb.filter2(lambda s: s['objType'] != 'composite').itervalues():
         #    if 'geoSampleAccession' in stanza:
         #        sub += 1
         #    tot += 1
             
         #subtext = '%d%%' % int((float(sub) / tot) * 100)
         subtext = ('%d/%d' % (sub, tot)).ljust(10)
         #subtext = ' ' * (4 - len(subtext)) + subtext
         filestext = '%d' % tot
         filestext = ' ' * (4 - len(filestext)) + filestext + ' files'
         if sub == 0: 
             subtext = styles.style(subtext, 'red')
         elif sub < tot:
             subtext = styles.style(subtext, 'yellow')
@@ -57,38 +66,41 @@
             subtext = styles.style(subtext, 'green')
             wantSubmit = 0
     
         status = styles.style('Public    ', 'green')
         #just randomization for testing, replace with
         # /cluster/home/mmaddren/kent/src/hg/makeDb/trackDb/human/hg19/trackDb.wgEncode.ra
         if (tot % 2 == 0):
             status = styles.style('Unreleased', 'blue')
             wantSubmit = 0
     
         datatype = styles.style('Not Found'.ljust(16), 'red')
         if t.alphaMetaDb.dataType != None:
             datatype = t.alphaMetaDb.dataType.name
             if t.alphaMetaDb.dataType.type == 'MicroArray':
                     datatype = styles.style(datatype.ljust(16), 'yellow')
+                args.totaltracks += 1
             elif t.alphaMetaDb.dataType.valid:
                 datatype = styles.style(datatype.ljust(16), 'green')
+                args.totaltracks += 1
             else:
                 if t.alphaMetaDb.dataType.shouldSubmit:
                     datatype = styles.style(datatype.ljust(16), 'red')
                 else:
                     datatype = styles.style(datatype.ljust(16), 'blue')
                     #subtext = ''
+                    args.totalexps -= tot
                     wantSubmit = 0
         if datatype == '' and len(t.alphaMetaDb.experiments) == 0:
             datatype = styles.style('no expIds'.ljust(16), 'red')
                 
         #if wantSubmit:
         #    nametext = styles.style(nametext, 'green')
             
         geouptodate = ''    
         if args.geo and t.geo != None:
             exps = dict()
             local = 0
             offsite = 0
             mismatch = 0
             matched = 0
             for expId in t.alphaMetaDb.experiments.iterkeys():
@@ -109,88 +121,123 @@
                                 break
                         if happened == 0:
                             matched += 1
                 else:
                     local += 1
             geouptodate = '%d GSMs, %d expIds, %d match, %d local, %d on geo, %d mismatch' % (len(t.geo.accessions.keys()), len(t.alphaMetaDb.experiments.keys()), matched, local, offsite, mismatch)
                 
         releases = ''
         for release in t.releaseObjects:
             if release.onPublic:
                 releases += styles.style(str(release.index), 'green')
             else:
                 releases += styles.style(str(release.index), 'red')
                 
         if 'geoSeriesAccession' in t.alphaMetaDb.compositeStanza:
+            args.submittedtracks += 1
             return nametext + styles.style(t.alphaMetaDb.compositeStanza['geoSeriesAccession'].ljust(12), 'green') + subtext + datatype + releases + ' ' + geouptodate
         else:
             return nametext + styles.style('Unsubmitted'.ljust(12), 'blue') + subtext + datatype + releases
     except KeyError as e:
-        return styles.style(nametext, 'red') + styles.style('Error     ', 'red')
+        return styles.style(nametext, 'red') + styles.style('Error ' + str(e), 'red')
         
 def expIdLine(args, expId, t):
     
     samplefiles = 0
     samplesize = 0
     submittedfiles = 0
     for stanza in expId:
         for fname in stanza['fileName'].split(','):
             if fname in t.files:
                 file = t.files[fname]
                 samplesize = samplesize + file.size
                 samplefiles = samplefiles + 1
                 
                 if 'geoSampleAccession' in stanza:
                     submittedfiles = submittedfiles + 1
 
     if expId.title == None:
         title = styles.style('Inconsistent', 'red')
     else:
         title = expId.title
 
     return expId.name.rjust(12) + ' ' + title + ' [%s]' % filesize(samplesize) + ' - %d files' % samplefiles 
         
 def displayAll(args, tracks):
     
+    args.totalsubmitted = 0
+    args.totalexps = 0
+    args.totaltracks = 0
+    args.submittedtracks = 0
     display = dict()
     for t in tracks.itervalues():
         display[t.name] = trackLine(args, t)
         
     keys = display.keys()
     keys.sort()
     
-    printTrackRefLine()
+    nongeo = list()
+    marray = list()
+    unsub = list()
+    subm = list()
+    err = list()
+    
+    printTrackRefLine(args)
     for k in keys:
-        print display[k]
+        try:
+            if not tracks[k].alphaMetaDb.dataType.shouldSubmit:
+                nongeo.append(display[k])
+            elif tracks[k].alphaMetaDb.dataType.type == 'MicroArray':
+                marray.append(display[k])
+            elif tracks[k].alphaMetaDb.dataType.valid:
+                if 'geoSeriesAccession' in tracks[k].alphaMetaDb.compositeStanza:
+                    subm.append(display[k])
+                else:
+                    unsub.append(display[k])
+            else:
+                err.append(display[k])
+        except AttributeError as e:
+            err.append(display[k])
+            
+    for i in unsub:
+        print i
+    for i in subm:
+        print i
+    for i in marray:
+        print i
+    for i in nongeo:
+        print i
+    for i in err:
+        print i
         
 def displayTrack(args, t, expIds=None):
     print trackLine(args, t)
     
     if expIds == None:
         expIds = map(int, t.alphaMetaDb.experiments.keys())
         expIds.sort()
         expIds = map(str, expIds)
         
     for expId in expIds:
         print expIdLine(args, t.alphaMetaDb.experiments[expId], t)
     
 def main():
 
     parser = argparse.ArgumentParser(description = 'Provides information about tracks and their state in relation to GEO')
     #parser.add_argument('-u', '--unsubmitted', action='store_true', default=False, help='Do not list samples that have already been submitted')
     #parser.add_argument('-m', '--missing', action='store_true', default=False, help='List only missing files')
-    #parser.add_argument('-s', '--size', action='store_true', default=False, help='Show file sizes')
+    parser.add_argument('-p', '--pessimistic', action='store_true', default=False, help='Only count completed samples')
     parser.add_argument('-g', '--geo', action='store_true', default=False, help='Shows additional information crawled from the GEO submission page. WARNING: this takes significantly longer, so if used without a composite, this could take a few minutes')
     parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/')
     parser.add_argument('database', help='The database, typically hg19 or mm9')
     parser.add_argument('composite', nargs='?', help='The composite name, wgEncodeCshlLongRnaSeq for instance')
     parser.add_argument('expIds', nargs='*', help='Any number of expIds separated by spaces, you can also specify a range by using a hyphen, "140 150 160-170" for instance, or leave blank to specify the entire file')
     
     if len(sys.argv) == 1:
         parser.print_usage()
         return
     
     args = parser.parse_args(sys.argv[1:])
     
     tracks = track.TrackCollection(args.database)
     
     if args.composite == None: