84807912f92036a1d36b282121e757868cd4e4bb
mmaddren
  Tue Dec 18 13:02:11 2012 -0800
updated libraries and some tools to make some things clearer, as per code review v276 final
diff --git python/programs/trackInfo/trackInfo python/programs/trackInfo/trackInfo
index 802d2b4..8f0a5cb 100755
--- python/programs/trackInfo/trackInfo
+++ python/programs/trackInfo/trackInfo
@@ -1,185 +1,201 @@
 #!/usr/bin/env python2.7
 
 import sys, os, shutil, argparse
 from ucscGb.gbData.ra import raFile
 from ucscGb.encode import track
 from ucscGb.encode import styles
 
 def filesize(val):
     """Format a byte count as a short human-readable string.
 
     Picks the largest binary unit (TB, GB, MB, KB) that val strictly
     exceeds and returns val divided by that unit, rounded to two decimal
     places, followed by the unit name. A value that exceeds none of the
     units (1024 or less) is returned unscaled with a 'B' suffix.
     """
     units = (
         (1099511627776, 'TB'),
         (1073741824, 'GB'),
         (1048576, 'MB'),
         (1024, 'KB'),
     )
     for threshold, suffix in units:
         # Strictly greater: a value exactly on a boundary stays in the smaller unit.
         if val > threshold:
             return str(round(float(val) / threshold, 2)) + suffix
     return str(val) + 'B'
 
 def getFileType(filename):
     """Return the file-type extension of filename, ignoring gzip compression.
 
     A trailing '.gz' is dropped first, then the text after the last remaining
     '.' is returned (without the dot), e.g. 'reads.fastq.gz' -> 'fastq' and
     'sample.rep1.bam' -> 'bam'. Returns '' when no extension is left.
     """
     # str methods return new strings, so the stripped name has to be rebound.
     if filename.endswith('.gz'):
         filename = filename[:-len('.gz')]
     # rpartition splits on the LAST dot, so names with extra dots still
     # yield their real extension instead of the second dot-separated field.
     stem, dot, extension = filename.rpartition('.')
     if not dot:
         return ''
     return extension
     
 def isRawFile(filename):
     """Return True when filename's type, per getFileType, is 'fastq' or 'fasta'."""
     rawTypes = ('fastq', 'fasta')
     return getFileType(filename) in rawTypes
     
 def isSupplimentaryFile(filename):
     """Return True for every file that isRawFile does not classify as raw."""
     if isRawFile(filename):
         return False
     return True
     
 def createMappings(mdb):
     # Group the metadata stanzas in mdb by experiment id and collect GEO info.
     #
     # Returns a 3-tuple (expIds, geoMapping, series):
     #   expIds     - dict mapping int expId -> list of stanzas carrying that expId
     #   geoMapping - dict mapping int expId -> that experiment's geoSampleAccession,
     #                or the string 'Inconsistent' when its stanzas disagree
     #   series     - the stanza whose objType is 'composite' (the last one seen),
     #                or None when mdb contains no such stanza
     expIds = dict()
     geoMapping = dict()
     series = None
     
     for stanza in mdb.itervalues():
         
+        #if 'objStatus' in stanza:
+        #    continue
+        
         # The composite stanza is remembered separately and never grouped by expId.
         if 'objType' in stanza and stanza['objType'] == 'composite':
             series = stanza
             continue
 
         # Stanzas without an expId cannot be grouped, so they are ignored.
         if 'expId' not in stanza:
             continue
         
         # Both mappings are keyed by the integer form of expId.
         expId = int(stanza['expId'])
         
         if expId not in expIds:
             expIds[expId] = list()
             
         expIds[expId].append(stanza)
         
         if 'geoSampleAccession' in stanza:
             # otherwise we keep track of the geo number for partially submitted samples
             if expId not in geoMapping:
                 geoMapping[expId] = stanza['geoSampleAccession']
             # A second, different accession for the same expId flags the experiment;
             # once flagged 'Inconsistent' it is never overwritten again.
             elif geoMapping[expId] != 'Inconsistent' and geoMapping[expId] != stanza['geoSampleAccession']:
                 geoMapping[expId] = 'Inconsistent'
     
     return expIds, geoMapping, series
 
         
 def main():
     # Command-line entry point: parse the options, load the composite track,
     # and print a styled report with one summary line per experiment id
     # followed (unless collapsed) by one line per file.
 
     parser = argparse.ArgumentParser(description = 'Provides information about a composite track.\nRed - Missing\nBlue - Already submitted\nYellow - Inconsistent GEO Accession per sample\nGreen - GEO Accession Number\nWhite - Unsubmitted file')
     parser.add_argument('-u', '--unsubmitted', action='store_true', default=False, help='Do not list samples that have already been submitted')
     parser.add_argument('-m', '--missing', action='store_true', default=False, help='List only missing files')
     parser.add_argument('-s', '--size', action='store_true', default=False, help='Show file sizes')
+    parser.add_argument('-b', '--nobams', action='store_true', default=False, help='Omit bams')
     parser.add_argument('-c', '--collapse', action='store_true', default=False, help='Collapses all sample files, showing just the sample list')
     parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/')
+    parser.add_argument('-o', '--objStatus', action='store_true', default=False, help='show objStatus files')
     parser.add_argument('database', help='The database, typically hg19 or mm9')
     parser.add_argument('composite', help='The composite name, wgEncodeCshlLongRnaSeq for instance')
     parser.add_argument('expIds', nargs='*', help='Any number of expIds separated by spaces, you can also specify a range by using a hyphen, "140 150 160-170" for instance, or leave blank to specify the entire file')
     
     # With no arguments at all, show the usage line instead of an argparse error.
     if len(sys.argv) == 1:
         parser.print_usage()
         return
     
     args = parser.parse_args(sys.argv[1:])
     
     compositeTrack = track.CompositeTrack(args.database, args.composite, args.trackPath)
     
     ids = list()
     
     # Expand the requested ids; "a-b" is an inclusive range, anything else a single id.
     for id in args.expIds:
         if '-' in id:
             start, end = id.split('-', 1)
             ids.extend(range(int(start), int(end) + 1))
         else:
             ids.append(int(id))
 
     expIds, geoMapping, series = createMappings(compositeTrack.alphaMetaDb)
 
     # No ids requested: report every experiment found in the metadata, in order.
     if len(ids) == 0:
         ids = expIds.keys()
         ids.sort()
     
     out = list()
     totalsize = 0
     filecount = 0
     
     for idNum in ids:
         
         samplesize = 0
         submittedfiles = 0
         samplefiles = 0
         # NOTE(review): a requested id that is absent from the metadata raises
         # KeyError here - confirm whether that is the intended failure mode.
         expId = expIds[idNum]
 
         # First pass over the experiment's stanzas: tally sizes and file counts
         # for the summary line.
         for stanza in expId:
             
+            if 'objStatus' in stanza and not args.objStatus:
+                continue
+            
             if 'geoSampleAccession' in stanza and args.unsubmitted:
                 continue
                 
             for fname in stanza['fileName'].split(','):
                 # NOTE(review): substring test, so any name containing 'bam'
                 # is skipped, not only files ending in .bam.
+                if 'bam' in fname and args.nobams:
+                    continue
                 if fname in compositeTrack.files and not args.missing:
                     file = compositeTrack.files[fname]
                     samplesize = samplesize + file.size
                     samplefiles = samplefiles + 1
                     totalsize = totalsize + file.size
                     filecount = filecount + 1
                     
                     if 'geoSampleAccession' in stanza:
                         submittedfiles = submittedfiles + 1
                 
         size = ''
         if args.size:
             size = '[%s]' % filesize(samplesize)
         
         # Summary line for the experiment, styled by its GEO state: 'Inconsistent'
         # accession in yellow, all counted files submitted in blue/green,
         # partially submitted in cyan/green, no accession unstyled.
         if idNum in geoMapping:
             if geoMapping[idNum] == 'Inconsistent':
                 if not args.unsubmitted:
-                    out.append('\t%s %s %s%s - %s files' % (str(idNum), styles.style(expId[0]['metaObject'], 'blue'), styles.style('[%s]' % geoMapping[idNum], 'yellow'), size, str(samplefiles)))
+                    out.append('\t%s %s %s%s - %s files' % (str(idNum), styles.style(expId[0].title, 'blue'), styles.style('[%s]' % geoMapping[idNum], 'yellow'), size, str(samplefiles)))
             elif samplefiles == submittedfiles:
                 if not args.unsubmitted:
-                    out.append('\t%s %s %s%s - %s files' % (str(idNum), styles.style(expId[0]['metaObject'], 'blue'), styles.style('[%s]' % geoMapping[idNum], 'green'), size, str(samplefiles)))
+                    out.append('\t%s %s %s%s - %s files' % (str(idNum), styles.style(expId[0].title, 'blue'), styles.style('[%s]' % geoMapping[idNum], 'green'), size, str(samplefiles)))
             else:
-                out.append('\t%s %s %s%s - %s files' % (str(idNum), styles.style(expId[0]['metaObject'], 'cyan'), styles.style('[%s]' % geoMapping[idNum], 'green'), size, str(samplefiles)))
+                out.append('\t%s %s %s%s - %s files' % (str(idNum), styles.style(expId[0].title, 'cyan'), styles.style('[%s]' % geoMapping[idNum], 'green'), size, str(samplefiles)))
         else:
-            out.append('\t%s %s %s - %s files' % (str(idNum), expId[0]['metaObject'], size, str(samplefiles)))
+            out.append('\t%s %s %s - %s files' % (str(idNum), expId[0].title, size, str(samplefiles)))
 
         # Second pass: one output line per file of the experiment.
         for stanza in expId:
             
+            if 'objStatus' in stanza and not args.objStatus:
+                continue
+            
             # 'and' binds tighter than 'or': --collapse skips every stanza,
             # while --unsubmitted skips only stanzas that have an accession.
             if 'geoSampleAccession' in stanza and args.unsubmitted or args.collapse:
                 continue
 
             for fname in stanza['fileName'].split(','):
-                    
+                if 'bam' in fname and args.nobams:
+                    continue
                 if fname in compositeTrack.files:
                 
                     # --missing lists only files that are NOT present, so known files are skipped.
                     if args.missing:
                         continue
                 
                     file = compositeTrack.files[fname]
                     size = ''
                     if args.size:
                         size = '[%s]' % filesize(file.size)
                         
-                    if 'geoSampleAccession' not in stanza:
+                    if 'objStatus' in stanza:
+                        out.append('\t\t%s (%s) %s' % (styles.style(file.name, 'yellow'), stanza['objStatus'], size))
+                    elif 'geoSampleAccession' not in stanza:
                         out.append('\t\t%s %s' % (file.name, size))
                     elif idNum in geoMapping and geoMapping[idNum] == 'Inconsistent':
                         out.append('\t\t%s %s%s' % (styles.style(file.name, 'blue'), styles.style('[%s]' % stanza['geoSampleAccession'], 'green'), size))
                     else:
                         out.append('\t\t%s %s' % (styles.style(file.name, 'blue'), size))
                 else:
                     # Named in the metadata but absent from compositeTrack.files: shown in red.
                     out.append('\t\t%s' % styles.style(fname, 'red'))
 
     strsub = ''
     # NOTE(review): series is None when createMappings found no composite
     # stanza, in which case this 'in' test raises TypeError - confirm.
     if 'geoSeriesAccession' in series:
         strsub = styles.style('[%s]' % series['geoSeriesAccession'], 'green')
     
     # Echo the requested ids as a comma-separated list; the slice drops the final
     # character (the trailing comma, or the lone space when no ids were given).
     modestr = ' '
     for id in args.expIds:
         modestr = modestr + id + ',' 
     modestr = modestr[:len(modestr) - 1]
     
     size = ''
     if args.size:
         size = '[%s]' % filesize(totalsize)
     
     # The header needs the grand totals, so it is inserted at the front last.
     out.insert(0, '%s %s%s%s - %s files' % (compositeTrack.name, size, strsub, modestr, str(filecount)))
 
     for line in out:
         print line
             
     
 # Run the report only when this file is executed as a script, not on import.
 if __name__ == '__main__':
     main()
\ No newline at end of file