dbf1ed2fb55f85e23ce20f7ea1781dfe35f162e9
mmaddren
  Mon Oct 31 16:40:50 2011 -0700
somehow these got removed when I pulled??? I put them back
diff --git python/programs/trackInfo/trackInfo python/programs/trackInfo/trackInfo
new file mode 100755
index 0000000..de41e50
--- /dev/null
+++ python/programs/trackInfo/trackInfo
@@ -0,0 +1,182 @@
+#!/hive/groups/encode/dcc/bin/python
+import sys, os, shutil, argparse
+from ucscgenomics import ra, soft, track, styles
+
+def filesize(val):
+    """Format a byte count as a short human-readable string.
+
+    The largest binary unit (KB, MB, GB, TB; powers of 1024) that val reaches
+    is used, and the scaled value is rounded to two decimal places. Values
+    below 1024 are returned unscaled with a 'B' suffix.
+
+    val -- size in bytes.
+    """
+    # Largest unit first, so the first match is the one to report.
+    units = (
+        ('TB', 1024 ** 4),
+        ('GB', 1024 ** 3),
+        ('MB', 1024 ** 2),
+        ('KB', 1024),
+    )
+    for suffix, unit in units:
+        # >= so that an exact multiple (e.g. 1024) is shown as '1.0KB'
+        # instead of falling through to the next smaller unit ('1024B').
+        if val >= unit:
+            return str(round(float(val) / unit, 2)) + suffix
+    return str(val) + 'B'
+
+def getFileType(filename):
+    """Return the extension of filename without the leading dot.
+
+    A trailing '.gz' is ignored, so 'reads.fastq.gz' and 'reads.fastq' both
+    yield 'fastq'. Returns '' when the name has no extension.
+    """
+    # Strings are immutable, so the stripped name has to be rebound. Only a
+    # trailing '.gz' is removed; a '.gz' elsewhere in the name is kept.
+    if filename.endswith('.gz'):
+        filename = filename[:-len('.gz')]
+    # splitext takes the last extension, so dots inside the base name
+    # (e.g. 'sample.rep1.bam') do not change the result.
+    return os.path.splitext(filename)[1][1:]
+    
+def isRawFile(filename):
+    """Return True when getFileType reports filename as 'fastq' or 'fasta'."""
+    fileType = getFileType(filename)
+    return fileType in ('fastq', 'fasta')
+    
+def isSupplimentaryFile(filename):
+    """Return True for any filename that isRawFile does not classify as raw."""
+    if isRawFile(filename):
+        return False
+    return True
+    
+def createMappings(mdb):
+    """Group metadata stanzas by experiment ID and collect GEO accessions.
+
+    mdb -- object whose itervalues() yields the stanzas; each stanza is used
+           as a mapping (membership tests and key lookups).
+
+    Returns a tuple (expIds, geoMapping, series):
+      expIds     -- dict: int expId -> list of the stanzas carrying that
+                    expId, in iteration order.
+      geoMapping -- dict: int expId -> the 'geoSampleAccession' shared by the
+                    stanzas that have one, or the string 'Inconsistent' when
+                    they disagree. IDs without any accession are absent.
+      series     -- the last stanza seen whose 'objType' is 'composite', or
+                    None when there is none.
+    """
+    stanzasById = dict()
+    accessionById = dict()
+    compositeStanza = None
+
+    for entry in mdb.itervalues():
+        isComposite = 'objType' in entry and entry['objType'] == 'composite'
+        if isComposite:
+            # Remembered separately; never filed under an experiment ID.
+            compositeStanza = entry
+        elif 'expId' in entry:
+            key = int(entry['expId'])
+            stanzasById.setdefault(key, list()).append(entry)
+
+            if 'geoSampleAccession' in entry:
+                accession = entry['geoSampleAccession']
+                # The first accession seen for an ID is recorded as-is; a
+                # later, different one marks the whole ID as inconsistent.
+                known = accessionById.setdefault(key, accession)
+                if known != 'Inconsistent' and known != accession:
+                    accessionById[key] = 'Inconsistent'
+
+    return stanzasById, accessionById, compositeStanza
+
+        
+def _parseExpIds(specs):
+    """Expand expId arguments such as ['140', '160-170'] into a list of ints.
+
+    A spec containing a hyphen is treated as an inclusive range. Raises
+    ValueError when a spec is neither an integer nor an integer range.
+    """
+    ids = list()
+    for spec in specs:
+        if '-' in spec:
+            start, end = spec.split('-', 1)
+            ids.extend(range(int(start), int(end) + 1))
+        else:
+            ids.append(int(spec))
+    return ids
+
+
+def main():
+    """Report the files and GEO submission state of a composite track.
+
+    Parses the command line, loads the composite through
+    track.CompositeTrack, and prints one summary line for the track followed
+    by one line per selected experiment and (unless --collapse is given) one
+    line per file. Colours follow the legend in the --help description.
+    """
+    parser = argparse.ArgumentParser(
+        # Keep the line breaks of the colour legend; the default formatter
+        # re-wraps the description into a single paragraph.
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description = 'Provides information about a composite track.\nRed - Missing\nBlue - Already submitted\nYellow - Inconsistent GEO Accession per sample\nGreen - GEO Accession Number\nWhite - Unsubmitted file')
+    parser.add_argument('-u', '--unsubmitted', action='store_true', default=False, help='Do not list samples that have already been submitted')
+    parser.add_argument('-m', '--missing', action='store_true', default=False, help='List only missing files')
+    parser.add_argument('-s', '--size', action='store_true', default=False, help='Show file sizes')
+    parser.add_argument('-c', '--collapse', action='store_true', default=False, help='Collapses all sample files, showing just the sample list')
+    parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/')
+    parser.add_argument('database', help='The database, typically hg19 or mm9')
+    parser.add_argument('composite', help='The composite name, wgEncodeCshlLongRnaSeq for instance')
+    parser.add_argument('expIds', nargs='*', help='Any number of expIds separated by spaces, you can also specify a range by using a hyphen, "140 150 160-170" for instance, or leave blank to specify the entire file')
+
+    if len(sys.argv) == 1:
+        parser.print_usage()
+        return
+
+    args = parser.parse_args(sys.argv[1:])
+
+    # Report a malformed expId as a usage error rather than a traceback.
+    try:
+        ids = _parseExpIds(args.expIds)
+    except ValueError:
+        parser.error('expIds must be integers or ranges such as 160-170')
+
+    compositeTrack = track.CompositeTrack(args.database, args.composite, args.trackPath)
+    expIds, geoMapping, series = createMappings(compositeTrack.alphaMetaDb)
+
+    # No IDs on the command line means "every experiment", in numeric order.
+    if not ids:
+        ids = sorted(expIds)
+
+    out = list()
+    totalsize = 0
+    filecount = 0
+
+    for idNum in ids:
+
+        if idNum not in expIds:
+            # A requested ID (e.g. one inside a range) may have no stanzas;
+            # say so and carry on instead of failing with a KeyError.
+            sys.stderr.write('expId %d not found in %s, skipping\n' % (idNum, args.composite))
+            continue
+
+        stanzas = expIds[idNum]
+        samplesize = 0
+        submittedfiles = 0
+        samplefiles = 0
+
+        # First pass: tally sizes and counts for the sample's header line.
+        for stanza in stanzas:
+            submitted = 'geoSampleAccession' in stanza
+            if submitted and args.unsubmitted:
+                continue
+
+            for fname in stanza['fileName'].split(','):
+                if fname in compositeTrack.files and not args.missing:
+                    fsize = compositeTrack.files[fname].size
+                    samplesize = samplesize + fsize
+                    samplefiles = samplefiles + 1
+                    totalsize = totalsize + fsize
+                    filecount = filecount + 1
+
+                    if submitted:
+                        submittedfiles = submittedfiles + 1
+
+        size = ''
+        if args.size:
+            size = '[%s]' % filesize(samplesize)
+
+        if idNum in geoMapping:
+            accession = geoMapping[idNum]
+            if accession == 'Inconsistent':
+                nameColor, accColor, show = 'blue', 'yellow', not args.unsubmitted
+            elif samplefiles == submittedfiles:
+                nameColor, accColor, show = 'blue', 'green', not args.unsubmitted
+            else:
+                # Not every counted file belongs to a stanza with an
+                # accession, so the sample is always listed.
+                nameColor, accColor, show = 'cyan', 'green', True
+
+            if show:
+                out.append('\t%s %s %s%s - %s files' % (str(idNum), styles.style(stanzas[0]['metaObject'], nameColor), styles.style('[%s]' % accession, accColor), size, str(samplefiles)))
+        else:
+            out.append('\t%s %s %s - %s files' % (str(idNum), stanzas[0]['metaObject'], size, str(samplefiles)))
+
+        # --collapse suppresses every per-file line of the sample.
+        if args.collapse:
+            continue
+
+        # Second pass: one line per file.
+        for stanza in stanzas:
+            submitted = 'geoSampleAccession' in stanza
+            if submitted and args.unsubmitted:
+                continue
+
+            for fname in stanza['fileName'].split(','):
+
+                if fname not in compositeTrack.files:
+                    # Missing files are listed regardless of --missing.
+                    out.append('\t\t%s' % styles.style(fname, 'red'))
+                    continue
+
+                if args.missing:
+                    continue
+
+                trackFile = compositeTrack.files[fname]
+                size = ''
+                if args.size:
+                    size = '[%s]' % filesize(trackFile.size)
+
+                if not submitted:
+                    out.append('\t\t%s %s' % (trackFile.name, size))
+                elif idNum in geoMapping and geoMapping[idNum] == 'Inconsistent':
+                    # Show the per-file accession so the conflict is visible.
+                    out.append('\t\t%s %s%s' % (styles.style(trackFile.name, 'blue'), styles.style('[%s]' % stanza['geoSampleAccession'], 'green'), size))
+                else:
+                    out.append('\t\t%s %s' % (styles.style(trackFile.name, 'blue'), size))
+
+    strsub = ''
+    # series is None when the metadata contains no composite stanza.
+    if series is not None and 'geoSeriesAccession' in series:
+        strsub = styles.style('[%s]' % series['geoSeriesAccession'], 'green')
+
+    # Echo the expId arguments exactly as given, comma separated.
+    modestr = ''
+    if args.expIds:
+        modestr = ' ' + ','.join(args.expIds)
+
+    size = ''
+    if args.size:
+        size = '[%s]' % filesize(totalsize)
+
+    # The track summary needs the totals, so it is inserted at the top last.
+    out.insert(0, '%s %s%s%s - %s files' % (compositeTrack.name, size, strsub, modestr, str(filecount)))
+
+    for line in out:
+        print(line)
+            
+    
+# Run main() only when this file is executed as a script, not when imported.
+if __name__ == '__main__':
+    main()
\ No newline at end of file