#!/usr/bin/env python2.7
# python/programs/trackInfo/trackInfo
#
# Reconstructed post-image of commit 84807912f92036a1d36b282121e757868cd4e4bb
# (mmaddren, Tue Dec 18 13:02:11 2012 -0800; index 802d2b4..8f0a5cb, mode 100755):
# "updated libraries and some tools to make some things clearer, as per code
# review v276 final".
"""Summarise the files and GEO submission state of an ENCODE composite track."""

import argparse
import os
import shutil
import sys

# (threshold in bytes, suffix), largest first, so the first match is the unit.
_SIZE_UNITS = (
    (1099511627776, 'TB'),
    (1073741824, 'GB'),
    (1048576, 'MB'),
    (1024, 'KB'),
)


def filesize(val):
    """Return *val* (a byte count) as a short human-readable string.

    Values of at least one KB are scaled to the largest fitting binary unit
    and rounded to two decimals ('1.5KB'); smaller values are printed
    unscaled ('512B').
    """
    for factor, suffix in _SIZE_UNITS:
        # '>=' so that an exact unit boundary reads '1.0KB', not '1024B'.
        if val >= factor:
            return str(round(float(val) / factor, 2)) + suffix
    return str(val) + 'B'


def getFileType(filename):
    """Return the extension of *filename* without the dot, ignoring '.gz'.

    Only the base name is inspected, so dots in directory names do not
    matter. A trailing '.gz' is dropped first, so 'reads.fastq.gz' yields
    'fastq'. Returns '' when there is no extension left.
    """
    name = os.path.basename(filename)
    if name.endswith('.gz'):
        name = name[:-len('.gz')]
    return os.path.splitext(name)[1].lstrip('.')


def isRawFile(filename):
    """Return True when *filename* is a fastq or fasta file (optionally gzipped)."""
    return getFileType(filename) in ('fastq', 'fasta')


def isSupplimentaryFile(filename):
    """Return True for every file that is not a raw (fastq/fasta) file."""
    return not isRawFile(filename)


def createMappings(mdb):
    """Group metadata stanzas by experiment id.

    *mdb* is a mapping whose values are stanzas supporting ``in`` and
    ``[]`` lookups by key.

    Returns a tuple ``(expIds, geoMapping, series)``:
      expIds     -- dict: int expId -> list of stanzas carrying that expId
      geoMapping -- dict: int expId -> the geoSampleAccession shared by its
                    stanzas, or the string 'Inconsistent' when they disagree
      series     -- the last stanza with objType 'composite', or None
    """
    expIds = dict()
    geoMapping = dict()
    series = None
    # itervalues() is what the original called; fall back to values() so a
    # plain mapping without itervalues() is accepted as well.
    stanzas = mdb.itervalues() if hasattr(mdb, 'itervalues') else mdb.values()
    for stanza in stanzas:
        # Stanzas carrying objStatus are deliberately kept here; main()
        # decides whether to show them (--objStatus).
        if 'objType' in stanza and stanza['objType'] == 'composite':
            series = stanza
            continue
        if 'expId' not in stanza:
            continue
        expId = int(stanza['expId'])
        expIds.setdefault(expId, list()).append(stanza)
        if 'geoSampleAccession' in stanza:
            # Track the accession even for partially submitted samples and
            # flag the sample once two stanzas disagree.
            accession = stanza['geoSampleAccession']
            if expId not in geoMapping:
                geoMapping[expId] = accession
            elif geoMapping[expId] != 'Inconsistent' and geoMapping[expId] != accession:
                geoMapping[expId] = 'Inconsistent'
    return expIds, geoMapping, series


def _parseExpIds(specs):
    """Expand command-line expId arguments ('140', '160-170') into a list of ints.

    Raises ValueError when an argument is not an integer or an
    integer-integer range.
    """
    ids = list()
    for spec in specs:
        if '-' in spec:
            start, end = spec.split('-', 1)
            ids.extend(range(int(start), int(end) + 1))
        else:
            ids.append(int(spec))
    return ids


def main():
    """Command-line entry point: print a colour-coded report for one composite."""
    # Project imports are deferred to run time so that the pure helpers
    # above can be imported without the ucscGb package being installed.
    from ucscGb.gbData.ra import raFile  # unused here; kept from the original import list
    from ucscGb.encode import track
    from ucscGb.encode import styles

    parser = argparse.ArgumentParser(
        description='Provides information about a composite track.\n'
                    'Red - Missing\n'
                    'Blue - Already submitted\n'
                    'Yellow - Inconsistent GEO Accession per sample\n'
                    'Green - GEO Accession Number\n'
                    'White - Unsubmitted file',
        # Keep the legend one entry per line instead of re-wrapping it.
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-u', '--unsubmitted', action='store_true', default=False, help='Do not list samples that have already been submitted')
    parser.add_argument('-m', '--missing', action='store_true', default=False, help='List only missing files')
    parser.add_argument('-s', '--size', action='store_true', default=False, help='Show file sizes')
    parser.add_argument('-b', '--nobams', action='store_true', default=False, help='Omit bams')
    parser.add_argument('-c', '--collapse', action='store_true', default=False, help='Collapses all sample files, showing just the sample list')
    parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/')
    parser.add_argument('-o', '--objStatus', action='store_true', default=False, help='show objStatus files')
    parser.add_argument('database', help='The database, typically hg19 or mm9')
    parser.add_argument('composite', help='The composite name, wgEncodeCshlLongRnaSeq for instance')
    parser.add_argument('expIds', nargs='*', help='Any number of expIds separated by spaces, you can also specify a range by using a hyphen, "140 150 160-170" for instance, or leave blank to specify the entire file')

    if len(sys.argv) == 1:
        parser.print_usage()
        return

    args = parser.parse_args(sys.argv[1:])

    # Validate the cheap user input before loading the track.
    try:
        ids = _parseExpIds(args.expIds)
    except ValueError:
        parser.error('expIds must be integers or ranges such as 160-170')

    compositeTrack = track.CompositeTrack(args.database, args.composite, args.trackPath)
    expIds, geoMapping, series = createMappings(compositeTrack.alphaMetaDb)

    if not ids:
        ids = sorted(expIds)

    out = list()
    totalsize = 0
    filecount = 0

    for idNum in ids:
        if idNum not in expIds:
            # A requested id that the metadata does not contain used to
            # abort the whole report with a KeyError.
            sys.stderr.write('warning: expId %s not found in %s\n' % (idNum, args.composite))
            continue

        stanzas = expIds[idNum]
        samplesize = 0
        submittedfiles = 0
        samplefiles = 0

        # First pass: totals for the per-sample header line.
        for stanza in stanzas:
            if 'objStatus' in stanza and not args.objStatus:
                continue
            if 'geoSampleAccession' in stanza and args.unsubmitted:
                continue
            for fname in stanza['fileName'].split(','):
                if 'bam' in fname and args.nobams:
                    continue
                if fname in compositeTrack.files and not args.missing:
                    trackFile = compositeTrack.files[fname]
                    samplesize = samplesize + trackFile.size
                    samplefiles = samplefiles + 1
                    totalsize = totalsize + trackFile.size
                    filecount = filecount + 1
                    if 'geoSampleAccession' in stanza:
                        submittedfiles = submittedfiles + 1

        size = ('[%s]' % filesize(samplesize)) if args.size else ''
        title = stanzas[0].title

        if idNum in geoMapping:
            accession = geoMapping[idNum]
            if accession == 'Inconsistent':
                titleColor, accessionColor, fullySubmitted = 'blue', 'yellow', True
            elif samplefiles == submittedfiles:
                titleColor, accessionColor, fullySubmitted = 'blue', 'green', True
            else:
                # Only some of the sample's files carry an accession.
                titleColor, accessionColor, fullySubmitted = 'cyan', 'green', False
            # --unsubmitted hides samples that are already (fully) submitted.
            if not (fullySubmitted and args.unsubmitted):
                out.append('\t%s %s %s%s - %s files' % (str(idNum), styles.style(title, titleColor), styles.style('[%s]' % accession, accessionColor), size, str(samplefiles)))
        else:
            out.append('\t%s %s %s - %s files' % (str(idNum), title, size, str(samplefiles)))

        # Second pass: one line per file, unless --collapse was given.
        for stanza in stanzas:
            if 'objStatus' in stanza and not args.objStatus:
                continue
            if ('geoSampleAccession' in stanza and args.unsubmitted) or args.collapse:
                continue
            for fname in stanza['fileName'].split(','):
                if 'bam' in fname and args.nobams:
                    continue
                if fname in compositeTrack.files:
                    if args.missing:
                        continue
                    trackFile = compositeTrack.files[fname]
                    size = ('[%s]' % filesize(trackFile.size)) if args.size else ''
                    if 'objStatus' in stanza:
                        out.append('\t\t%s (%s) %s' % (styles.style(trackFile.name, 'yellow'), stanza['objStatus'], size))
                    elif 'geoSampleAccession' not in stanza:
                        out.append('\t\t%s %s' % (trackFile.name, size))
                    elif idNum in geoMapping and geoMapping[idNum] == 'Inconsistent':
                        out.append('\t\t%s %s%s' % (styles.style(trackFile.name, 'blue'), styles.style('[%s]' % stanza['geoSampleAccession'], 'green'), size))
                    else:
                        out.append('\t\t%s %s' % (styles.style(trackFile.name, 'blue'), size))
                else:
                    # Listed in the metadata but absent from the track's files.
                    out.append('\t\t%s' % styles.style(fname, 'red'))

    strsub = ''
    # series stays None when the metadata holds no composite stanza.
    if series is not None and 'geoSeriesAccession' in series:
        strsub = styles.style('[%s]' % series['geoSeriesAccession'], 'green')

    # Echo the requested ids in the summary line (empty when none were given).
    modestr = (' ' + ','.join(args.expIds)) if args.expIds else ''

    size = ('[%s]' % filesize(totalsize)) if args.size else ''

    out.insert(0, '%s %s%s%s - %s files' % (compositeTrack.name, size, strsub, modestr, str(filecount)))

    for line in out:
        # Single-argument form prints identically under Python 2.7 and 3.
        print(line)


if __name__ == '__main__':
    main()