dbf1ed2fb55f85e23ce20f7ea1781dfe35f162e9 mmaddren Mon Oct 31 16:40:50 2011 -0700 somehow these got removed when I pulled??? I put them back diff --git python/programs/trackInfo/trackInfo python/programs/trackInfo/trackInfo new file mode 100755 index 0000000..de41e50 --- /dev/null +++ python/programs/trackInfo/trackInfo @@ -0,0 +1,182 @@ +#!/hive/groups/encode/dcc/bin/python +import sys, os, shutil, argparse +from ucscgenomics import ra, soft, track, styles + +def filesize(val): + if val > 1099511627776: + return str(round(float(val) / 1099511627776, 2)) + 'TB' + if val > 1073741824: + return str(round(float(val) / 1073741824, 2)) + 'GB' + if val > 1048576: + return str(round(float(val) / 1048576, 2)) + 'MB' + if val > 1024: + return str(round(float(val) / 1024, 2)) + 'KB' + else: + return str(val) + 'B' + +def getFileType(filename): + filename.replace('.gz', '') + return filename.rsplit('.')[1] + +def isRawFile(filename): + return (getFileType(filename) == 'fastq' or getFileType(filename) == 'fasta') + +def isSupplimentaryFile(filename): + return not isRawFile(filename) + +def createMappings(mdb): + expIds = dict() + geoMapping = dict() + series = None + + for stanza in mdb.itervalues(): + + if 'objType' in stanza and stanza['objType'] == 'composite': + series = stanza + continue + + if 'expId' not in stanza: + continue + + expId = int(stanza['expId']) + + if expId not in expIds: + expIds[expId] = list() + + expIds[expId].append(stanza) + + if 'geoSampleAccession' in stanza: + # otherwise we keep track of the geo number for partially submitted samples + if expId not in geoMapping: + geoMapping[expId] = stanza['geoSampleAccession'] + elif geoMapping[expId] != 'Inconsistent' and geoMapping[expId] != stanza['geoSampleAccession']: + geoMapping[expId] = 'Inconsistent' + + return expIds, geoMapping, series + + +def main(): + + parser = argparse.ArgumentParser(description = 'Provides information about a composite track.\nRed - Missing\nBlue - Already submitted\nYellow - Inconsistent GEO Accession per sample\nGreen - GEO Accession Number\nWhite - Unsubmitted file') + parser.add_argument('-u', '--unsubmitted', action='store_true', default=False, help='Do not list samples that have already been submitted') + parser.add_argument('-m', '--missing', action='store_true', default=False, help='List only missing files') + parser.add_argument('-s', '--size', action='store_true', default=False, help='Show file sizes') + parser.add_argument('-c', '--collapse', action='store_true', default=False, help='Collapses all sample files, showing just the sample list') + parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/') + parser.add_argument('database', help='The database, typically hg19 or mm9') + parser.add_argument('composite', help='The composite name, wgEncodeCshlLongRnaSeq for instance') + parser.add_argument('expIds', nargs='*', help='Any number of expIds separated by spaces, you can also specify a range by using a hyphen, "140 150 160-170" for instance, or leave blank to specify the entire file') + + if len(sys.argv) == 1: + parser.print_usage() + return + + args = parser.parse_args(sys.argv[1:]) + + compositeTrack = track.CompositeTrack(args.database, args.composite, args.trackPath) + + ids = list() + + for id in args.expIds: + if '-' in id: + start, end = id.split('-', 1) + ids.extend(range(int(start), int(end) + 1)) + else: + ids.append(int(id)) + + expIds, geoMapping, series = createMappings(compositeTrack.alphaMetaDb) + + if len(ids) == 0: + ids = expIds.keys() + ids.sort() + + out = list() + totalsize = 0 + filecount = 0 + + for idNum in ids: + + samplesize = 0 + submittedfiles = 0 + samplefiles = 0 + expId = expIds[idNum] + + for stanza in expId: + + if 'geoSampleAccession' in stanza and args.unsubmitted: + continue + + for fname in stanza['fileName'].split(','): + if fname in compositeTrack.files and not args.missing: + file = compositeTrack.files[fname] + samplesize = samplesize + file.size + samplefiles = samplefiles + 1 + totalsize = totalsize + file.size + filecount = filecount + 1 + + if 'geoSampleAccession' in stanza: + submittedfiles = submittedfiles + 1 + + size = '' + if args.size: + size = '[%s]' % filesize(samplesize) + + if idNum in geoMapping: + if geoMapping[idNum] == 'Inconsistent': + if not args.unsubmitted: + out.append('\t%s %s %s%s - %s files' % (str(idNum), styles.style(expId[0]['metaObject'], 'blue'), styles.style('[%s]' % geoMapping[idNum], 'yellow'), size, str(samplefiles))) + elif samplefiles == submittedfiles: + if not args.unsubmitted: + out.append('\t%s %s %s%s - %s files' % (str(idNum), styles.style(expId[0]['metaObject'], 'blue'), styles.style('[%s]' % geoMapping[idNum], 'green'), size, str(samplefiles))) + else: + out.append('\t%s %s %s%s - %s files' % (str(idNum), styles.style(expId[0]['metaObject'], 'cyan'), styles.style('[%s]' % geoMapping[idNum], 'green'), size, str(samplefiles))) + else: + out.append('\t%s %s %s - %s files' % (str(idNum), expId[0]['metaObject'], size, str(samplefiles))) + + for stanza in expId: + + if 'geoSampleAccession' in stanza and args.unsubmitted or args.collapse: + continue + + for fname in stanza['fileName'].split(','): + + if fname in compositeTrack.files: + + if args.missing: + continue + + file = compositeTrack.files[fname] + size = '' + if args.size: + size = '[%s]' % filesize(file.size) + + if 'geoSampleAccession' not in stanza: + out.append('\t\t%s %s' % (file.name, size)) + elif idNum in geoMapping and geoMapping[idNum] == 'Inconsistent': + out.append('\t\t%s %s%s' % (styles.style(file.name, 'blue'), styles.style('[%s]' % stanza['geoSampleAccession'], 'green'), size)) + else: + out.append('\t\t%s %s' % (styles.style(file.name, 'blue'), size)) + else: + out.append('\t\t%s' % styles.style(fname, 'red')) + + strsub = '' + if 'geoSeriesAccession' in series: + strsub = styles.style('[%s]' % series['geoSeriesAccession'], 'green') + + modestr = ' ' + for id in args.expIds: + modestr = modestr + id + ',' + modestr = modestr[:len(modestr) - 1] + + size = '' + if args.size: + size = '[%s]' % filesize(totalsize) + + out.insert(0, '%s %s%s%s - %s files' % (compositeTrack.name, size, strsub, modestr, str(filecount))) + + for line in out: + print line + + +if __name__ == '__main__': + main() \ No newline at end of file