84807912f92036a1d36b282121e757868cd4e4bb mmaddren Tue Dec 18 13:02:11 2012 -0800 updated libraries and some tools to make some things clearer, as per code review v276 final diff --git python/programs/viewTrack/viewTrack python/programs/viewTrack/viewTrack index 111768b..d23c6a9 100755 --- python/programs/viewTrack/viewTrack +++ python/programs/viewTrack/viewTrack @@ -7,49 +7,58 @@ from ucscGb.externalData.geo import submission def filesize(val): if val > 1099511627776: return str(round(float(val) / 1099511627776, 2)) + 'TB' if val > 1073741824: return str(round(float(val) / 1073741824, 2)) + 'GB' if val > 1048576: return str(round(float(val) / 1048576, 2)) + 'MB' if val > 1024: return str(round(float(val) / 1024, 2)) + 'KB' else: return str(val) + 'B' -def printTrackRefLine(): +def printTrackRefLine(args): + print 'Total expIds: ' + str(args.totalsubmitted) + '/' + str(args.totalexps) + ' out of tracks: ' + str(args.submittedtracks) + '/' + str(args.totaltracks) print 'Track'.ljust(25) + 'GEO Status'.ljust(12) + '# Files'.ljust(10) + 'Data Type'.ljust(16) + 'Releases' def trackLine(args, t): nametext = t.name.ljust(25) try: sub = 0 tot = 0 wantSubmit = 1 for exp in t.alphaMetaDb.experiments.iterkeys(): submitted = 0 + samples = 0 for stanza in t.alphaMetaDb.experiments[exp]: + if 'objStatus' in stanza: + continue + if 'Aln' not in stanza['metaObject']: if 'geoSampleAccession' in stanza: - submitted = 1 - break - if submitted == 1: + submitted += 1 + samples += 1 + if (not args.pessimistic and submitted > 0) or (submitted == samples and submitted != 0): sub += 1 + args.totalsubmitted += 1 + if samples > 0: tot += 1 + args.totalexps += 1 + #for stanza in t.alphaMetaDb.filter2(lambda s: s['objType'] != 'composite').itervalues(): # if 'geoSampleAccession' in stanza: # sub += 1 # tot += 1 #subtext = '%d%%' % int((float(sub) / tot) * 100) subtext = ('%d/%d' % (sub, tot)).ljust(10) #subtext = ' ' * (4 - len(subtext)) + subtext filestext = '%d' % tot filestext = ' ' * (4 - len(filestext)) + filestext + ' files' if sub == 0: subtext = styles.style(subtext, 'red') elif sub < tot: subtext = styles.style(subtext, 'yellow') @@ -57,38 +66,41 @@ subtext = styles.style(subtext, 'green') wantSubmit = 0 status = styles.style('Public ', 'green') #just randomization for testing, replace with # /cluster/home/mmaddren/kent/src/hg/makeDb/trackDb/human/hg19/trackDb.wgEncode.ra if (tot % 2 == 0): status = styles.style('Unreleased', 'blue') wantSubmit = 0 datatype = styles.style('Not Found'.ljust(16), 'red') if t.alphaMetaDb.dataType != None: datatype = t.alphaMetaDb.dataType.name if t.alphaMetaDb.dataType.type == 'MicroArray': datatype = styles.style(datatype.ljust(16), 'yellow') + args.totaltracks += 1 elif t.alphaMetaDb.dataType.valid: datatype = styles.style(datatype.ljust(16), 'green') + args.totaltracks += 1 else: if t.alphaMetaDb.dataType.shouldSubmit: datatype = styles.style(datatype.ljust(16), 'red') else: datatype = styles.style(datatype.ljust(16), 'blue') #subtext = '' + args.totalexps -= tot wantSubmit = 0 if datatype == '' and len(t.alphaMetaDb.experiments) == 0: datatype = styles.style('no expIds'.ljust(16), 'red') #if wantSubmit: # nametext = styles.style(nametext, 'green') geouptodate = '' if args.geo and t.geo != None: exps = dict() local = 0 offsite = 0 mismatch = 0 matched = 0 for expId in t.alphaMetaDb.experiments.iterkeys(): @@ -109,88 +121,123 @@ break if happened == 0: matched += 1 else: local += 1 geouptodate = '%d GSMs, %d expIds, %d match, %d local, %d on geo, %d mismatch' % (len(t.geo.accessions.keys()), len(t.alphaMetaDb.experiments.keys()), matched, local, offsite, mismatch) releases = '' for release in t.releaseObjects: if release.onPublic: releases += styles.style(str(release.index), 'green') else: releases += styles.style(str(release.index), 'red') if 'geoSeriesAccession' in t.alphaMetaDb.compositeStanza: + args.submittedtracks += 1 return nametext + styles.style(t.alphaMetaDb.compositeStanza['geoSeriesAccession'].ljust(12), 'green') + subtext + datatype + releases + ' ' + geouptodate else: return nametext + styles.style('Unsubmitted'.ljust(12), 'blue') + subtext + datatype + releases except KeyError as e: - return styles.style(nametext, 'red') + styles.style('Error ', 'red') + return styles.style(nametext, 'red') + styles.style('Error ' + str(e), 'red') def expIdLine(args, expId, t): samplefiles = 0 samplesize = 0 submittedfiles = 0 for stanza in expId: for fname in stanza['fileName'].split(','): if fname in t.files: file = t.files[fname] samplesize = samplesize + file.size samplefiles = samplefiles + 1 if 'geoSampleAccession' in stanza: submittedfiles = submittedfiles + 1 if expId.title == None: title = styles.style('Inconsistent', 'red') else: title = expId.title return expId.name.rjust(12) + ' ' + title + ' [%s]' % filesize(samplesize) + ' - %d files' % samplefiles def displayAll(args, tracks): + args.totalsubmitted = 0 + args.totalexps = 0 + args.totaltracks = 0 + args.submittedtracks = 0 display = dict() for t in tracks.itervalues(): display[t.name] = trackLine(args, t) keys = display.keys() keys.sort() - printTrackRefLine() + nongeo = list() + marray = list() + unsub = list() + subm = list() + err = list() + + printTrackRefLine(args) for k in keys: - print display[k] + try: + if not tracks[k].alphaMetaDb.dataType.shouldSubmit: + nongeo.append(display[k]) + elif tracks[k].alphaMetaDb.dataType.type == 'MicroArray': + marray.append(display[k]) + elif tracks[k].alphaMetaDb.dataType.valid: + if 'geoSeriesAccession' in tracks[k].alphaMetaDb.compositeStanza: + subm.append(display[k]) + else: + unsub.append(display[k]) + else: + err.append(display[k]) + except AttributeError as e: + err.append(display[k]) + + for i in unsub: + print i + for i in subm: + print i + for i in marray: + print i + for i in nongeo: + print i + for i in err: + print i def displayTrack(args, t, expIds=None): print trackLine(args, t) if expIds == None: expIds = map(int, t.alphaMetaDb.experiments.keys()) expIds.sort() expIds = map(str, expIds) for expId in expIds: print expIdLine(args, t.alphaMetaDb.experiments[expId], t) def main(): parser = argparse.ArgumentParser(description = 'Provides information about tracks and their state in relation to GEO') #parser.add_argument('-u', '--unsubmitted', action='store_true', default=False, help='Do not list samples that have already been submitted') #parser.add_argument('-m', '--missing', action='store_true', default=False, help='List only missing files') - #parser.add_argument('-s', '--size', action='store_true', default=False, help='Show file sizes') + parser.add_argument('-p', '--pessimistic', action='store_true', default=False, help='Only count completed samples') parser.add_argument('-g', '--geo', action='store_true', default=False, help='Shows additional information crawled from the GEO submission page. WARNING: this takes significantly longer, so if used without a composite, this could take a few minutes') parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/') parser.add_argument('database', help='The database, typically hg19 or mm9') parser.add_argument('composite', nargs='?', help='The composite name, wgEncodeCshlLongRnaSeq for instance') parser.add_argument('expIds', nargs='*', help='Any number of expIds separated by spaces, you can also specify a range by using a hyphen, "140 150 160-170" for instance, or leave blank to specify the entire file') if len(sys.argv) == 1: parser.print_usage() return args = parser.parse_args(sys.argv[1:]) tracks = track.TrackCollection(args.database) if args.composite == None: