464aa35bb5735dfb8f565621dbde8b3b2ead1581 mmaddren Thu May 10 14:53:09 2012 -0700 fixed ucscGb and some of the more important scripts using it since they broke during the move to the new library structure diff --git python/programs/viewTrack/viewTrack python/programs/viewTrack/viewTrack index ba460ea..111768b 100755 --- python/programs/viewTrack/viewTrack +++ python/programs/viewTrack/viewTrack @@ -1,225 +1,228 @@ #!/usr/bin/env python2.7 import sys, os, shutil, argparse, urllib2, re -from ucscgenomics import ra, soft, track, styles, geo +from ucscGb.gbData.ra import raFile +from ucscGb.encode import track +from ucscGb.encode import styles +from ucscGb.externalData.geo import submission def filesize(val): if val > 1099511627776: return str(round(float(val) / 1099511627776, 2)) + 'TB' if val > 1073741824: return str(round(float(val) / 1073741824, 2)) + 'GB' if val > 1048576: return str(round(float(val) / 1048576, 2)) + 'MB' if val > 1024: return str(round(float(val) / 1024, 2)) + 'KB' else: return str(val) + 'B' def printTrackRefLine(): print 'Track'.ljust(25) + 'GEO Status'.ljust(12) + '# Files'.ljust(10) + 'Data Type'.ljust(16) + 'Releases' def trackLine(args, t): nametext = t.name.ljust(25) try: sub = 0 tot = 0 wantSubmit = 1 for exp in t.alphaMetaDb.experiments.iterkeys(): submitted = 0 for stanza in t.alphaMetaDb.experiments[exp]: if 'geoSampleAccession' in stanza: submitted = 1 break if submitted == 1: sub += 1 tot += 1 #for stanza in t.alphaMetaDb.filter2(lambda s: s['objType'] != 'composite').itervalues(): # if 'geoSampleAccession' in stanza: # sub += 1 # tot += 1 #subtext = '%d%%' % int((float(sub) / tot) * 100) subtext = ('%d/%d' % (sub, tot)).ljust(10) #subtext = ' ' * (4 - len(subtext)) + subtext filestext = '%d' % tot filestext = ' ' * (4 - len(filestext)) + filestext + ' files' if sub == 0: subtext = styles.style(subtext, 'red') elif sub < tot: subtext = styles.style(subtext, 'yellow') else: subtext = styles.style(subtext, 'green') wantSubmit = 0 status = styles.style('Public ', 'green') #just randomization for testing, replace with # /cluster/home/mmaddren/kent/src/hg/makeDb/trackDb/human/hg19/trackDb.wgEncode.ra if (tot % 2 == 0): status = styles.style('Unreleased', 'blue') wantSubmit = 0 datatype = styles.style('Not Found'.ljust(16), 'red') if t.alphaMetaDb.dataType != None: datatype = t.alphaMetaDb.dataType.name if t.alphaMetaDb.dataType.type == 'MicroArray': datatype = styles.style(datatype.ljust(16), 'yellow') elif t.alphaMetaDb.dataType.valid: datatype = styles.style(datatype.ljust(16), 'green') else: if t.alphaMetaDb.dataType.shouldSubmit: datatype = styles.style(datatype.ljust(16), 'red') else: datatype = styles.style(datatype.ljust(16), 'blue') #subtext = '' wantSubmit = 0 if datatype == '' and len(t.alphaMetaDb.experiments) == 0: datatype = styles.style('no expIds'.ljust(16), 'red') - if wantSubmit: - nametext = styles.style(nametext, 'green') + #if wantSubmit: + # nametext = styles.style(nametext, 'green') geouptodate = '' if args.geo and t.geo != None: exps = dict() local = 0 offsite = 0 mismatch = 0 matched = 0 for expId in t.alphaMetaDb.experiments.iterkeys(): k = t.alphaMetaDb.experiments[expId].title exps[k] = t.alphaMetaDb.experiments[expId] if k in t.geo.accessions.iterkeys(): happened = 0 for stanza in exps[k]: if 'geoSampleAccession' in stanza and stanza['geoSampleAccession'] != t.geo.accessions[k]: mismatch += 1 happened = 1 break if happened == 0: for stanza in exps[k]: if 'geoSampleAccession' not in stanza: offsite += 1 happened = 1 break if happened == 0: matched += 1 else: local += 1 geouptodate = '%d GSMs, %d expIds, %d match, %d local, %d on geo, %d mismatch' % (len(t.geo.accessions.keys()), len(t.alphaMetaDb.experiments.keys()), matched, local, offsite, mismatch) releases = '' for release in t.releaseObjects: if release.onPublic: releases += styles.style(str(release.index), 'green') else: releases += styles.style(str(release.index), 'red') if 'geoSeriesAccession' in t.alphaMetaDb.compositeStanza: return nametext + styles.style(t.alphaMetaDb.compositeStanza['geoSeriesAccession'].ljust(12), 'green') + subtext + datatype + releases + ' ' + geouptodate else: return nametext + styles.style('Unsubmitted'.ljust(12), 'blue') + subtext + datatype + releases except KeyError as e: return styles.style(nametext, 'red') + styles.style('Error ', 'red') def expIdLine(args, expId, t): samplefiles = 0 samplesize = 0 submittedfiles = 0 for stanza in expId: for fname in stanza['fileName'].split(','): if fname in t.files: file = t.files[fname] samplesize = samplesize + file.size samplefiles = samplefiles + 1 if 'geoSampleAccession' in stanza: submittedfiles = submittedfiles + 1 if expId.title == None: title = styles.style('Inconsistent', 'red') else: title = expId.title return expId.name.rjust(12) + ' ' + title + ' [%s]' % filesize(samplesize) + ' - %d files' % samplefiles def displayAll(args, tracks): display = dict() for t in tracks.itervalues(): display[t.name] = trackLine(args, t) keys = display.keys() keys.sort() printTrackRefLine() for k in keys: print display[k] def displayTrack(args, t, expIds=None): print trackLine(args, t) if expIds == None: expIds = map(int, t.alphaMetaDb.experiments.keys()) expIds.sort() expIds = map(str, expIds) for expId in expIds: print expIdLine(args, t.alphaMetaDb.experiments[expId], t) def main(): parser = argparse.ArgumentParser(description = 'Provides information about tracks and their state in relation to GEO') #parser.add_argument('-u', '--unsubmitted', action='store_true', default=False, help='Do not list samples that have already been submitted') #parser.add_argument('-m', '--missing', action='store_true', default=False, help='List only missing files') #parser.add_argument('-s', '--size', action='store_true', default=False, help='Show file sizes') parser.add_argument('-g', '--geo', action='store_true', default=False, help='Shows additional information crawled from the GEO submission page. WARNING: this takes significantly longer, so if used without a composite, this could take a few minutes') parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/') parser.add_argument('database', help='The database, typically hg19 or mm9') parser.add_argument('composite', nargs='?', help='The composite name, wgEncodeCshlLongRnaSeq for instance') parser.add_argument('expIds', nargs='*', help='Any number of expIds separated by spaces, you can also specify a range by using a hyphen, "140 150 160-170" for instance, or leave blank to specify the entire file') if len(sys.argv) == 1: parser.print_usage() return args = parser.parse_args(sys.argv[1:]) tracks = track.TrackCollection(args.database) if args.composite == None: if args.geo: for t in tracks.itervalues(): t.geo = None try: if 'geoSeriesAccession' in t.alphaMetaDb.compositeStanza: - t.geo = geo.Submission(t.alphaMetaDb.compositeStanza['geoSeriesAccession']) + t.geo = submission.Submission(t.alphaMetaDb.compositeStanza['geoSeriesAccession']) except KeyError: pass displayAll(args, tracks) else: if args.expIds == None or len(args.expIds) == 0: displayTrack(args, tracks[args.composite]) else: ids = list() for id in args.expIds: if '-' in id: start, end = id.split('-', 1) ids.extend(range(int(start), int(end) + 1)) else: ids.append(int(id)) if args.geo: tracks[args.composite].geo = None if 'geoSeriesAccession' in tracks[args.composite].alphaMetaDb.compositeTrack: - tracks[args.composite].geo = geo.Submission(tracks[args.composite].alphaMetaDb.compositeTrack['geoSeriesAccession']) + tracks[args.composite].geo = submission.Submission(tracks[args.composite].alphaMetaDb.compositeTrack['geoSeriesAccession']) displayTrack(args, tracks[args.composite], ids) if __name__ == '__main__': main() \ No newline at end of file