#!/usr/bin/env python2.7
# Provenance: commit 35c0685ff195fd0332ba86828c6230a34eaaaacb by mmaddren,
# Thu Apr 19 16:44:47 2012 -0700, "added track viewing tools for GEO so that
# venkat can use it" (new file python/programs/viewTrack/viewTrack).
"""Provides information about tracks and their state in relation to GEO.

Usage: viewTrack [-g] [-t TRACKPATH] database [composite [expIds ...]]

With only a database, one summary row is printed per track. With a
composite, the summary row for that track is followed by one row per
experiment id (all of them, or just the ids/ranges given).
"""

import sys
import os
import shutil
import argparse
import urllib2
import re

from ucscgenomics import ra, soft, track, styles, geo


# Unit thresholds for filesize(), largest first.
_SIZE_UNITS = (
    (1099511627776, 'TB'),
    (1073741824, 'GB'),
    (1048576, 'MB'),
    (1024, 'KB'),
)


def filesize(val):
    """Return val (a byte count) as a short human-readable string.

    The largest unit that val reaches is used and the value is rounded to
    two decimals, e.g. 1536 -> '1.5KB'. Values below 1024 are returned
    unscaled with a 'B' suffix.
    """
    for factor, suffix in _SIZE_UNITS:
        # '>=' so that an exact unit boundary (e.g. 1024) is shown in that
        # unit instead of falling through to the next smaller one.
        if val >= factor:
            return str(round(float(val) / factor, 2)) + suffix
    return str(val) + 'B'


def printTrackRefLine():
    """Print the column header that matches the rows built by trackLine()."""
    print('Track'.ljust(25) + 'GEO Status'.ljust(12) + '# Files'.ljust(10) + 'Data Type'.ljust(16) + 'Releases')


def _countSubmitted(metaDb):
    """Return (submitted, total) experiment counts for metaDb.

    An experiment counts as submitted as soon as any one of its stanzas
    carries a 'geoSampleAccession'.
    """
    sub = 0
    tot = 0
    for exp in metaDb.experiments.iterkeys():
        if any('geoSampleAccession' in stanza for stanza in metaDb.experiments[exp]):
            sub += 1
        tot += 1
    return sub, tot


def _geoSummary(t):
    """Return the text comparing the local metaDb of t with t.geo.

    Each experiment is classified by looking its title up in
    t.geo.accessions:
      local    - title is not among the GEO accessions
      mismatch - some stanza has a geoSampleAccession different from GEO's
      on geo   - title is on GEO but some stanza lacks a geoSampleAccession
      match    - every stanza carries the accession GEO reports
    """
    local = 0
    offsite = 0
    mismatch = 0
    matched = 0
    experiments = t.alphaMetaDb.experiments
    for expId in experiments.iterkeys():
        experiment = experiments[expId]
        title = experiment.title
        if title not in t.geo.accessions.iterkeys():
            local += 1
            continue
        accession = t.geo.accessions[title]
        if any('geoSampleAccession' in stanza and stanza['geoSampleAccession'] != accession
               for stanza in experiment):
            mismatch += 1
        elif any('geoSampleAccession' not in stanza for stanza in experiment):
            offsite += 1
        else:
            matched += 1
    return '%d GSMs, %d expIds, %d match, %d local, %d on geo, %d mismatch' % (
        len(t.geo.accessions.keys()), len(experiments.keys()),
        matched, local, offsite, mismatch)


def trackLine(args, t):
    """Build the one-line summary row for track t.

    Columns, in order: track name (25), GEO series accession or
    'Unsubmitted' (12), submitted/total experiments (10), data type (16),
    release indexes, and - only with args.geo and a populated t.geo - the
    GEO comparison from _geoSummary(). Any KeyError raised while gathering
    the data produces a red 'Error' row instead of propagating.
    """
    plainname = t.name.ljust(25)
    nametext = plainname
    try:
        metaDb = t.alphaMetaDb
        sub, tot = _countSubmitted(metaDb)
        wantSubmit = 1

        subtext = ('%d/%d' % (sub, tot)).ljust(10)
        if sub == 0:
            subtext = styles.style(subtext, 'red')
        elif sub < tot:
            subtext = styles.style(subtext, 'yellow')
        else:
            subtext = styles.style(subtext, 'green')
            wantSubmit = 0

        # TODO: just randomization for testing, replace with the release
        # state read from
        # /cluster/home/mmaddren/kent/src/hg/makeDb/trackDb/human/hg19/trackDb.wgEncode.ra
        # The placeholder treats tracks with an even experiment count as
        # unreleased, which only suppresses the green name highlight.
        if tot % 2 == 0:
            wantSubmit = 0

        datatype = styles.style('Not Found'.ljust(16), 'red')
        dataType = metaDb.dataType
        if dataType is not None:
            label = dataType.name.ljust(16)
            if dataType.type == 'MicroArray':
                datatype = styles.style(label, 'yellow')
            elif dataType.valid:
                datatype = styles.style(label, 'green')
            elif dataType.shouldSubmit:
                datatype = styles.style(label, 'red')
            else:
                datatype = styles.style(label, 'blue')
                # NOTE(review): the original layout was lost; this reset is
                # assumed to belong to the "should not be submitted" branch
                # only - confirm against the repository copy.
                wantSubmit = 0
        # NOTE(review): datatype always holds a styled label by now, so this
        # comparison with '' looks unreachable; kept unchanged because the
        # intended trigger for 'no expIds' is not clear from this file.
        if datatype == '' and len(metaDb.experiments) == 0:
            datatype = styles.style('no expIds'.ljust(16), 'red')

        if wantSubmit:
            nametext = styles.style(nametext, 'green')

        geouptodate = ''
        if args.geo and t.geo is not None:
            geouptodate = _geoSummary(t)

        # One coloured digit per release: green when public, red otherwise.
        releases = ''.join(
            styles.style(str(release.index), 'green' if release.onPublic else 'red')
            for release in t.releaseObjects)

        if 'geoSeriesAccession' in metaDb.compositeStanza:
            return nametext + styles.style(metaDb.compositeStanza['geoSeriesAccession'].ljust(12), 'green') + subtext + datatype + releases + ' ' + geouptodate
        return nametext + styles.style('Unsubmitted'.ljust(12), 'blue') + subtext + datatype + releases
    except KeyError:
        # Use the unstyled name so a previously applied green highlight is
        # not nested inside the red error styling.
        return styles.style(plainname, 'red') + styles.style('Error ', 'red')


def expIdLine(args, expId, t):
    """Build the detail row for one experiment of track t.

    expId is the experiment object (iterable over its stanzas, with .name
    and .title), not the id string. Only files named in a stanza's
    'fileName' list that are also present in t.files are counted. The row
    shows the experiment name, its title (or a red 'Inconsistent' when the
    title is None), the summed file size and the file count.
    """
    samplefiles = 0
    samplesize = 0
    for stanza in expId:
        for fname in stanza['fileName'].split(','):
            if fname in t.files:
                samplesize += t.files[fname].size
                samplefiles += 1

    if expId.title is None:
        title = styles.style('Inconsistent', 'red')
    else:
        title = expId.title

    return expId.name.rjust(12) + ' ' + title + ' [%s]' % filesize(samplesize) + ' - %d files' % samplefiles


def displayAll(args, tracks):
    """Print the header and one summary row per track, sorted by name."""
    # Every row is built before anything is printed, so the header is not
    # emitted if gathering the rows fails part-way.
    display = {}
    for t in tracks.itervalues():
        display[t.name] = trackLine(args, t)

    printTrackRefLine()
    for name in sorted(display):
        print(display[name])


def displayTrack(args, t, expIds=None):
    """Print the summary row of track t followed by per-experiment rows.

    expIds -- optional iterable of experiment ids (ints or strings). When
              omitted, every experiment of the track is listed in numeric
              id order. Ids that the track does not have are reported on
              their own row instead of aborting the listing.
    """
    print(trackLine(args, t))

    experiments = t.alphaMetaDb.experiments
    if expIds is None:
        keys = sorted(experiments.keys(), key=int)
    else:
        # Ids parsed from the command line are ints, while the lookup below
        # needs the same string form the default path uses.
        keys = [str(expId) for expId in expIds]

    for key in keys:
        try:
            experiment = experiments[key]
        except KeyError:
            print(styles.style(str(key).rjust(12) + ' not found', 'red'))
            continue
        print(expIdLine(args, experiment, t))


def _attachGeo(t):
    """Set t.geo to the GEO submission of track t, or None if it has none.

    trackLine() reads t.geo whenever --geo is given, so this must run for
    every track that is going to be displayed in that mode.
    """
    t.geo = None
    try:
        stanza = t.alphaMetaDb.compositeStanza
        if 'geoSeriesAccession' in stanza:
            t.geo = geo.Submission(stanza['geoSeriesAccession'])
    except KeyError:
        # Best effort: the track is still listed, and trackLine() turns the
        # same lookup failure into a visible 'Error' row.
        pass


def _parseExpIds(tokens):
    """Expand tokens such as ['140', '160-170'] into a list of ints.

    A token containing a hyphen is an inclusive range. Raises ValueError
    when a token (or either end of a range) is not an integer.
    """
    ids = []
    for token in tokens:
        if '-' in token:
            start, end = token.split('-', 1)
            ids.extend(range(int(start), int(end) + 1))
        else:
            ids.append(int(token))
    return ids


def main():
    """Parse the command line and print the requested track information."""
    parser = argparse.ArgumentParser(description='Provides information about tracks and their state in relation to GEO')
    # Planned but not implemented: -u/--unsubmitted, -m/--missing, -s/--size.
    parser.add_argument('-g', '--geo', action='store_true', default=False, help='Shows additional information crawled from the GEO submission page. WARNING: this takes significantly longer, so if used without a composite, this could take a few minutes')
    # NOTE(review): --trackPath is accepted but nothing below reads it.
    parser.add_argument('-t', '--trackPath', help='Overrides the default track path ~/kent/src/hg/makeDb/trackDb/')
    parser.add_argument('database', help='The database, typically hg19 or mm9')
    parser.add_argument('composite', nargs='?', help='The composite name, wgEncodeCshlLongRnaSeq for instance')
    parser.add_argument('expIds', nargs='*', help='Any number of expIds separated by spaces, you can also specify a range by using a hyphen, "140 150 160-170" for instance, or leave blank to specify the entire file')

    if len(sys.argv) == 1:
        parser.print_usage()
        return

    args = parser.parse_args(sys.argv[1:])

    tracks = track.TrackCollection(args.database)

    if args.composite is None:
        if args.geo:
            for t in tracks.itervalues():
                _attachGeo(t)
        displayAll(args, tracks)
        return

    selected = tracks[args.composite]
    if args.geo:
        # Needed on both composite paths (with and without expIds), because
        # trackLine() reads selected.geo in --geo mode.
        _attachGeo(selected)

    if not args.expIds:
        displayTrack(args, selected)
        return

    try:
        ids = _parseExpIds(args.expIds)
    except ValueError:
        parser.error('expIds must be integers or ranges such as 160-170')
    displayTrack(args, selected, ids)


if __name__ == '__main__':
    main()