# hubMake -- definitions from src/utils/hubMake/hubMake as of commit
# 2717829315a39adf14be0ad4745acf5a9d17583e
# (max, Tue Feb 20 10:48:12 2024 -0800, "updates to hubMake").
#
# Only definitions that are fully visible in the commit hunks are given here,
# in their post-commit form. The option-parsing code ahead of errAbort(), the
# tail of the file-pairing helper ahead of writeLn() and the beginning of
# makeTrackHub() are cut off by the excerpt and are not reproduced.


def errAbort(msg):
    """Log msg at error level and terminate the program with exit status 1."""
    logging.error(msg)
    sys.exit(1)


def _addRaStanza(ret, fname, trackName, stanzaData):
    """Store one finished .ra stanza in ret; abort on a missing or repeated track name."""
    if trackName is None:
        errAbort("File %s has a stanza without a track name" % fname)
    if trackName in ret:
        # both placeholders need a value, so the arguments must be a tuple
        errAbort("File %s contains two stanzas with the same track name '%s' "
                 % (fname, trackName))
    ret[trackName] = stanzaData


def parseMetaRa(fname):
    """Parse a .ra file and return a dict of trackName -> dict of key -> val.

    Stanzas are separated by one or more blank lines, lines starting with
    "#" are skipped. Every other line is "key value"; key and value are
    separated by the first run of whitespace and a key without a value gets
    the empty string. The "track" line names the stanza and is not stored
    in the stanza's own dict.

    Aborts through errAbort() when a stanza has no "track" line or when a
    track name occurs twice. This is also checked for the last stanza of a
    file that does not end with a blank line.
    """
    logging.debug("Reading %s as .ra", fname)
    ret = {}
    trackName = None
    stanzaData = {}

    with open(fname) as ifh:
        for line in ifh:
            line = line.strip()
            if line.startswith("#"):
                continue

            if line == "":
                if trackName is None and not stanzaData:
                    # several blank lines in a row, no stanza is open
                    continue
                _addRaStanza(ret, fname, trackName, stanzaData)
                trackName = None
                stanzaData = {}
                continue

            fields = line.split(None, 1)
            key = fields[0]
            val = fields[1] if len(fields) == 2 else ""
            if key == "track":
                trackName = val
            else:
                stanzaData[key] = val

    # the last stanza is not necessarily followed by a blank line
    if trackName is not None or stanzaData:
        _addRaStanza(ret, fname, trackName, stanzaData)

    logging.debug("Got %s from .ra", ret)
    return ret


def parseMetaTsv(fname):
    """Parse a tab-separated file and return a dict of trackName -> dict of key -> val.

    The first line is the header and has to start with "track<TAB>". In every
    following row the first field is the track name and the other fields are
    stored under their header names. Empty lines after the header are
    skipped. Aborts through errAbort() on a wrong header or on a row whose
    field count differs from the header.
    """
    headers = None
    meta = {}
    logging.debug("Parsing %s as tab-sep", fname)

    with open(fname) as ifh:
        for line in ifh:
            line = line.rstrip("\r\n")
            if headers is None:
                if not line.startswith("track\t"):
                    errAbort("File %s: the header line must start with 'track<TAB>'" % fname)
                headers = line.split("\t")
                continue

            if line == "":
                continue

            row = line.split("\t")
            if len(row) != len(headers):
                errAbort("File %s: row for '%s' has %d fields, but the header has %d"
                         % (fname, row[0], len(row), len(headers)))
            meta[row[0]] = dict(zip(headers[1:], row[1:]))

    return meta


def parseMetaJson(fname):
    """Load fname as JSON and return the decoded object."""
    logging.debug("Reading %s as json", fname)
    with open(fname) as ifh:
        return json.load(ifh)


def parseMeta(inDir):
    """Collect the track overrides found in inDir as trackName -> dict of key -> val.

    Reads tracks.tsv, tracks.json and tracks.ra, in this order, whichever of
    them exist. Files read later override single keys set by earlier files.
    Returns an empty dict if none of the files exists.
    """
    parsers = (
        ("tracks.tsv", parseMetaTsv),
        ("tracks.json", parseMetaJson),
        ("tracks.ra", parseMetaRa),
    )

    meta = {}
    for baseName, parseFunc in parsers:
        fname = join(inDir, baseName)
        if isfile(fname):
            meta = allMetaOverride(meta, parseFunc(fname))

    logging.debug("Got overrides from %s: %s", inDir, meta)
    return meta


def writeHubGenome(ofh, db, inMeta):
    """Write the hub stanza and the genome line for db to ofh.

    shortLabel, longLabel, descriptionUrl and email are taken from
    inMeta["hub"] if present; all but descriptionUrl have a placeholder
    default, descriptionUrl is only written when it was specified.
    """
    meta = inMeta.get("hub", {})
    ofh.write("hub autoHub\n")
    ofh.write("shortLabel %s\n" % meta.get("shortLabel", "Auto-generated hub"))
    ofh.write("longLabel %s\n" % meta.get("longLabel", "Auto-generated hub"))
    if "descriptionUrl" in meta:
        ofh.write("descriptionUrl %s\n" % meta["descriptionUrl"])
    ofh.write("email %s\n" % meta.get("email", "yourEmail@example.com"))
    # no genomesFile line is written, hub and genome go into the same file
    ofh.write("useOneFile on\n\n")
    ofh.write("genome %s\n\n" % db)
    # NOTE(review): the commit hunk ends right after the line above; confirm
    # that the function has no further statements in the full file.


def writeLn(ofh, spaceCount, line):
    """Write line to ofh, preceded by spaceCount spaces and followed by a newline."""
    ofh.write(" " * spaceCount + line + "\n")


# settings that writeStanza() writes itself, in a fixed order
_STANZA_FIXED_KEYS = frozenset(
    ["track", "shortLabel", "longLabel", "type", "bigDataUrl", "visibility", "parent"]
)


def writeStanza(ofh, indent, tdb):
    """Write the key -> val dict tdb as one stanza to ofh, every line indented by indent spaces.

    track, shortLabel, longLabel, parent, type, visibility and bigDataUrl
    come first, in this order, then all other keys in dict order, then an
    empty line. A missing shortLabel defaults to the track name, a missing
    longLabel to the shortLabel and a missing visibility to "pack". parent
    and bigDataUrl are only written when present, longLabel only when it is
    not empty. Raises KeyError if tdb has no "track" or no "type".
    """
    track = tdb["track"]
    trackType = tdb["type"]
    shortLabel = tdb.get("shortLabel", track)
    longLabel = tdb.get("longLabel", shortLabel)
    visibility = tdb.get("visibility", "pack")

    writeLn(ofh, indent, "track %s" % track)
    writeLn(ofh, indent, "shortLabel %s" % shortLabel)
    if longLabel:
        writeLn(ofh, indent, "longLabel %s" % longLabel)
    if "parent" in tdb:
        writeLn(ofh, indent, "parent %s" % tdb["parent"])
    writeLn(ofh, indent, "type %s" % trackType)
    writeLn(ofh, indent, "visibility %s" % visibility)
    if "bigDataUrl" in tdb:
        writeLn(ofh, indent, "bigDataUrl %s" % tdb["bigDataUrl"])

    for key, val in tdb.items():
        if key not in _STANZA_FIXED_KEYS:
            writeLn(ofh, indent, "%s %s" % (key, val))

    ofh.write("\n")


def metaOverride(tdb, meta):
    """Override the settings of the single track tdb, in place, with meta[tdb["track"]].

    Does nothing if meta has no entry for the track. Keys that are only in
    the meta entry are added to tdb.
    """
    trackMeta = meta.get(tdb["track"])
    if trackMeta is not None:
        tdb.update(trackMeta)


def allMetaOverride(tdb, meta):
    """Merge meta (trackName -> dict) into tdb (trackName -> dict) and return tdb.

    tdb is changed in place: tracks missing from tdb are added, for tracks
    present in both the keys from meta win. tdb is returned unchanged if
    meta is None.
    """
    if meta is None:
        return tdb

    for trackName, trackMeta in meta.items():
        tdb.setdefault(trackName, {}).update(trackMeta)

    return tdb


def writeTdb(inDir, dirDict, dirType, tdbDir, ofh):
    """Write the track stanzas for all files in dirDict[dirType] to ofh.

    dirDict[dirType] maps parentName -> trackBase -> fileType -> list of
    absolute file names. parentName is None for top-level tracks, otherwise
    it is the name of the sub directory of inDir that holds the files.
    bigDataUrl is written relative to tdbDir.

    For dirType "comps" a composite parent stanza is written first, and if
    mostFilesArePaired() says so, also a Peaks and a Signal view. The
    parent and view stanzas take their overrides from parseMeta(inDir), the
    file tracks from parseMeta() of their own directory.
    """
    fnameDict = dirDict[dirType]

    # does not depend on the parent, so read it only once
    parentMeta = parseMeta(inDir)

    for parentName, typeDict in fnameDict.items():
        if parentName is None:
            # top level tracks
            subDir = inDir
        else:
            # container tracks
            subDir = join(inDir, parentName)

        # read for every parent, so that it is defined on all paths below
        groupMeta = parseMeta(subDir)

        indent = 0
        parentHasViews = False

        if dirType == "comps":
            tdb = {
                "track": parentName,
                "shortLabel": parentName,
                "visibility": "dense",
                "compositeTrack": "on",
                "autoScale": "group",
                "type": "bed 4",
            }
            metaOverride(tdb, parentMeta)

            parentHasViews = mostFilesArePaired(typeDict)
            if parentHasViews:
                tdb["subGroup1"] = "view Views PK=Peaks SIG=Signals"
                logging.info("Container track %s has >80%% of paired files, activating views",
                             parentName)

            writeStanza(ofh, indent, tdb)
            indent = 4

        if parentHasViews:
            tdbViewPeaks = {
                "track": parentName + "ViewPeaks",
                "shortLabel": parentName + " Peaks",
                "parent": parentName,
                "view": "PK",
                "visibility": "dense",
                "type": "bigBed",
                "scoreFilter": "off",
                "viewUi": "on",
            }
            metaOverride(tdbViewPeaks, parentMeta)
            writeStanza(ofh, indent, tdbViewPeaks)

            tdbViewSig = {
                "track": parentName + "ViewSignal",
                "shortLabel": parentName + " Signal",
                "parent": parentName,
                "view": "SIG",
                "visibility": "dense",
                "type": "bigWig",
                "viewUi": "on",
            }
            metaOverride(tdbViewSig, parentMeta)
            writeStanza(ofh, indent, tdbViewSig)

        for trackBase, typeFnames in typeDict.items():
            for fileType, absFnames in typeFnames.items():
                # not sure yet what to do with several files of the same
                # base name and the same file type
                if len(absFnames) != 1:
                    errAbort("Track %s has %d files of type %s, only one is supported"
                             % (trackBase, len(absFnames), fileType))
                relFname = relpath(absFnames[0], tdbDir)

                labelSuff = ""
                if parentHasViews:
                    if fileType == "bigWig":
                        labelSuff = " Signal"
                    elif fileType == "bigBed":
                        labelSuff = " Peaks"
                    else:
                        # views and non-bigWig/bigBed files are not supported yet
                        errAbort("File type %s of track %s is not supported in views"
                                 % (fileType, trackBase))

                if parentName is not None:
                    parentPrefix = parentName + "-"
                else:
                    parentPrefix = ""
                trackName = parentPrefix + trackBase + "-" + fileType

                tdb = {
                    "track": trackName,
                    "shortLabel": trackBase + labelSuff,
                    "longLabel": trackBase + labelSuff,
                    "visibility": "dense",
                    "type": fileType,
                    "bigDataUrl": relFname,
                }
                if parentName:
                    tdb["parent"] = parentName

                metaHasVis = "visibility" in groupMeta.get(trackName, {})

                if parentHasViews:
                    # in a view, "visibility hide" from the meta data is
                    # expressed by the on/off flag after the parent name
                    onOff = "on"
                    if metaHasVis and groupMeta[trackName]["visibility"] == "hide":
                        onOff = "off"

                    if fileType == "bigBed":
                        tdb["parent"] = parentName + "ViewPeaks" + " " + onOff
                        tdb["subGroups"] = "view=PK"
                    else:
                        tdb["parent"] = parentName + "ViewSignal" + " " + onOff
                        tdb["subGroups"] = "view=SIG"

                metaOverride(tdb, groupMeta)

                # NOTE(review): a visibility given in the meta data is removed
                # again here, for all tracks, so writeStanza() falls back to
                # its own default for them. The indentation of this part was
                # lost in the excerpt; confirm that tracks in views without a
                # visibility override are meant to keep "dense".
                if metaHasVis:
                    del tdb["visibility"]

                writeStanza(ofh, indent, tdb)