1a1b94ac3518c1b9dfdf08f25605e505cf14f359
max
  Fri Nov 6 07:38:57 2020 -0800
adding bigHeat helper script inspired by Hiram's email, no redmine

diff --git src/utils/bigHeat src/utils/bigHeat
new file mode 100755
index 0000000..f5fe86b
--- /dev/null
+++ src/utils/bigHeat
@@ -0,0 +1,181 @@
+#!/usr/bin/env python
+
+import logging, sys, optparse, subprocess, os
+from collections import defaultdict
+from os.path import *
+
+# made with: import cm from matplotlib; (cm.get_cmap("viridis")(x)*255)[:, 0:3].astype(int).tolist()
+pal = [[68, 1, 84], [72, 35, 116], [64, 67, 135], [52, 94, 141], [41, 120, 142], [32, 144, 140], [34, 167, 132], [68, 190, 112], [121, 209, 81], [189, 222, 38], [253, 231, 36]]
+palStrs = [ ",".join([str(x) for x in col]) for col in pal]
+colorCount = len(pal)
+colSteps = 1.0/colorCount
+
+# can be set to something like '/gbdb/wuhCor1/pbm/IgG_Z-score-Healthy_Controls/' if your symlinks are in place
+bbDir = None
+
+# ==== functions =====
+    
+def parseArgs():
+    " setup logging, parse command line arguments and options. -h shows auto-generated help page "
+    parser = optparse.OptionParser("""usage: %prog [options] locationBed locationMatrixFnames chromSizes outDir - create one feature
+            Duplicate BED features and color by them by the values in locationMatrix.
+            Creates new bigBed files in outDir and creates a basic trackDb.ra file there.
+
+    BED file looks like this:
+
+            chr1 1 1000 myGene 0 + 1 1000 0,0,0
+            chr2 1 1000 myGene2 0 + 1 1000 0,0,0
+
+    locationMatrix looks like this:
+
+            gene sample1 sample2 sample3
+            myGene 1 2 3
+            myGene2 0.1 3 10
+
+    This will create a composite with three subtracks (sample1, sample2, sample). Each subtrack will have myGene,
+    and colored in intensity with sample3 more intense than sample1 and sample2. Same for myGene2.""")
+
+    #parser.add_option("-d", "--debug", dest="debug", action="store_true", help="show debug messages")
+    #(options, args) = parser.parse_args()
+
+    parser.add_option("-b", "--bbDir", dest="bbDir", action="store", help="when writing trackDb, prefix all bigDataUrl filenames with this. Use this with the /gbdb directory.")
+    (options, args) = parser.parse_args()
+
+    if args==[]:
+        parser.print_help()
+        exit(1)
+
+    #if options.debug:
+        #logging.basicConfig(level=logging.DEBUG)
+        #logging.getLogger().setLevel(logging.DEBUG)
+    #else:
+    logging.basicConfig(level=logging.INFO)
+    logging.getLogger().setLevel(logging.INFO)
+
+    return args, options
+
+def parseBed(bedFname):
+    beds = {}
+    for line in open(bedFname):
+        row = line.rstrip("\n").split("\t")
+        assert(len(row)>=9)
+        row = row[:9]
+        beds[row[3]] = row
+    return beds
+
+def toBigBed(bedFnames, chromSizes):
+    " convert to bigBed "
+    bbFnames = []
+    for fname in bedFnames:
+        bbFname = splitext(fname)[0]+".bb"
+        bbFnames.append(bbFname)
+        print("Converting %s to %s" % (fname, bbFname))
+        cmd = ["bedSort", fname, fname]
+        assert(subprocess.call(cmd)==0)
+        cmd = ["bedToBigBed", fname, chromSizes, bbFname, "-tab"]
+        assert(subprocess.call(cmd)==0)
+        os.remove(fname)
+    return bbFnames
+
+def makeTrackDb(bbFnames, outDir, parentName):
+    " write a trackDb file to outDir "
+    global bbDir
+    tdbFn = join(outDir, "trackDb.ra")
+    tdbFh = open(tdbFn, "w")
+
+    label = parentName.replace("_", " ")
+    tdbFh.write("track %s\n" % parentName)
+    tdbFh.write("shortLabel %s\n" % label)
+    tdbFh.write("longLabel %s\n" % label)
+    tdbFh.write("type bed 9\n")
+    tdbFh.write("compositeTrack on\n")
+    tdbFh.write("visibility dense\n")
+    tdbFh.write("labelOnFeature on\n")
+    tdbFh.write("itemRgb on\n")
+    tdbFh.write("\n")
+
+    for bbFname in bbFnames:
+        trackName = splitext(basename(bbFname))[0]
+        trackLabel = trackName.replace("_", " ")
+        fullTrackName = parentName+"_"+trackName # avoid subtrack name clashes
+        if bbDir:
+            bbPath = join(bbDir, basename(bbFname))
+        else:
+            bbPath = bbFname
+        tdbFh.write("    track %s\n" % fullTrackName)
+        tdbFh.write("    shortLabel %s\n" % trackLabel)
+        tdbFh.write("    longLabel %s\n" % trackLabel)
+        tdbFh.write("    type bigBed 9\n")
+        tdbFh.write("    bigDataUrl %s\n" % bbPath)
+        tdbFh.write("    parent %s on\n" % parentName)
+        tdbFh.write("\n")
+
+    tdbFh.close()
+
+def bigHeat(beds, fname, chromSizes, outDir):
+    " create bed files, convert them to bigBeds and make trackDb.ra in the directory "
+    global bbDir
+    baseIn = basename(fname).split(".")[0]
+    #outDir = join(outDir, baseIn)
+    if not isdir(outDir):
+        os.makedirs(outDir)
+
+    print("Processing %s to output directory %s" % (fname, outDir))
+    ofhs = {}
+    for line in open(fname):
+        row = line.rstrip("\n").split("\t")
+
+        if row[0]=="Sample":
+            sampleNames = row[1:]
+            # create output file handles
+            for sn in sampleNames:
+                ofn = join(outDir, sn+".bed")
+                ofhs[sn] = open(ofn, "w")
+            continue
+        if row[0].startswith("Seq.") or "protein" in row[0] or row[0]=="":
+            continue
+
+        name = row[0]
+        values = row[1:]
+        assert(len(values)==len(sampleNames))
+
+        nums = [float(x) for x in values]
+        minX = min(nums)
+        maxX = max(nums)
+        minMax = maxX - minX
+        ratios = [(x - minX)/minMax for x in nums]
+        ratios = [min(0.999, x) for x in ratios] # make sure that 1.0 doesn't appear
+        colIdx = [int(float(x) / colSteps) for x in ratios]
+        colors = [palStrs[x] for x in colIdx]
+
+        bed = beds[name]
+        for sampleName, rgbCol in zip(sampleNames, colors):
+            bed[8] = rgbCol
+            ofh = ofhs[sampleName]
+            ofh.write("\t".join(bed))
+            ofh.write("\n")
+
+    bedFnames = []
+    for ofh in ofhs.values():
+        bedFnames.append(ofh.name)
+        ofh.close()
+
+    bbFnames = toBigBed(bedFnames, chromSizes)
+    makeTrackDb(bbFnames, outDir, baseIn)
+
+# ----------- main --------------
+def main():
+    global bbDir
+    args, options = parseArgs()
+
+    bedFname = args[0]
+    matrixFname = args[1]
+    chromSizes = args[2]
+    outDir = args[3]
+
+    bbDir = options.bbDir
+
+    beds = parseBed(bedFname)
+    bigHeat(beds, matrixFname, chromSizes, outDir)
+
+main()