7a8fdcbb9b6b28ffdeee0a1da8bb83a3c68c7ed8
max
  Fri Jun 4 06:03:33 2021 -0700
fixing up makedocs and track docs for revel track, refs #27142

diff --git src/hg/makeDb/revel/revelToWig.py src/hg/makeDb/revel/revelToWig.py
new file mode 100644
index 0000000..c12949c
--- /dev/null
+++ src/hg/makeDb/revel/revelToWig.py
@@ -0,0 +1,100 @@
+import subprocess, sys
+from collections import defaultdict
+
+# based on caddToWig.py in kent/src/hg/makeDb/cadd
+# two arguments: input filename and db (one of: hg19, hg38)
+
+def inputLineChunk(fname, db):
+    " yield all values at consecutive positions as a tuple (chrom, pos, list of (nucl, phred value)) "
+    # chr,hg19_pos,grch38_pos,ref,alt,aaref,aaalt,REVEL
+    values = []
+    firstChrom = None
+    firstPos = None
+    lastChrom = None
+    lastPos = None
+    donePos = {}
+
+    assert (db in ["hg19", "hg38"])
+    doHg19 = (db=="hg19")
+
+    #fname = "revel_grch38_all_chromosomes.csv.gz"
+    if fname.endswith(".gz"):
+        ifh = subprocess.Popen(['zcat', fname], stdout=subprocess.PIPE, encoding="ascii").stdout
+    else:
+        ifh = open(fname)
+
+    for line in ifh:
+        if line.startswith("chr"):
+            continue
+        row = line.rstrip("\n").split(",")[:8]
+
+        chrom, hg19Pos, hg38Pos, ref, alt, aaRef, aaAlt, revel = row
+        if doHg19:
+            pos = hg19Pos
+        else:
+            pos = hg38Pos
+
+        if not doHg19 and pos==".":
+            continue
+
+        pos = int(pos) # wiggle ascii is 1-based AAARRGH!!
+
+        # the file has duplicate values in the hg38 column, but for different hg19 positions!
+        if not doHg19:
+            if pos in donePos and hg19Pos!=donePos[pos]:
+                continue
+            hg19Pos = int(hg19Pos)
+            donePos[pos] = hg19Pos
+
+        chrom = "chr"+chrom
+        revel = float(revel)
+
+
+        if firstPos is None:
+            firstChrom = chrom
+            firstPos = pos
+
+        if chrom != lastChrom or pos-lastPos > 1:
+            yield firstChrom, firstPos, values
+            firstPos, firstChrom = pos, chrom
+            values = []
+
+            if chrom != lastChrom:
+                lastPos = None
+                donePos = {}
+
+        if lastPos is None or pos-lastPos > 0:
+            values.append([0.0,0.0,0.0,0.0])
+
+        nuclIdx = "ACGT".find(alt)
+        values[-1][nuclIdx] = revel
+
+        lastPos = pos
+        lastChrom = chrom
+    # last line of file
+    yield firstChrom, firstPos, values
+
+fname = sys.argv[1]
+db = sys.argv[2]
+
+outFhs = {
+        "A" : open("a.wig", "w"),
+        "C" : open("c.wig", "w"),
+        "T" : open("t.wig", "w"),
+        "G" : open("g.wig", "w")
+        }
+
+for chrom, pos, nuclValues in inputLineChunk(fname, db):
+    if len(nuclValues)==0:
+        continue
+
+    for ofh in outFhs.values():
+        ofh.write("fixedStep chrom=%s span=1 step=1 start=%d\n" % (chrom, pos))
+
+    for nuclVals in nuclValues:
+        for nucIdx in (0,1,2,3):
+            alt = "ACGT"[nucIdx]
+            ofh = outFhs[alt]
+            revel = nuclVals[nucIdx]
+            ofh.write(str(revel))
+            ofh.write("\n")