f350ebff8f2cc1e0772032e59e926b5e45b374cd
lrnassar
  Tue May 5 16:18:28 2026 -0700
Adding ClinPred missense pathogenicity score track on hg19 and hg38. refs #37510

ClinPred (Alirezaie et al, AJHG 2018) joins the predictionScoresSuper supertrack
as a composite of four bigWigs, one per alternate base, with a per-position color
overlay (red for score >= 0.5 likely pathogenic, blue for < 0.5 likely benign).
Adds clinPredToWig.py to convert the upstream score table to wig, a clinPred
branch in makeWigColorByRevelCadd.py for the color overlay step, and reciprocal
relatedTracks entries to REVEL, CADD, PrimateAI-3D, and AlphaMissense. Also adds
Display Conventions and Credits entries in predictionScoresSuper.html for
ClinPred, PrimateAI-3D, and PromoterAI.

diff --git src/hg/makeDb/clinPred/clinPredToWig.py src/hg/makeDb/clinPred/clinPredToWig.py
new file mode 100644
index 00000000000..0a8fa304711
--- /dev/null
+++ src/hg/makeDb/clinPred/clinPredToWig.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+"""Convert a ClinPred all-missense score table into four per-ALT wig files.
+
+Input is the tab-separated table distributed by the ClinPred authors:
+    Chr  Start  Ref  Alt  ClinPred_Score
+with 1-based positions, no 'chr' prefix, and one row per scored missense
+SNV. Three rows are present at every scored exome position (one per
+non-reference base).
+
+Output is a.wig, c.wig, g.wig, t.wig in the current directory.
+
+Convention: at every position covered by at least one ClinPred score,
+the reference base and any synonymous ALTs (those with no row in the
+input) are emitted as 0.0 on their respective tracks; positions with no
+exome coverage are emitted as gaps. This lets the user distinguish
+"reference / no missense effect" from "no data here".
+"""
+
+import sys
+import numpy as np
+
+NUC_IDX = {"A": 0, "C": 1, "G": 2, "T": 3}
+OUT_NAMES = ["a.wig", "c.wig", "g.wig", "t.wig"]
+
+
+def parseChromSizes(fname):
+    sizes = {}
+    for line in open(fname):
+        chrom, size = line.strip().split()
+        sizes[chrom] = int(size)
+    return sizes
+
+
+def initArrays(chromLen):
+    return [np.full(chromLen, -1, dtype=float) for _ in range(4)]
+
+
+def writeChrom(chrom, arrays, outFhs):
+    arrSum = np.sum(arrays, axis=0)
+    chromLen = arrays[0].size
+    for nucIdx in range(4):
+        arr = arrays[nucIdx]
+        ofh = outFhs[nucIdx]
+        stretch = []
+        stretchStart = 0
+        for i in range(chromLen):
+            val = arr[i]
+            hasData = (arrSum[i] != -4)
+            if hasData:
+                if not stretch:
+                    stretchStart = i
+                stretch.append(0.0 if val == -1 else val)
+            else:
+                if stretch:
+                    ofh.write("fixedStep chrom=%s span=1 step=1 start=%d\n"
+                              % (chrom, stretchStart + 1))
+                    for v in stretch:
+                        ofh.write("%s\n" % v)
+                    ofh.write("\n")
+                stretch = []
+        if stretch:
+            ofh.write("fixedStep chrom=%s span=1 step=1 start=%d\n"
+                      % (chrom, stretchStart + 1))
+            for v in stretch:
+                ofh.write("%s\n" % v)
+            ofh.write("\n")
+
+
+def main():
+    if len(sys.argv) != 3:
+        sys.stderr.write("usage: clinPredToWig.py chrom.sizes inputFile\n")
+        sys.stderr.write("  Writes a.wig, c.wig, g.wig, t.wig in current directory.\n")
+        sys.exit(1)
+
+    chromSizesFname = sys.argv[1]
+    inFname = sys.argv[2]
+    chromSizes = parseChromSizes(chromSizesFname)
+
+    outFhs = {i: open(name, "w") for i, name in enumerate(OUT_NAMES)}
+
+    lastChrom = None
+    arrays = None
+
+    for line in open(inFname):
+        if line.startswith("Chr\t"):
+            continue
+        chromName, startStr, ref, alt, scoreStr = line.rstrip("\n").split("\t")
+        ucscChrom = "chr" + chromName
+
+        if ucscChrom != lastChrom:
+            if lastChrom is not None and arrays is not None:
+                writeChrom(lastChrom, arrays, outFhs)
+            if ucscChrom not in chromSizes:
+                sys.stderr.write("warning: %s not in chrom.sizes, skipping\n" % ucscChrom)
+                arrays = None
+                lastChrom = ucscChrom
+                continue
+            sys.stderr.write("loading %s\n" % ucscChrom)
+            arrays = initArrays(chromSizes[ucscChrom])
+            lastChrom = ucscChrom
+
+        if arrays is None:
+            continue
+
+        if alt not in NUC_IDX:
+            continue
+
+        pos = int(startStr) - 1
+        arrays[NUC_IDX[alt]][pos] = float(scoreStr)
+
+    if lastChrom is not None and arrays is not None:
+        writeChrom(lastChrom, arrays, outFhs)
+
+    for f in outFhs.values():
+        f.close()
+
+
+if __name__ == "__main__":
+    main()