de2ccf6d827865f11d3c8edd9ceeb1b6394a7380
lrnassar
  Tue Apr 21 18:22:59 2026 -0700
PrimateAI-3D: label items by nucleotide change, add aaChange field and HTML mouseover.

Variant analysts typically work at the nucleotide level, and the current
item label (amino acid change) collapses distinguishable variants: ~17%
of items share their (chrom, pos, AA-change) tuple with another item
because of codon degeneracy (e.g. the three substitutions C>A, C>G, and
C>T at the same position can all appear as "M>I"). Labeling by nucleotide
change makes nearly every item uniquely distinguishable (0.0% collisions
on hg38, 0.1% on hg19 from overlapping transcripts).

- primateAi.as: field 4 (name) is now "Nucleotide change (e.g. T>C)";
new field aaChange (placed before ref/alt) holds the amino acid
change.
- primateAiToBigBed.py: write name = "{ref}>{alt}", new aaChange column,
and an HTML mouseover with terse labels (Var/AA/Score/Perc/Pred) and
a colored prediction string.
- primateAi.ra: add labelFields name,aaChange and defaultLabelFields
name so users can toggle the on-feature label between nt change
(default) and AA change.
- primateAi.html: expand Display Conventions with the label-convention
rationale and a legend for each mouseover field.

refs #37274

diff --git src/hg/makeDb/scripts/primateai/primateAiToBigBed.py src/hg/makeDb/scripts/primateai/primateAiToBigBed.py
index 7e7e15d3229..bac429bff64 100644
--- src/hg/makeDb/scripts/primateai/primateAiToBigBed.py
+++ src/hg/makeDb/scripts/primateai/primateAiToBigBed.py
@@ -28,34 +28,41 @@
             gene = fields[4]
             refAa = fields[6]
             altAa = fields[7]
             scorePai = float(fields[8])
             percentile = float(fields[9])
             # hg19 source has some rows missing the refseq column (11 fields instead of 12)
             if len(fields) >= 12:
                 refSeq = fields[10]
                 prediction = fields[11]
             else:
                 refSeq = ""
                 prediction = fields[10]
 
             chromStart = pos - 1  # convert to 0-based
             chromEnd = pos
-            name = f"{refAa}>{altAa}"
+            name = f"{ref}>{alt}"
+            aaChange = f"{refAa}>{altAa}"
             score = int(round(percentile * 1000))
             rgb = "200,0,0" if prediction == "pathogenic" else "0,0,200"
+            color = "#c80000" if prediction == "pathogenic" else "#0000c8"
 
-            mouseOver = f"{ref}>{alt} {name} score={scorePai:.3f} pct={percentile:.3f} ({prediction})"
+            mouseOver = (f"<b>Var</b>: {ref}>{alt}<br>"
+                         f"<b>AA</b>: {aaChange}<br>"
+                         f"<b>Score</b>: {scorePai:.3f}<br>"
+                         f"<b>Perc</b>: {percentile:.3f}<br>"
+                         f"<b>Pred</b>: <span style=\"color:{color}\">{prediction}</span>")
 
             out.write(f"{chrom}\t{chromStart}\t{chromEnd}\t{name}\t{score}\t"
                        f".\t{chromStart}\t{chromEnd}\t{rgb}\t"
-                       f"{ref}\t{alt}\t{gene}\t{refSeq}\t{scorePai:.3f}\t{percentile:.3f}\t"
+                       f"{aaChange}\t{ref}\t{alt}\t{gene}\t{refSeq}\t"
+                       f"{scorePai:.3f}\t{percentile:.3f}\t"
                        f"{prediction}\t{mouseOver}\n")
             count += 1
             if count % 10000000 == 0:
                 print(f"  {count} variants processed...", file=sys.stderr)
 
     print(f"  {count} variants written to {outPath}", file=sys.stderr)
     print("Done. Now sort and run bedToBigBed.", file=sys.stderr)
 
 if __name__ == "__main__":
     main()