de2ccf6d827865f11d3c8edd9ceeb1b6394a7380
lrnassar
Tue Apr 21 18:22:59 2026 -0700
PrimateAI-3D: label items by nucleotide change, add aaChange field and HTML mouseover.
Variant analysts typically work at the nucleotide level, and the current
item label (amino acid change) collapses distinguishable variants: ~17%
of items share their (chrom, pos, AA-change) tuple with another item
because of codon degeneracy (e.g. the three substitutions C>A, C>G and
C>T at the same position can all appear as "M>I"). Labeling by
nucleotide change makes every item uniquely distinguishable (0.0%
collisions on hg38, 0.1% on hg19 from overlapping transcripts).
- primateAi.as: field 4 (name) is now "Nucleotide change (e.g. T>C)";
new field aaChange (placed before ref/alt) holds the amino acid
change.
- primateAiToBigBed.py: write name = "{ref}>{alt}", new aaChange column,
and an HTML mouseover with terse labels (Var/AA/Score/Perc/Pred) and
a colored prediction string.
- primateAi.ra: add labelFields name,aaChange and defaultLabelFields
name so users can toggle the on-feature label between nt change
(default) and AA change.
- primateAi.html: expand Display Conventions with the label-convention
rationale and a legend for each mouseover field.
refs #37274
diff --git src/hg/makeDb/scripts/primateai/primateAiToBigBed.py src/hg/makeDb/scripts/primateai/primateAiToBigBed.py
index 7e7e15d3229..bac429bff64 100644
--- src/hg/makeDb/scripts/primateai/primateAiToBigBed.py
+++ src/hg/makeDb/scripts/primateai/primateAiToBigBed.py
@@ -28,34 +28,41 @@
gene = fields[4]
refAa = fields[6]
altAa = fields[7]
scorePai = float(fields[8])
percentile = float(fields[9])
# hg19 source has some rows missing the refseq column (11 fields instead of 12)
if len(fields) >= 12:
refSeq = fields[10]
prediction = fields[11]
else:
refSeq = ""
prediction = fields[10]
chromStart = pos - 1 # convert to 0-based
chromEnd = pos
- name = f"{refAa}>{altAa}"
+ name = f"{ref}>{alt}"
+ aaChange = f"{refAa}>{altAa}"
score = int(round(percentile * 1000))
rgb = "200,0,0" if prediction == "pathogenic" else "0,0,200"
+ color = "#c80000" if prediction == "pathogenic" else "#0000c8"
- mouseOver = f"{ref}>{alt} {name} score={scorePai:.3f} pct={percentile:.3f} ({prediction})"
+ mouseOver = (f"Var: {ref}>{alt}
"
+ f"AA: {aaChange}
"
+ f"Score: {scorePai:.3f}
"
+ f"Perc: {percentile:.3f}
"
+ f"Pred: {prediction}")
out.write(f"{chrom}\t{chromStart}\t{chromEnd}\t{name}\t{score}\t"
f".\t{chromStart}\t{chromEnd}\t{rgb}\t"
- f"{ref}\t{alt}\t{gene}\t{refSeq}\t{scorePai:.3f}\t{percentile:.3f}\t"
+ f"{aaChange}\t{ref}\t{alt}\t{gene}\t{refSeq}\t"
+ f"{scorePai:.3f}\t{percentile:.3f}\t"
f"{prediction}\t{mouseOver}\n")
count += 1
if count % 10000000 == 0:
print(f" {count} variants processed...", file=sys.stderr)
print(f" {count} variants written to {outPath}", file=sys.stderr)
print("Done. Now sort and run bedToBigBed.", file=sys.stderr)
if __name__ == "__main__":
main()