de2ccf6d827865f11d3c8edd9ceeb1b6394a7380 lrnassar Tue Apr 21 18:22:59 2026 -0700 PrimateAI-3D: label items by nucleotide change, add aaChange field and HTML mouseover. Variant analysts typically work at the nucleotide level, and the current item label (amino acid change) collapses distinguishable variants: ~17% of items share their (chrom, pos, AA-change) tuple with another item because of codon degeneracy (e.g. the three substitutions C>A, C>G and C>T at the same position can all appear as "M>I"). Labeling by nucleotide change makes essentially every item uniquely distinguishable (0.0% collisions on hg38; 0.1% on hg19, from overlapping transcripts). - primateAi.as: field 4 (name) is now "Nucleotide change (e.g. T>C)"; new field aaChange (placed before ref/alt) holds the amino acid change. - primateAiToBigBed.py: write name = "{ref}>{alt}", new aaChange column, and an HTML mouseover with terse labels (Var/AA/Score/Perc/Pred) and a colored prediction string. - primateAi.ra: add labelFields name,aaChange and defaultLabelFields name so users can toggle the on-feature label between nt change (default) and AA change. - primateAi.html: expand Display Conventions with the label-convention rationale and a legend for each mouseover field. 
refs #37274 diff --git src/hg/makeDb/scripts/primateai/primateAiToBigBed.py src/hg/makeDb/scripts/primateai/primateAiToBigBed.py index 7e7e15d3229..bac429bff64 100644 --- src/hg/makeDb/scripts/primateai/primateAiToBigBed.py +++ src/hg/makeDb/scripts/primateai/primateAiToBigBed.py @@ -28,34 +28,41 @@ gene = fields[4] refAa = fields[6] altAa = fields[7] scorePai = float(fields[8]) percentile = float(fields[9]) # hg19 source has some rows missing the refseq column (11 fields instead of 12) if len(fields) >= 12: refSeq = fields[10] prediction = fields[11] else: refSeq = "" prediction = fields[10] chromStart = pos - 1 # convert to 0-based chromEnd = pos - name = f"{refAa}>{altAa}" + name = f"{ref}>{alt}" + aaChange = f"{refAa}>{altAa}" score = int(round(percentile * 1000)) rgb = "200,0,0" if prediction == "pathogenic" else "0,0,200" + color = "#c80000" if prediction == "pathogenic" else "#0000c8" - mouseOver = f"{ref}>{alt} {name} score={scorePai:.3f} pct={percentile:.3f} ({prediction})" + mouseOver = (f"<b>Var</b>: {ref}>{alt}<br>" + f"<b>AA</b>: {aaChange}<br>" + f"<b>Score</b>: {scorePai:.3f}<br>" + f"<b>Perc</b>: {percentile:.3f}<br>" + f"<b>Pred</b>: <span style=\"color:{color}\">{prediction}</span>") out.write(f"{chrom}\t{chromStart}\t{chromEnd}\t{name}\t{score}\t" f".\t{chromStart}\t{chromEnd}\t{rgb}\t" - f"{ref}\t{alt}\t{gene}\t{refSeq}\t{scorePai:.3f}\t{percentile:.3f}\t" + f"{aaChange}\t{ref}\t{alt}\t{gene}\t{refSeq}\t" + f"{scorePai:.3f}\t{percentile:.3f}\t" f"{prediction}\t{mouseOver}\n") count += 1 if count % 10000000 == 0: print(f" {count} variants processed...", file=sys.stderr) print(f" {count} variants written to {outPath}", file=sys.stderr) print("Done. Now sort and run bedToBigBed.", file=sys.stderr) if __name__ == "__main__": main()