a56d88e05670b759ff3b32829542537ccc790c57
lrnassar
Tue Apr 28 19:18:20 2026 -0700
Address CR feedback on insight + tp53 hub scripts. refs #37418
Drop duplicated bash() wrappers in favor of subprocess.run / check_output
with list args, eliminating shell=True, embedded-quote concerns, and
stderr-into-stdout merging. Centralize common operations as
run_sort_bed/run_liftOver in tp53FuncLib alongside existing run_bedToBigBed.
Switch HTML escaping to stdlib html.escape() consistently. insightHCIPriors
mouseover (previously unescaped) now escapes HGVS fields, addressing the
specific c.123A>G case Jonathan flagged. Replace invalid tags with
across all five affected mouseover sites.
diff --git src/hg/makeDb/scripts/tp53/tp53CancerHotspots.py src/hg/makeDb/scripts/tp53/tp53CancerHotspots.py
index 09901e7a9f4..202633ed040 100644
--- src/hg/makeDb/scripts/tp53/tp53CancerHotspots.py
+++ src/hg/makeDb/scripts/tp53/tp53CancerHotspots.py
@@ -1,179 +1,179 @@
#!/usr/bin/env python3
"""
TP53 VCEP cancerhotspots.org subtrack generator (PM1 Evidence composite).
Fetches cancerhotspots.org single-residue hotspots, filters to TP53, and
emits a per-AA-change bigBed 9+4 with ACMG PM1 strength assignment:
PM1_Moderate (+2 pts) >=10 somatic occurrences of this exact aa change
PM1_Supporting (+1 pt) 2-9 occurrences
Synonymous (WT->WT) and stop-gain (*) variants are excluded; only missense.
Live pull with a cached-snapshot fallback.
"""
import argparse
import json
import os
import sys
import urllib.request
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
import tp53FuncLib as lib
DEFAULT_OUTDIR = "/hive/users/lrnassar/claude/RM37399/cancerHotspots"
SNAPSHOT_NAME = "cancerhotspots_single.json"
API_URL = "https://www.cancerhotspots.org/api/hotspots/single"
COLOR_MOD = "230,3,131" # fuchsia — PM1_Moderate
COLOR_SUP = "245,182,207" # light fuchsia — PM1_Supporting
AUTOSQL = """table TP53CancerHotspots
"TP53 somatic hotspots from cancerhotspots.org mapped to ACMG PM1 strength"
(
string chrom; "Reference sequence chromosome or scaffold"
uint chromStart; "Start position in chromosome"
uint chromEnd; "End position in chromosome"
string name; "Amino acid change (e.g. R175H)"
uint score; "Not used, all 0"
char[1] strand; "Not used, all ."
uint thickStart; "Same as chromStart"
uint thickEnd; "Same as chromEnd"
uint reserved; "RGB color"
string acmgCode; "PM1_Moderate or PM1_Supporting"
string points; "Tavtigian points contribution"
uint somaticCount; "Somatic occurrence count (cancerhotspots.org)"
lstring _mouseOver; "HTML mouseover"
)
"""
def fetch_or_load(outdir):
path = os.path.join(outdir, SNAPSHOT_NAME)
try:
req = urllib.request.Request(API_URL, headers={'User-Agent': 'UCSC-kent/TP53-hub'})
with urllib.request.urlopen(req, timeout=30) as resp:
body = resp.read()
data = json.loads(body)
# Write snapshot on success
with open(path, 'wb') as f:
f.write(body)
print(" fetched {} records from cancerhotspots.org".format(len(data)))
return data
except Exception as e:
if os.path.exists(path):
print(" WARNING: live fetch failed ({}); falling back to snapshot {}".format(e, path))
with open(path) as f:
return json.load(f)
raise RuntimeError("cancerhotspots fetch failed and no snapshot: {}".format(e))
def records_for_tp53(data):
out = []
for h in data:
if h.get('hugoSymbol') != 'TP53':
continue
res = h.get('residue', '')
if not res:
continue
# residue is like "R175"; first char = WT aa, rest = codon position.
# cancerhotspots.org also reports splice-site somatic hotspots where
# residue starts with 'X' and variants contain 'sp'. Those are NOT
# missense and must not be classified PM1 (CSpec §PM1 is missense only).
wt = res[0]
if wt == 'X' or not wt.isalpha():
continue
try:
codon = int(res[1:])
except ValueError:
continue
variants = h.get('variantAminoAcid') or {}
for alt, count in variants.items():
if alt == wt: # synonymous — not missense
continue
if alt in ('*', 'X'): # stop-gain — handled by PVS1, not PM1
continue
if not alt.isalpha() or len(alt) != 1:
# splice-site codes ("sp", "fs", etc.) are not missense
continue
if count is None or count < 2:
continue
if count >= 10:
acmg, points, color = "PM1_Moderate", "+2", COLOR_MOD
else:
acmg, points, color = "PM1_Supporting", "+1", COLOR_SUP
out.append({
'wt': wt,
'codon': codon,
'alt': alt,
'count': count,
'acmg': acmg,
'points': points,
'color': color,
'name': "{}{}{}".format(wt, codon, alt),
})
return out
def mouseover(rec):
return (
"{acmg} ({points} pts)"
"
Variant: {name} (TP53 NP_000537.3)"
"
Somatic occurrences (cancerhotspots.org): {count}"
"
Codon: {codon}"
"
Source: cancerhotspots.org — PM1 per CSpec GN009 v2.4.0"
).format(**rec)
def generate_bed(records, tx):
lines = []
chrom = tx['chrom']
for rec in records:
segs = lib.aa_codon_genomic(rec['codon'], tx)
if not segs:
continue
for g_start, g_end, _ex in segs:
lines.append("\t".join([
chrom, str(g_start), str(g_end),
rec['name'], "0", ".",
str(g_start), str(g_end),
rec['color'],
rec['acmg'], rec['points'], str(rec['count']),
mouseover(rec),
]))
return lines
def build(db, outdir):
print("=== {} ===".format(db))
os.makedirs(outdir, exist_ok=True)
data = fetch_or_load(outdir)
records = records_for_tp53(data)
print(" {} TP53 PM1 records".format(len(records)))
tx = lib.get_transcript_info(db)
bed_lines = generate_bed(records, tx)
print(" {} BED rows".format(len(bed_lines)))
as_file = os.path.join(outdir, "TP53CancerHotspots.as")
lib.write_autosql(as_file, AUTOSQL)
bed = os.path.join(outdir, "TP53CancerHotspots_{}.bed".format(db))
with open(bed, 'w') as f:
f.write("\n".join(bed_lines) + "\n")
- lib.bash("sort -k1,1 -k2,2n {0} -o {0}".format(bed))
+ lib.run_sort_bed(bed)
bb = os.path.join(outdir, "TP53CancerHotspots{}.bb".format(db.capitalize()))
lib.run_bedToBigBed(bed, as_file, bb, lib.chrom_sizes_path(db), "bed9+4")
print(" wrote {}".format(bb))
def main():
p = argparse.ArgumentParser(description=__doc__)
p.add_argument('-o', '--output-dir', default=DEFAULT_OUTDIR)
p.add_argument('--db', action='append', help='hg38 or hg19 (repeat). Default hg38.')
args = p.parse_args()
dbs = args.db if args.db else ['hg38']
for db in dbs:
build(db, args.output_dir)
if __name__ == "__main__":
main()