a56d88e05670b759ff3b32829542537ccc790c57 lrnassar Tue Apr 28 19:18:20 2026 -0700 Address CR feedback on insight + tp53 hub scripts. refs #37418 Drop duplicated bash() wrappers in favor of subprocess.run / check_output with list args, eliminating shell=True, embedded-quote concerns, and stderr-into-stdout merging. Centralize common operations as run_sort_bed/run_liftOver in tp53FuncLib alongside existing run_bedToBigBed. Switch HTML escaping to stdlib html.escape() consistently. insightHCIPriors mouseover (previously unescaped) now escapes HGVS fields, addressing the specific c.123A>G case Jonathan flagged. Replace invalid </br> tags with <br>
across all five affected mouseover sites. diff --git src/hg/makeDb/scripts/tp53/tp53VCEPClinVar.py src/hg/makeDb/scripts/tp53/tp53VCEPClinVar.py index bd966993935..75fc4276adf 100644 --- src/hg/makeDb/scripts/tp53/tp53VCEPClinVar.py +++ src/hg/makeDb/scripts/tp53/tp53VCEPClinVar.py @@ -3,30 +3,31 @@ TP53 VCEP Curated Variants track generator. Fetches TP53 VCEP classifications from the ClinGen Evidence Repository (EvRepo). EvRepo is the authoritative source for VCEP classifications (176 TP53 variants as of 2026-04-17) and is used here instead of ClinVar esearch because ClinVar's NCBI esearch index silently drops legacy VCV records like VCV 12374 / R175H. Emits bigBed 9+9 with the per-variant list of applied evidence codes plus the overall classification (P / LP / VUS / LB / B). hg38 and hg19 coordinates parsed directly from the HGVS list; liftOver fallback where absent. """ import argparse +import html import json import os import re import subprocess import sys import tempfile import time import urllib.parse import urllib.request EVREPO_URL = ("https://erepo.genome.network/evrepo/api/classifications" "?gene=TP53&matchLimit=2000&format=json") CLINVAR_EFETCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi" # HGVS genomic regex for hg38 (NC_000017.11) and hg19 (NC_000017.10) @@ -76,45 +77,37 @@ string hgvsp; "p. 
notation if available" string varId; "ClinVar variation ID" string caid; "ClinGen Canonical Allele ID" string publishedDate; "Date published to EvRepo" string metCodes; "Semicolon-separated list of Met evidence codes" string notMetCodes; "Count / summary of Not Met codes" lstring _mouseOver; "HTML mouseover" ) """ def log(msg): print(msg, file=sys.stderr) -def bash(cmd): - result = subprocess.run(cmd, shell=True, capture_output=True, text=True) - if result.returncode != 0: - raise RuntimeError("Command failed: {}\n{}".format(cmd, result.stderr)) - return result.stdout - - def escape_html(s): if not s: return "" - # First escape HTML special chars, then encode any remaining non-ASCII - # as numeric entities so UCSC hgTracks' mouseover pipeline renders - # cleanly (raw UTF-8 ends up as mojibake like 'â€"'). - escaped = (str(s).replace('&', '&amp;').replace('<', '&lt;') - .replace('>', '&gt;').replace('"', '&quot;')) + # HTML-escape, then encode non-ASCII as numeric entities so UCSC + # hgTracks' mouseover pipeline renders cleanly (raw UTF-8 ends up as + # mojibake like 'â€"'). 
+ escaped = html.escape(str(s)) return "".join( ch if ord(ch) < 128 else "&#{};".format(ord(ch)) for ch in escaped ) def fetch_evrepo(): """Download the full TP53 VCEP classification list from EvRepo.""" log("Fetching TP53 VCEP classifications from EvRepo...") req = urllib.request.Request(EVREPO_URL, headers={'User-Agent': 'UCSC-kent/TP53-hub'}) with urllib.request.urlopen(req, timeout=60) as resp: data = json.loads(resp.read()) entries = data.get('variantInterpretations', []) log(" Found {} TP53 VCEP entries".format(len(entries))) @@ -224,31 +217,33 @@ 'met_codes': met, 'not_met_count': not_met_count, } def liftover_coords(coords, chain): if not coords: return {} with tempfile.NamedTemporaryFile(mode='w', suffix='.bed', delete=False) as f: in_bed = f.name for vid, (chrom, s, e) in coords.items(): f.write("{}\t{}\t{}\t{}\n".format(chrom, s, e, vid)) out_bed = in_bed.replace('.bed', '.lifted.bed') un_bed = in_bed.replace('.bed', '.unmapped.bed') try: - bash("liftOver {} {} {} {} 2>/dev/null".format(in_bed, chain, out_bed, un_bed)) + subprocess.run( + ["liftOver", in_bed, chain, out_bed, un_bed], + check=True, stderr=subprocess.DEVNULL) except Exception: pass lifted = {} if os.path.exists(out_bed): with open(out_bed) as f: for line in f: flds = line.strip().split('\t') if len(flds) >= 4: lifted[flds[3]] = (flds[0], int(flds[1]), int(flds[2])) for p in [in_bed, out_bed, un_bed]: if os.path.exists(p): os.remove(p) return lifted @@ -332,35 +327,37 @@ str(v['not_met_count']), mo, ])) return lines, unmapped def build_assembly(variants, assembly, outdir): log("\n=== {} ===".format(assembly)) entries, unmapped = create_bed(variants, assembly) log(" mapped: {} unmapped: {}".format(len(entries), len(unmapped))) if not entries: return None bed = os.path.join(outdir, "TP53VCEPCuratedVars_{}.bed".format(assembly)) with open(bed, 'w') as f: f.write("\n".join(entries) + "\n") - bash("sort -k1,1 -k2,2n {0} -o {0}".format(bed)) + subprocess.run(["sort", "-k1,1", "-k2,2n", bed, "-o", 
bed], check=True) as_file = os.path.join(outdir, "TP53VCEPCuratedVars.as") bb = os.path.join(outdir, "TP53VCEPCuratedVars{}.bb".format(assembly.capitalize())) - bash("bedToBigBed -as={} -type=bed9+9 -tab {} {} {}".format( - as_file, bed, CHROM_SIZES[assembly], bb)) + subprocess.run( + ["bedToBigBed", "-as=" + as_file, "-type=bed9+9", "-tab", + bed, CHROM_SIZES[assembly], bb], + check=True) log(" wrote {}".format(bb)) return bb def write_tsv(variants, path): cols = ['var_id', 'caid', 'display', 'hgvsc', 'hgvsp', 'classification', 'published', 'met_codes', 'not_met_count', 'hg38_start', 'hg38_end', 'hg19_start', 'hg19_end'] with open(path, 'w') as f: f.write("\t".join(cols) + "\n") for v in variants: h38 = v['hg38_bed'] or (None, None) h19 = v['hg19_bed'] or (None, None) row = [ v['var_id'], v['caid'], v['display'], v['hgvsc'] or '',