a56d88e05670b759ff3b32829542537ccc790c57
lrnassar
  Tue Apr 28 19:18:20 2026 -0700
Address CR feedback on insight + tp53 hub scripts. refs #37418

Drop duplicated bash() wrappers in favor of subprocess.run / check_output
with list args, eliminating shell=True, embedded-quote concerns, and
stderr-into-stdout merging. Centralize common operations as
run_sort_bed/run_liftOver in tp53FuncLib alongside existing run_bedToBigBed.

Switch HTML escaping to stdlib html.escape() consistently. insightHCIPriors
mouseover (previously unescaped) now escapes HGVS fields, addressing the
specific c.123A>G case Jonathan flagged. Replace invalid </br> tags with
<br> across all five affected mouseover sites.

diff --git src/hg/makeDb/scripts/tp53/tp53ClinDomains.py src/hg/makeDb/scripts/tp53/tp53ClinDomains.py
index bb5622540bd..642a8da7c39 100644
--- src/hg/makeDb/scripts/tp53/tp53ClinDomains.py
+++ src/hg/makeDb/scripts/tp53/tp53ClinDomains.py
@@ -1,159 +1,159 @@
 #!/usr/bin/env python3
 """
 TP53 VCEP Clinical Domains track generator.
 
 Builds bigBed 9+5 for the seven clinically relevant TP53 protein domains
 defined in ClinGen CSpec GN009 v2.4.0 (PM1), plus the six PM1_Moderate
 hotspot codons (R175, G245, R248, R249, R273, R282) overlaid on the DBD.
 
 Transcript: NM_000546.6 / NP_000537.3 (MANE Select), 393 aa, chr17 minus strand.
 PM1 is applicable for TP53 (unlike the MMR genes in the InSiGHT hub).
 """
 
 import os
 import sys
 
 sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 import tp53FuncLib as lib
 
 DEFAULT_OUTDIR = "/hive/users/lrnassar/claude/RM37399/clinDomains"
 
 # Domains from CSpec GN009 v2.4.0 §PM1 (aa ranges on NP_000537.3)
 DOMAINS = [
     ("TAD1",  17,  25,  "Transactivation domain 1"),
     ("TAD2",  48,  56,  "Transactivation domain 2"),
     ("PRR",   64,  92,  "Proline-rich region"),
     ("DBD",   100, 292, "DNA binding domain"),
     ("Hinge", 293, 324, "Hinge"),
     ("OD",    325, 356, "Oligomerization (tetramerization) domain"),
     ("CTD",   368, 387, "C-terminal (basic / regulatory) domain"),
 ]
 
 # PM1_Moderate hotspot codons. Tavtigian +2 points each.
 HOTSPOT_CODONS = [
     (175, "R175", "DNA-contact / structural hotspot"),
     (245, "G245", "structural hotspot"),
     (248, "R248", "DNA-contact hotspot"),
     (249, "R249", "structural hotspot"),
     (273, "R273", "DNA-contact hotspot"),
     (282, "R282", "DNA-contact hotspot"),
 ]
 
 DOMAIN_COLOR = "138,111,158"    # purple
 HOTSPOT_COLOR = "230,3,131"     # fuchsia
 
 AUTOSQL = """table TP53clinDomains
 "TP53 VCEP clinically relevant protein domains and PM1_Moderate hotspot codons (NM_000546.6)"
    (
    string chrom;        "Reference sequence chromosome or scaffold"
    uint   chromStart;   "Start position in chromosome"
    uint   chromEnd;     "End position in chromosome"
    string name;         "Domain name or hotspot residue"
    uint   score;        "Not used, all 0"
    char[1] strand;      "Not used, all ."
    uint   thickStart;   "Same as chromStart"
    uint   thickEnd;     "Same as chromEnd"
    uint   reserved;     "RGB value"
    string domainType;   "Either 'Domain' or 'PM1_Moderate hotspot'"
    string NMaccession;  "Transcript accession (NM_000546.6)"
    string AAlocation;   "Amino acid range"
    string description;  "Description or role"
    lstring _mouseOver;  "HTML mouseover"
    )
 """
 
 
 def domain_mouseover(name, desc, aa_lo, aa_hi):
     return (
         "<b>Domain:</b> {name} ({desc})"
         "<br><b>Gene:</b> TP53"
         "<br><b>Transcript:</b> {tx} (NP_000537.3)"
         "<br><b>Amino acid loc:</b> {lo}-{hi}"
         "<br><b>Note:</b> used in PVS1 decision tree; PM1 hotspot codons "
         "overlaid on DBD contribute +2 pts each"
     ).format(name=name, desc=desc, tx=lib.TRANSCRIPT, lo=aa_lo, hi=aa_hi)
 
 
 def hotspot_mouseover(label, role, codon):
     return (
         "<b>PM1_Moderate hotspot:</b> {label} (+2 pts)"
         "<br><b>Role:</b> {role}"
         "<br><b>Gene:</b> TP53"
         "<br><b>Transcript:</b> {tx} (NP_000537.3)"
         "<br><b>Codon:</b> {codon}"
         "<br><b>ACMG code:</b> PM1_Moderate"
     ).format(label=label, role=role, tx=lib.TRANSCRIPT, codon=codon)
 
 
 def generate_bed(tx):
     lines = []
     chrom = tx['chrom']
     for name, aa_lo, aa_hi, desc in DOMAINS:
         mo = domain_mouseover(name, desc, aa_lo, aa_hi)
         for g_start, g_end, _ex in lib.aa_to_genomic(aa_lo, aa_hi, tx):
             if g_start >= g_end:
                 continue
             lines.append("\t".join([
                 chrom, str(g_start), str(g_end),
                 name, "0", ".",
                 str(g_start), str(g_end),
                 DOMAIN_COLOR,
                 "Domain", lib.TRANSCRIPT,
                 "{}-{}".format(aa_lo, aa_hi),
                 desc,
                 mo,
             ]))
     for codon, label, role in HOTSPOT_CODONS:
         mo = hotspot_mouseover(label, role, codon)
         for g_start, g_end, _ex in lib.aa_codon_genomic(codon, tx):
             if g_start >= g_end:
                 continue
             lines.append("\t".join([
                 chrom, str(g_start), str(g_end),
                 label, "0", ".",
                 str(g_start), str(g_end),
                 HOTSPOT_COLOR,
                 "PM1_Moderate hotspot", lib.TRANSCRIPT,
                 str(codon),
                 role,
                 mo,
             ]))
     return lines
 
 
 def build(db, outdir):
     print("=== {} ===".format(db))
     tx = lib.get_transcript_info(db)
     print("  {} at {}:{}-{} {}".format(
         tx['name'], tx['chrom'], tx['txStart'], tx['txEnd'], tx['strand']))
     bed_lines = generate_bed(tx)
     print("  {} BED rows".format(len(bed_lines)))
 
     os.makedirs(outdir, exist_ok=True)
     as_file = os.path.join(outdir, "TP53clinDomains.as")
     lib.write_autosql(as_file, AUTOSQL)
 
     bed = os.path.join(outdir, "TP53clinDomains_{}.bed".format(db))
     with open(bed, 'w') as f:
         f.write("\n".join(bed_lines) + "\n")
-    lib.bash("sort -k1,1 -k2,2n {0} -o {0}".format(bed))
+    lib.run_sort_bed(bed)
 
     bb = os.path.join(outdir, "TP53clinDomains{}.bb".format(db.capitalize()))
     lib.run_bedToBigBed(bed, as_file, bb, lib.chrom_sizes_path(db), "bed9+5")
     print("  wrote {}".format(bb))
 
 
 def main():
     import argparse
     p = argparse.ArgumentParser(description=__doc__)
     p.add_argument('-o', '--output-dir', default=DEFAULT_OUTDIR)
     p.add_argument('--db', action='append',
                    help='Assembly db (hg38 or hg19); repeat for both. Default: hg38 only.')
     args = p.parse_args()
     dbs = args.db if args.db else ['hg38']
     for db in dbs:
         build(db, args.output_dir)
 
 
 if __name__ == "__main__":
     main()