src/hg/makeDb/scripts/tp53/tp53FuncLib.py a56d88e05670b759ff3b32829542537ccc790c57

a56d88e05670b759ff3b32829542537ccc790c57
lrnassar
  Tue Apr 28 19:18:20 2026 -0700
Address CR feedback on insight + tp53 hub scripts. refs #37418

Drop duplicated bash() wrappers in favor of subprocess.run / check_output
with list args, eliminating shell=True, embedded-quote concerns, and
stderr-into-stdout merging. Centralize common operations as
run_sort_bed/run_liftOver in tp53FuncLib alongside existing run_bedToBigBed.

Switch HTML escaping to stdlib html.escape() consistently. insightHCIPriors
mouseover (previously unescaped) now escapes HGVS fields, addressing the
specific c.123A>G case Jonathan flagged. Replace invalid </br> tags with
<br> across all five affected mouseover sites.

diff --git src/hg/makeDb/scripts/tp53/tp53FuncLib.py src/hg/makeDb/scripts/tp53/tp53FuncLib.py
index e3b5da12edb..3f50583642f 100644
--- src/hg/makeDb/scripts/tp53/tp53FuncLib.py
+++ src/hg/makeDb/scripts/tp53/tp53FuncLib.py
@@ -4,53 +4,50 @@
 All TP53 tracks use the same canonical transcript (NM_000546.6, chr17 minus
 strand, 393 aa) and share the amino-acid-to-genomic mapping logic. Centralizing
 it here keeps the minus-strand handling consistent across tracks, avoiding the
 off-by-one class of bugs that bit InSiGHT's PMS2 before being fixed.
 
 Canonical transcript: NM_000546.6 / NP_000537.3 (MANE Select)
 """
 
 import subprocess
 
 TRANSCRIPT = "NM_000546.6"
 PROTEIN = "NP_000537.3"
 GENE = "TP53"
 
 
-def bash(cmd):
-    """Run cmd in bash subprocess, return stdout; raise on non-zero exit."""
-    try:
-        out = subprocess.run(
-            cmd, check=True, shell=True,
-            stdout=subprocess.PIPE, universal_newlines=True,
-            stderr=subprocess.STDOUT,
-        )
-        return out.stdout
-    except subprocess.CalledProcessError as e:
-        raise RuntimeError(
-            "command '{}' returned error (code {}): {}".format(
-                e.cmd, e.returncode, e.output)
-        )
+def run_sort_bed(bed):
+    """Sort BED file `bed` in place by chrom, then numeric start; raise CalledProcessError on failure."""
+    subprocess.run(["sort", "-k1,1", "-k2,2n", "-o", bed, bed], check=True)  # -o precedes input (POSIX)
+
+
+def run_liftOver(in_bed, chain, out_bed, unmapped):
+    """Run `liftOver in_bed chain out_bed unmapped`; raise CalledProcessError on non-zero exit."""
+    subprocess.run(
+        ["liftOver", in_bed, chain, out_bed, unmapped],
+        check=True)
 
 
 def get_transcript_info(db, accession=TRANSCRIPT):
     """Query hgsql ncbiRefSeq and return a dict with tx/cds/exon info."""
     query = (
         "SELECT name, chrom, strand, txStart, txEnd, cdsStart, cdsEnd, "
         "exonStarts, exonEnds FROM ncbiRefSeq WHERE name='{}'"
     ).format(accession)
-    result = bash('hgsql {} -Ne "{}"'.format(db, query))
+    result = subprocess.check_output(
+        ["hgsql", db, "-Ne", query], text=True)
     if not result.strip():
         raise ValueError("Transcript {} not found in {}.ncbiRefSeq".format(accession, db))
     fields = result.strip().split('\t')
     exon_starts = [int(x) for x in fields[7].rstrip(',').split(',')]
     exon_ends = [int(x) for x in fields[8].rstrip(',').split(',')]
     return {
         'name': fields[0],
         'chrom': fields[1],
         'strand': fields[2],
         'txStart': int(fields[3]),
         'txEnd': int(fields[4]),
         'cdsStart': int(fields[5]),
         'cdsEnd': int(fields[6]),
         'exonStarts': exon_starts,
         'exonEnds': exon_ends,
@@ -139,32 +136,34 @@
         # intron the two segments should already be ordered correctly.
     if within >= len(nts):
         return None
     return nts[within]
 
 
 def write_autosql(path, content):
     with open(path, 'w') as f:
         f.write(content)
         if not content.endswith('\n'):
             f.write('\n')
 
 
 def run_bedToBigBed(bed, as_file, bb, chrom_sizes, bed_type):
     """Invoke bedToBigBed with the given type (e.g., 'bed9+5')."""
-    bash("bedToBigBed -as={} -type={} -tab {} {} {}".format(
-        as_file, bed_type, bed, chrom_sizes, bb))
+    subprocess.run(
+        ["bedToBigBed", "-as=" + as_file, "-type=" + bed_type, "-tab",
+         bed, chrom_sizes, bb],
+        check=True)
 
 
 def chrom_sizes_path(db):
     return "/cluster/data/{}/chrom.sizes".format(db)
 
 
 PER_PAPER_BEDS = {
     'kato':       ("/hive/users/lrnassar/claude/RM37399/functionalAssays/kato/TP53FuncKato_hg38.bed", 10),
     'giacomelli': ("/hive/users/lrnassar/claude/RM37399/functionalAssays/giacomelli/TP53FuncGiacomelli_hg38.bed", 10),
     'kawaguchi':  ("/hive/users/lrnassar/claude/RM37399/functionalAssays/kawaguchi/TP53FuncKawaguchi_hg38.bed", 9),
     'funk':       ("/hive/users/lrnassar/claude/RM37399/functionalAssays/funk/TP53FuncFunk_hg38.bed", 10),
 }
 
 
 def load_per_paper_raw_scores():