a56d88e05670b759ff3b32829542537ccc790c57
lrnassar
  Tue Apr 28 19:18:20 2026 -0700
Address CR feedback on insight + tp53 hub scripts. refs #37418

Drop duplicated bash() wrappers in favor of subprocess.run / check_output
with list args, eliminating shell=True, embedded-quote concerns, and
stderr-into-stdout merging. Centralize common operations as
run_sort_bed/run_liftOver in tp53FuncLib alongside existing run_bedToBigBed.

Switch HTML escaping to stdlib html.escape() consistently. insightHCIPriors
mouseover (previously unescaped) now escapes HGVS fields, addressing the
specific c.123A>G case Jonathan flagged. Replace invalid </br> tags with
<br> across all five affected mouseover sites.

diff --git src/hg/makeDb/scripts/insight/buildInsightClinVar.py src/hg/makeDb/scripts/insight/buildInsightClinVar.py
index 7a1d2728f17..a4f4eca2cef 100644
--- src/hg/makeDb/scripts/insight/buildInsightClinVar.py
+++ src/hg/makeDb/scripts/insight/buildInsightClinVar.py
@@ -18,30 +18,31 @@
     python3 buildInsightClinVar.py [--output-dir DIR]
 
 Output files:
     - insight_clinvar_variants.tsv: Combined variant data from ClinVar
     - insightClinVar.as: AutoSQL schema file
     - insightClinVar_hg19.bed: BED file for hg19
     - insightClinVar_hg38.bed: BED file for hg38
     - insightClinVarHg19.bb: bigBed file for hg19
     - insightClinVarHg38.bb: bigBed file for hg38
 
 Author: UCSC Genome Browser Group
 Date: 2026
 """
 
 import argparse
+import html
 import os
 import subprocess
 import sys
 import tempfile
 import time
 import urllib.request
 import xml.etree.ElementTree as ET
 
 # ============================================================================
 # Configuration
 # ============================================================================
 
 # Genes to fetch from ClinVar (Lynch syndrome MMR genes)
 GENES = ["MLH1", "MSH2", "MSH6", "PMS2"]
 
@@ -98,48 +99,30 @@
    string dateEvaluated;   "Date of classification"
    lstring comment;        "InSiGHT submitter comment"
    lstring _mouseOver;     "HTML mouseover text"
    )
 """
 
 # ============================================================================
 # Utility Functions
 # ============================================================================
 
 def log(msg):
     """Print log message to stderr"""
     print(msg, file=sys.stderr)
 
 
-def bash(cmd):
-    """Run a bash command and return output"""
-    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
-    if result.returncode != 0:
-        raise RuntimeError(f"Command failed: {cmd}\n{result.stderr}")
-    return result.stdout
-
-
-def escape_html(text):
-    """Escape special characters for HTML"""
-    if not text:
-        return ""
-    return (str(text).replace('&', '&amp;')
-                     .replace('<', '&lt;')
-                     .replace('>', '&gt;')
-                     .replace('"', '&quot;'))
-
-
 def fetch_url(url, max_retries=3):
     """Fetch URL with retries"""
     for attempt in range(max_retries):
         try:
             req = urllib.request.Request(url)
             with urllib.request.urlopen(req, timeout=120) as response:
                 return response.read().decode('utf-8')
         except Exception as e:
             if attempt < max_retries - 1:
                 log(f"    Retry {attempt + 1} after error: {e}")
                 time.sleep(2)
             else:
                 raise
 
 
@@ -317,31 +300,33 @@
     Returns:
         dict mapping id to (chrom, start, end) in target assembly
     """
     if not coords:
         return {}
 
     with tempfile.NamedTemporaryFile(mode='w', suffix='.bed', delete=False) as f:
         input_bed = f.name
         for var_id, (chrom, start, end) in coords.items():
             f.write(f"{chrom}\t{start}\t{end}\t{var_id}\n")
 
     output_bed = input_bed.replace('.bed', '.lifted.bed')
     unmapped_bed = input_bed.replace('.bed', '.unmapped.bed')
 
     try:
-        bash(f"liftOver {input_bed} {chain_file} {output_bed} {unmapped_bed} 2>/dev/null")
+        subprocess.run(
+            ["liftOver", input_bed, chain_file, output_bed, unmapped_bed],
+            check=True, stderr=subprocess.DEVNULL)
     except Exception:
         for f in [input_bed, output_bed, unmapped_bed]:
             if os.path.exists(f):
                 os.remove(f)
         return {}
 
     lifted = {}
     if os.path.exists(output_bed):
         with open(output_bed) as f:
             for line in f:
                 fields = line.strip().split('\t')
                 if len(fields) >= 4:
                     lifted[fields[3]] = (fields[0], int(fields[1]), int(fields[2]))
 
     for f in [input_bed, output_bed, unmapped_bed]:
@@ -408,37 +393,37 @@
         else:
             # Missing coordinates
             unmapped.append(v)
             continue
 
         if start is None:
             unmapped.append(v)
             continue
 
         # Get color based on classification
         color = COLORS.get(v['classification'], DEFAULT_COLOR)
 
         # Build mouseover HTML
         clinvar_url = f"https://www.ncbi.nlm.nih.gov/clinvar/variation/{v['var_id']}/"
         mouse_over = (
-            f"<b>Variant:</b> {escape_html(v['name'])}<br>"
+            f"<b>Variant:</b> {html.escape(v['name'])}<br>"
             f"<b>ClinVar ID:</b> <a href=\"{clinvar_url}\" target=\"_blank\">{v['var_id']}</a><br>"
-            f"<b>Classification:</b> {escape_html(v['classification'])}<br>"
-            f"<b>Date evaluated:</b> {escape_html(v['date_evaluated'])}"
+            f"<b>Classification:</b> {html.escape(v['classification'])}<br>"
+            f"<b>Date evaluated:</b> {html.escape(v['date_evaluated'])}"
         )
         if v['comment']:
-            mouse_over += f"<br><b>Comment:</b> {escape_html(v['comment'])}"
+            mouse_over += f"<br><b>Comment:</b> {html.escape(v['comment'])}"
 
         # Truncate name if too long
         name = v['name'] if len(v['name']) <= 200 else v['name'][:197] + "..."
 
         # Review status - use custom text
         review_status = "Reviewed by expert panel InSiGHT"
 
         # Build BED9+7 line
         comment = v['comment'].replace('\t', ' ').replace('\n', ' ')
         bed_fields = [
             chrom,                          # chrom
             str(start),                     # chromStart
             str(end),                       # chromEnd
             name,                           # name
             '0',                            # score
@@ -475,40 +460,43 @@
     log(f"  Mapped: {stats['mapped']} (native: {stats['mapped_native']}, liftOver: {stats['mapped_liftover']})")
     log(f"  Unmapped: {len(unmapped)}")
 
     if not entries:
         log("  No entries to write!")
         return None, None
 
     # Write BED file
     bed_file = os.path.join(output_dir, f"insightClinVar_{assembly}.bed")
     log(f"  Writing BED file: {bed_file}")
     with open(bed_file, 'w') as f:
         f.write('\n'.join(entries) + '\n')
 
     # Sort BED file
     log(f"  Sorting BED file...")
-    bash(f"sort -k1,1 -k2,2n {bed_file} -o {bed_file}")
+    subprocess.run(["sort", "-k1,1", "-k2,2n", bed_file, "-o", bed_file], check=True)
 
     # Create bigBed
     as_file = os.path.join(output_dir, "insightClinVar.as")
     bb_file = os.path.join(output_dir, f"insightClinVar{assembly.capitalize()}.bb")
     chrom_sizes = CHROM_SIZES[assembly]
 
     log(f"  Creating bigBed file: {bb_file}")
     try:
-        bash(f"bedToBigBed -as={as_file} -type=bed9+7 -tab {bed_file} {chrom_sizes} {bb_file}")
+        subprocess.run(
+            ["bedToBigBed", "-as=" + as_file, "-type=bed9+7", "-tab",
+             bed_file, chrom_sizes, bb_file],
+            check=True)
         log(f"  Successfully created: {bb_file}")
     except Exception as e:
         log(f"  ERROR creating bigBed: {e}")
         bb_file = None
 
     return bed_file, bb_file
 
 
 # ============================================================================
 # Main Pipeline
 # ============================================================================
 
 def main():
     parser = argparse.ArgumentParser(
         description='Build InSiGHT ClinVar VCEP variants bigBed tracks'