1525618a814bcf6eb64c4147b99c520509a55d60
max
  Tue May 19 07:37:29 2026 -0700
Refresh mpraVarDB to 2026-05-19 snapshot; fix hg19 pos in name field.

Upstream landed the Mouri/Tewhey pvalue correction (5,092 rows with
pvalue > 1 -> 0) and replaced 47,156 placeholder fdr=1.0 values with
NaN. Rebuild from the new CSV; remove the temporary "pending upstream
fix" note in mpraVarDb.html.

Also closes the pre-existing "hg19 pos in non-rs name field" issue:
mpravardbToBed.py gained a post-liftOver step that rewrites the
chr:pos prefix inside non-rs names with the hg38 coordinates. 47,160
names were rewritten in this build (e.g. chr1:1403972:C>CG at hg38
chr1:1468591 now reads chr1:1468592:C>CG).

itemCount preserved at 239,028. refs #37359

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

diff --git src/hg/makeDb/scripts/mpravardb/mpravardbToBed.py src/hg/makeDb/scripts/mpravardb/mpravardbToBed.py
index 984367f26cf..f8fb6f9aac9 100644
--- src/hg/makeDb/scripts/mpravardb/mpravardbToBed.py
+++ src/hg/makeDb/scripts/mpravardb/mpravardbToBed.py
@@ -193,49 +193,76 @@
             elif genome == "hg38":
                 f38.write(line)
                 hg38_count += 1
 
     print(f"Wrote {hg19_count} hg19 rows to {hg19_bed}")
     print(f"Wrote {hg38_count} hg38 rows to {hg38_bed}")
 
 def run(cmd):
     """Run a shell command, exit on failure."""
     print(f"  Running: {cmd}")
     result = subprocess.run(cmd, shell=True)
     if result.returncode != 0:
         print(f"ERROR: command failed with exit code {result.returncode}", file=sys.stderr)
         sys.exit(1)
 
+# Non-rs names are built in csv_to_bed() as "chr<X>:<pos>:<ref>><alt>"
+# from the raw CSV pos, which for hg19 rows is the hg19 position. liftOver
+# updates the BED coordinates but leaves the name string alone, so the name
+# carries a stale hg19 pos. Rewrite the chrom:pos prefix from the row's
+# (already hg38-coordinate) col 1 + col 2 so the name agrees with the row's
+# coordinates.
+_NAME_PREFIX_RE = re.compile(r"^chr[^:]+:\d+:")
+
+def fix_lifted_names(lifted_bed):
+    fixed = 0
+    tmp = lifted_bed + ".tmp"
+    with open(lifted_bed) as fin, open(tmp, "w") as fout:
+        for line in fin:
+            cols = line.rstrip("\n").split("\t")
+            chrom, start, name = cols[0], int(cols[1]), cols[3]
+            if _NAME_PREFIX_RE.match(name):
+                new_name = _NAME_PREFIX_RE.sub(f"{chrom}:{start+1}:", name)
+                if new_name != name:
+                    cols[3] = new_name
+                    fixed += 1
+            fout.write("\t".join(cols) + "\n")
+    os.replace(tmp, lifted_bed)
+    print(f"  Rewrote hg19 -> hg38 pos in {fixed} non-rs name fields")
+
 def main():
     hg19_bed = "mpravardb.hg19.bed"
     hg38_bed = "mpravardb.hg38.bed"
     lifted_bed = "mpravardb.hg19lifted.bed"
     unmapped_bed = "mpravardb.unmapped.bed"
     merged_bed = "mpravardb.bed"
     output_bb = "mpravardb.bb"
 
     print("Step 1: Converting CSV to BED format...")
     csv_to_bed(INPUT_CSV, hg19_bed, hg38_bed)
 
     print("\nStep 2: Lifting hg19 coordinates to hg38...")
     run(f"liftOver -bedPlus=9 -tab {hg19_bed} {LIFTOVER_CHAIN} {lifted_bed} {unmapped_bed}")
 
     # Count unmapped
     unmapped = sum(1 for line in open(unmapped_bed) if not line.startswith("#"))
     lifted = sum(1 for _ in open(lifted_bed))
     print(f"  Lifted: {lifted}, Unmapped: {unmapped}")
 
+    # Rewrite chr:pos inside non-rs name fields with the lifted hg38 pos.
+    fix_lifted_names(lifted_bed)
+
     print("\nStep 3: Merging hg38 native + lifted hg19...")
     run(f"cat {hg38_bed} {lifted_bed} > {merged_bed}.tmp")
     run(f"bedSort {merged_bed}.tmp {merged_bed}")
     os.remove(f"{merged_bed}.tmp")
 
     total = sum(1 for _ in open(merged_bed))
     print(f"  Total merged rows: {total}")
 
     print("\nStep 4: Converting to bigBed...")
     run(f"bedToBigBed -type=bed9+ -tab -as={AS_FILE} {merged_bed} {CHROM_SIZES} {output_bb}")
 
     print(f"\nDone. Output: {output_bb}")
     print(f"File size: {os.path.getsize(output_bb) / 1024 / 1024:.1f} MB")
 
 if __name__ == "__main__":