e5697049b1e077237d1f83c8293e23d62ded0820 braney Tue May 12 11:40:26 2026 -0700 quickLiftBench: Mode C reference hub + hub-variant case schema, refs #37445 Adds a synthetic 4-track hub (bigBed native + lifted, bigWig native + lifted) at utils/qa/quickLiftBench/testHub/, regenerated via buildTestHub.sh, and extends the bench schema so a variant can be a mapping (hubUrl + db + position + tracks) in addition to the existing saved-session string. The two new cases mode_c_hs1_bb and mode_c_hs1_bw exercise quickLift on hs1 against an hg38-sourced chain, with both variants rendering at the same hs1 coords so the only difference is whether the chain-remap step runs. First numbers (n=10, hgwdev, chr22:15M-50M, 5000 source features / 34000 bedGraph bins): case native total lifted total parallel-fetch delta mode_c_hs1_bb 1376 ms 1154 ms +900 ms (chain remap) mode_c_hs1_bw 26 ms 27 ms negligible bigBed scales per-feature (~180 us/feature for 5000 hg38 features remapped); bigWig is essentially free. Also moves the regress_quickLift_parallel case from server: sandbox to server: hgwdev now that #37470 has landed on dev. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> diff --git src/utils/qa/quickLiftBench/testHub/buildTestHub.sh src/utils/qa/quickLiftBench/testHub/buildTestHub.sh new file mode 100755 index 00000000000..82f4177f1ff --- /dev/null +++ src/utils/qa/quickLiftBench/testHub/buildTestHub.sh @@ -0,0 +1,156 @@ +#!/bin/bash +# buildTestHub.sh - regenerate the Mode C reference hub data. +# +# Builds a self-contained hs1 hub with two pairs of stanzas: +# * modeC_bb_native / modeC_bb_lifted -- bigBed12, N features in chr22. +# * modeC_bw_native / modeC_bw_lifted -- bigWig, one signal value per BW_STEP +# bp across the same chr22 region. +# Each pair shares the same underlying data; the lifted variant points at an +# hg38-coord file plus quickLiftUrl. Drops everything (bb/bw + chain pair + +# hub.txt) into $1 (default ~/public_html/quickLiftBench/testHub). +# +# Refs Redmine #37445. + +set -euo pipefail + +N="${N:-5000}" +SEED="${SEED:-42}" +REGION_START="${REGION_START:-15000000}" +REGION_END="${REGION_END:-50000000}" +BW_STEP="${BW_STEP:-1000}" +DEST="${1:-$HOME/public_html/quickLiftBench/testHub}" + +HG38_CHROM_SIZES=/hive/data/genomes/hg38/chrom.sizes +HS1_CHROM_SIZES=/hive/data/genomes/hs1/chrom.sizes +HG38_TO_HS1_CHAIN=/gbdb/hg38/liftOver/hg38ToHs1.over.chain.gz + +# bigChain pair used by quickLift for hg38 -> hs1 on-the-fly remap. +# Copied into the hub dir so the bench is self-contained on one host. +QUICK_CHAIN_SRC=/hive/data/genomes/hs1/bed/lastz.hg38/axtChain/hs1.hg38.quick.bb +QUICK_CHAIN_LINK_SRC=/hive/data/genomes/hs1/bed/lastz.hg38/axtChain/hs1.hg38.quickLink.bb + +mkdir -p "$DEST" +cd "$DEST" + +python3 - "$N" "$SEED" "$REGION_START" "$REGION_END" > modeC_hg38.bed <<'PY' +import random, sys +N, seed, start_region, end_region = int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4]) +random.seed(seed) +chrom = "chr22" +span = end_region - start_region +# Lay features at regular stride so they don't overlap (~5kb each). +stride = max(span // N, 6000) +for i in range(N): + s = start_region + i * stride + e = s + 5000 + if e > end_region: + break + strand = "+" if i % 2 == 0 else "-" + thickStart, thickEnd = s + 1000, e - 1000 + print("\t".join(map(str, [ + chrom, s, e, f"feat_{i:05d}", 0, strand, + thickStart, thickEnd, "100,100,200", + 3, "500,2000,500,", "0,1500,4500," + ]))) +PY + +liftOver -bedPlus=12 modeC_hg38.bed "$HG38_TO_HS1_CHAIN" modeC_hs1.bed unmapped_hs1.bed + +sort -k1,1 -k2,2n modeC_hg38.bed -o modeC_hg38.bed +sort -k1,1 -k2,2n modeC_hs1.bed -o modeC_hs1.bed + +bedToBigBed -type=bed12 modeC_hg38.bed "$HG38_CHROM_SIZES" modeC_hg38.bb +bedToBigBed -type=bed12 modeC_hs1.bed "$HS1_CHROM_SIZES" modeC_hs1.bb + +# ---- bigWig: BedGraph at BW_STEP bp resolution, lifted by chain ---- +python3 - "$REGION_START" "$REGION_END" "$BW_STEP" <<'PY' > modeC_hg38.bedGraph +import math, sys +s0, e0, step = int(sys.argv[1]), int(sys.argv[2]), int(sys.argv[3]) +chrom = "chr22" +for s in range(s0, e0, step): + e = s + step + # smooth signal in [50, 150]; chrom-position-driven, deterministic + v = 100 + 50 * math.sin((s - s0) / 100000.0) + print(f"{chrom}\t{s}\t{e}\t{v:.4f}") +PY + +# liftOver of bedGraph: keep 4th column (value) as bedPlus extra. +liftOver -bedPlus=3 modeC_hg38.bedGraph "$HG38_TO_HS1_CHAIN" \ + modeC_hs1.bedGraph unmapped_hs1.bedGraph + +# liftOver may emit overlapping intervals when a chain block edge falls +# inside a step; bedGraphToBigWig requires strictly non-overlapping. Sort, +# then drop any line whose start is < the previous line's end (cheap +# defensive trim; loses at most a handful of bins per chain edge). +sort -k1,1 -k2,2n modeC_hg38.bedGraph -o modeC_hg38.bedGraph +sort -k1,1 -k2,2n modeC_hs1.bedGraph | awk 'BEGIN{OFS="\t"; pc=""; pe=-1} {if (!($1==pc && $2<pe)) {print; pc=$1; pe=$3}}' > modeC_hs1.bedGraph.trim +mv modeC_hs1.bedGraph.trim modeC_hs1.bedGraph + +bedGraphToBigWig modeC_hg38.bedGraph "$HG38_CHROM_SIZES" modeC_hg38.bw +bedGraphToBigWig modeC_hs1.bedGraph "$HS1_CHROM_SIZES" modeC_hs1.bw + +# Co-locate the quickLift chain pair so both stanzas fetch from one host. +# quickLift's chain loader constructs the link filename from the .bb name +# by replacing ".bb" with ".link.bb", so the link file MUST be named +# <chain>.link.bb (not <chain>Link.bb). +cp "$QUICK_CHAIN_SRC" hg38ToHs1.quick.bb +cp "$QUICK_CHAIN_LINK_SRC" hg38ToHs1.quick.link.bb + +# hub.txt (useOneFile -- single self-contained file) +cat > hub.txt <<EOF +hub quickLiftBench_modeC +shortLabel quickLiftBench Mode C +longLabel quickLiftBench Mode C: native vs quickLifted view of the same synthetic data +useOneFile on +email braney@ucsc.edu +descriptionUrl https://redmine.gi.ucsc.edu/issues/37445 + +genome hs1 + +track modeC_bb_native +shortLabel modeC bb native +longLabel Mode C bigBed native: BED12 features in hs1 coords +type bigBed 12 +bigDataUrl modeC_hs1.bb +visibility pack +color 0,128,0 +priority 1 + +track modeC_bb_lifted +shortLabel modeC bb lifted +longLabel Mode C bigBed lifted: same features in hg38 coords + quickLift hg38->hs1 +type bigBed 12 +bigDataUrl modeC_hg38.bb +quickLiftUrl hg38ToHs1.quick.bb +quickLiftDb hg38 +visibility pack +color 0,128,0 +priority 2 + +track modeC_bw_native +shortLabel modeC bw native +longLabel Mode C bigWig native: signal in hs1 coords +type bigWig +bigDataUrl modeC_hs1.bw +visibility full +color 0,128,0 +viewLimits 0:200 +priority 3 + +track modeC_bw_lifted +shortLabel modeC bw lifted +longLabel Mode C bigWig lifted: same signal in hg38 coords + quickLift hg38->hs1 +type bigWig +bigDataUrl modeC_hg38.bw +quickLiftUrl hg38ToHs1.quick.bb +quickLiftDb hg38 +visibility full +color 0,128,0 +viewLimits 0:200 +priority 4 +EOF + +echo "hub built at $DEST" +echo " modeC_hg38.bb: $(wc -l < modeC_hg38.bed) features" +echo " modeC_hs1.bb: $(wc -l < modeC_hs1.bed) features (after liftOver)" +echo " bedGraph bins: hg38=$(wc -l < modeC_hg38.bedGraph) hs1=$(wc -l < modeC_hs1.bedGraph)"