526213b2893134217a300ff913e11b4e98d67991 max Mon Apr 20 08:50:10 2026 -0700
lrSv: add cpc1Sv and aprSv pangenome SV subtracks (hg38, hs1)

cpc1Sv: 97,205 SVs from the CPC + HPRC Phase 1 pangenome (Gao et al 2023,
Nature; PMID 37316654) built on T2T-CHM13v2, with 53 Chinese and 47 HPRC
samples. Each graph snarl site is shown as one item with alt alleles
classified by length delta (INS/DEL/CPX, 50 bp threshold) and collapsed.

aprSv: 103,077 SVs from the Arabic Pangenome Reference (Nassir et al. 2025,
Nat Commun; PMID 40707445) built on T2T-CHM13v2 from 53 UAE-resident Arab
individuals. Same multi-allele classification as cpc1Sv, with alt alleles
iterated within each multi-allelic row.

Both tracks load natively on hs1 and are lifted to hg38 with
hs1ToHg38.over.chain.gz.

refs #36258

diff --git src/hg/makeDb/scripts/lrSv/lrSvAprBuild.sh src/hg/makeDb/scripts/lrSv/lrSvAprBuild.sh
new file mode 100755
index 00000000000..d09200a5764
--- /dev/null
+++ src/hg/makeDb/scripts/lrSv/lrSvAprBuild.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# Build the apr lrSv subtrack twice: a native hs1 bigBed, and an hg38
+# bigBed produced by lifting the hs1 BED through the hs1ToHg38 chain.
+# Run from /hive/data/genomes/hg38/bed/lrSv/apr/ -- the input VCF and
+# every output file are referenced by relative path.
+
+set -euo pipefail
+
+readonly SCRIPTS=/cluster/home/max/kent/src/hg/makeDb/scripts/lrSv
+readonly AS="$SCRIPTS/lrSvApr.as"
+readonly VCF=apr_review_v1_2902_chm13.vcf.gz
+readonly HS1_SIZES=/hive/data/genomes/hs1/chrom.sizes
+readonly HG38_SIZES=/hive/data/genomes/hg38/chrom.sizes
+readonly CHAIN=/gbdb/hs1/liftOver/hs1ToHg38.over.chain.gz
+
+# Print a progress message prefixed with the current time as [HH:MM:SS].
+#   $1 - message text
+log() {
+  printf '[%s] %s\n' "$(date +%T)" "$1"
+}
+
+# Sort apr.<db>.bed and index it as apr.<db>.bb.
+#   $1 - assembly name used in the file names (hs1 or hg38)
+#   $2 - chrom.sizes file for that assembly
+make_bigbed() {
+  local db=$1 sizes=$2
+
+  log "sorting ${db} bed..."
+  bedSort "apr.${db}.bed" "apr.${db}.sorted.bed"
+
+  log "building ${db} bigBed..."
+  bedToBigBed -type=bed9+ -tab -as="$AS" \
+    "apr.${db}.sorted.bed" "$sizes" "apr.${db}.bb"
+}
+
+log "converting VCF to hs1 bed..."
+zcat "$VCF" \
+  | python3 "$SCRIPTS/lrSvAprVcfToBed.py" /dev/stdin apr.hs1.bed "$HS1_SIZES"
+
+make_bigbed hs1 "$HS1_SIZES"
+
+log "lifting to hg38..."
+liftOver -tab -bedPlus=9 \
+  apr.hs1.bed "$CHAIN" apr.hg38.bed apr.hs1.unmapped.bed
+echo " hg38 mapped: $(wc -l < apr.hg38.bed)"
+# Count only lines that do not start with '#'. grep exits 1 when that
+# count is 0; "|| true" discards the status.
+echo " hg38 unmapped: $(grep -cv '^#' apr.hs1.unmapped.bed || true)"
+
+make_bigbed hg38 "$HG38_SIZES"
+
+ls -lh apr.hs1.bb apr.hg38.bb
+log "done."