6ab7b1dfca6cc123dfe09a9e9f8afdf0c8cbdf31 max Mon Apr 20 06:40:58 2026 -0700 changes after code review, refs #37376 diff --git src/hg/makeDb/doc/hg38/strVar.txt src/hg/makeDb/doc/hg38/strVar.txt index 7c313ad7279..d8d58ccdcd1 100644 --- src/hg/makeDb/doc/hg38/strVar.txt +++ src/hg/makeDb/doc/hg38/strVar.txt @@ -1,86 +1,87 @@ # strVar supertrack build notes # TRExplorer track # Mon Mar 17 2026 (Claude/max) # Source: https://trexplorer.broadinstitute.org # Preprint: https://doi.org/10.1101/2024.10.04.615514 cd /hive/data/genomes/hg38/bed/str/trexplorer # Download the catalog from TRExplorer (5,599,658 loci) # File: TR_catalog.5599658_loci.20260123_034640.tsv.gz # Convert TSV to BED9+ format (colors by motif size, parses allele histograms) python3 ~/kent/src/hg/makeDb/scripts/trexplorer/trexplorerToBed.py \ TR_catalog.5599658_loci.20260123_034640.tsv.gz > trexplorer.bed sort -k1,1 -k2,2n trexplorer.bed > trexplorer.sorted.bed bedToBigBed -type=bed9+ -tab \ -as=$HOME/kent/src/hg/makeDb/scripts/trexplorer/trexplorer.as \ trexplorer.sorted.bed /hive/data/genomes/hg38/chrom.sizes trexplorer.bb # Symlink into /gbdb ln -s /hive/data/genomes/hg38/bed/str/trexplorer/trexplorer.bb /gbdb/hg38/strVar/trexplorer.bb # Clean up intermediate files rm trexplorer.bed trexplorer.sorted.bed ############# # Heterozygosity coloring for all strVar subtracks # Thu Apr 10 2026 (Claude/max) # All four subtracks (webstr, tommoStr, trexplorer, viennaVntr) were recolored # from motif-period-based coloring to expected heterozygosity: # het = 1 - sum(p_i^2), blue (low) -> red (high) # # Color bins: # het < 0.1: dark blue (0,0,180) # 0.1-0.3: medium blue (70,130,230) # 0.3-0.5: light purple (180,130,200) # 0.5-0.7: salmon (230,100,80) # >= 0.7: dark red (180,0,0) # no data: gray (128,128,128) # webstr: het pooled across 5 1000G populations weighted by sample count cd /hive/data/genomes/hg38/bed/str/webstr +# Received the dump file from Melissa Gymrek by email. Alas, webstr has no good download file yet. python3 ~/kent/src/hg/makeDb/scripts/webstr/webstrToBed.py WebSTRDataDumpForMax \ > webstr.bed sort -k1,1 -k2,2n webstr.bed > webstr.sorted.bed bedToBigBed -type=bed9+ -tab \ -as=$HOME/kent/src/hg/makeDb/scripts/webstr/webstr.as \ webstr.sorted.bed /hive/data/genomes/hg38/chrom.sizes webstr.bb rm webstr.bed webstr.sorted.bed # tommoStr: het from allele count histogram (single Japanese population) cd /hive/data/genomes/hg38/bed/str/tommo python3 ~/kent/src/hg/makeDb/scripts/tommoStr/tommoStrToBed.py \ expansion-hunter-61KJPN-panel-export.reheader.vcf.gz > tommoStr.bed sort -k1,1 -k2,2n tommoStr.bed > tommoStr.sorted.bed bedToBigBed -type=bed9+ -tab \ -as=$HOME/kent/src/hg/makeDb/scripts/tommoStr/tommoStr.as \ tommoStr.sorted.bed /hive/data/genomes/hg38/chrom.sizes tommoStr.bb rm tommoStr.bed tommoStr.sorted.bed # trexplorer: het pooled across TenK10K and HPRC256 cohort histograms cd /hive/data/genomes/hg38/bed/str/trexplorer python3 ~/kent/src/hg/makeDb/scripts/trexplorer/trexplorerToBed.py \ TR_catalog.5599658_loci.20260123_034640.tsv.gz > trexplorer.bed sort -k1,1 -k2,2n trexplorer.bed > trexplorer.sorted.bed bedToBigBed -type=bed9+ -tab \ -as=$HOME/kent/src/hg/makeDb/scripts/trexplorer/trexplorer.as \ trexplorer.sorted.bed /hive/data/genomes/hg38/chrom.sizes trexplorer.bb rm trexplorer.bed trexplorer.sorted.bed # viennaVntr: het extracted from multi-sample VCF genotypes (1,019 samples) cd /hive/data/genomes/hg38/bed/str/viennaVntr python3 ~/kent/src/hg/makeDb/scripts/viennaVntr/viennaVntrHet.py \ vamos-multisample.vcf > het.tsv python3 ~/kent/src/hg/makeDb/scripts/viennaVntr/viennaVntrToBed.py \ vamos-summary.tsv het.tsv > viennaVntr.bed sort -k1,1 -k2,2n viennaVntr.bed > viennaVntr.sorted.bed bedClip viennaVntr.sorted.bed /hive/data/genomes/hg38/chrom.sizes viennaVntr.clipped.bed bedToBigBed -type=bed9+ -tab \ -as=$HOME/kent/src/hg/makeDb/scripts/viennaVntr/viennaVntr.as \ viennaVntr.clipped.bed /hive/data/genomes/hg38/chrom.sizes viennaVntr.bb rm viennaVntr.bed viennaVntr.sorted.bed viennaVntr.clipped.bed