687239fe5e1d9407369eb985448dc1f844b261b0 gperez2 Tue Nov 5 11:24:23 2024 -0800 Moving the programaticGIAB.txt text to the problematic.txt makedoc, refs #32715 diff --git src/hg/makeDb/doc/hg38/problematic.txt src/hg/makeDb/doc/hg38/problematic.txt index be4a6c6..a655cfd 100644 --- src/hg/makeDb/doc/hg38/problematic.txt +++ src/hg/makeDb/doc/hg38/problematic.txt @@ -61,15 +61,66 @@ ... # so turn those into bigBeds and link everything to /gbdb for f in $(ls *.bed); do base=$(basename -s .bed $f); bedSort ${f} ${f}.sorted; bedToBigBed ${f}.sorted ../../../chrom.sizes ${base}.bb; done # for some reason tabix doesn't like to be run in a for loop from ls, needs find for f in $(find . -name "*.vcf.gz"); do tabix --verbosity 3 -p vcf "${f}"; done mkdir -p /gbdb/hg38/problematic/highRepro ln -s `pwd`/*.bb /gbdb/hg38/problematic/highRepro/ ln -s `pwd`/*.vcf.gz* /gbdb/hg38/problematic/highRepro/ # turns the beds into one single bed with the overlapped regions: for f in $(ls *.bed); do echo $f; n=${f/.bed/}; tawk -v name=${n} '{print $0,name}' $f > ${n}.bed4; done cat *.bed4 | sort -k1,1 -k2,2n > highRepro.allRegions bedOverlapMerge highRepro.allRegions | tawk '{print $0,0,".",$2,$2,"0,0,0",gensub(/\//, ",", "g",$4)}' > highRepro.merged bedToBigBed -type=bed9+1 -tab -as=highRepro.as highRepro.merged ../../../chrom.sizes highRepro.bb + +############################################################################# +# problematicGIAB: Difficult regions from GIAB via NCBI - Megna/Gerardo +# Redmine #34253 + +# Megna workflow for v3.3: +cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/genomes.txt /cluster/home/mchalama/public_html/tracks/difficult +cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hub.txt /cluster/home/mchalama/public_html/tracks/difficult +cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hg38/CoLoRSdb.html /cluster/home/mchalama/public_html/tracks/difficult +cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hg38/trackDb.txt 
/cluster/home/mchalama/public_html/tracks/difficult +mkdir hg38 +mv /cluster/home/mchalama/public_html/tracks/difficult/CoLoRSdb.html /cluster/home/mchalama/public_html/tracks/difficult/hg38 +mv /cluster/home/mchalama/public_html/tracks/difficult/trackDb.txt /cluster/home/mchalama/public_html/tracks/difficult/hg38 +# edit genomes.txt +# edit hub.txt; got contact of PI from here: https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/README.md +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_alldifficultregions.bed.gz +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_alllowmapandsegdupregions.bed.gz +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_notinalldifficultregions.bed.gz +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_notinalllowmapandsegdupregions.bed.gz +#edit trackDb.txt to include the 4 datasets +#Bed to BigBed conversion because I realized you can't see beds properly on the hub +bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_alldifficultregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_alldifficultregions.bb +bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_alllowmapandsegdupregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_alllowmapandsegdupregions.bb +bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_notinalldifficultregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_notinalldifficultregions.bb +bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_notinalllowmapandsegdupregions.bed.gz 
/cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_notinalllowmapandsegdupregions.bb +# edit the html file to include information about the track +#open genome browser +#navigate to custom hub and paste the following URL +#https://hgwdev.gi.ucsc.edu/~mchalama/tracks/difficult/hub.txt + + +# Gerardo workflow for v3.5: +# Copied the trackDb.txt into the human/hg38/problematic.ra and copied html description into problematic.html +# Downloaded the v3.5 data +cd /hive/data/genomes/hg38/bed/ +mkdir problematic; cd problematic +mkdir GIAB; cd GIAB +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_alldifficultregions.bb +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_alllowmapandsegdupregions.bb +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_notinalldifficultregions.bb +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_notinalllowmapandsegdupregions.bb +cd /gbdb/hg38/ +mkdir problematic; cd problematic +mkdir GIAB; cd GIAB +# Made symlinks (NOTE(review): the link targets below lack the GRCh38_ prefix of the files downloaded above; confirm the actual file names) +ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/alldifficultregions.bb +ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/notinalldifficultregions.bb +ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/alllowmapandsegdupregions.bb +ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/notinalllowmapandsegdupregions.bb +# Updated the bigDataUrl settings in problematic.ra and updated problematic.html +#############################################################################