687239fe5e1d9407369eb985448dc1f844b261b0
gperez2
  Tue Nov 5 11:24:23 2024 -0800
Moving the problematicGIAB.txt text to the problematic.txt makedoc, refs #32715

diff --git src/hg/makeDb/doc/hg38/problematic.txt src/hg/makeDb/doc/hg38/problematic.txt
index be4a6c6..a655cfd 100644
--- src/hg/makeDb/doc/hg38/problematic.txt
+++ src/hg/makeDb/doc/hg38/problematic.txt
@@ -61,15 +61,66 @@
 ...
 
 # so turn those into bigBeds and link everything to /gbdb
 for f in $(ls *.bed); do base=$(basename -s .bed $f); bedSort ${f} ${f}.sorted; bedToBigBed ${f}.sorted ../../../chrom.sizes ${base}.bb; done
 # for some reason tabix doesn't like to be run in a for loop from ls, needs find
 for f in $(find . -name "*.vcf.gz"); do tabix --verbosity 3 -p vcf "${f}"; done
 mkdir -p /gbdb/hg38/problematic/highRepro
 ln -s `pwd`/*.bb /gbdb/hg38/problematic/highRepro/
 ln -s `pwd`/*.vcf.gz* /gbdb/hg38/problematic/highRepro/
 
 # turns the beds into one single bed with the overlapped regions:
 for f in $(ls *.bed); do echo $f; n=${f/.bed/}; tawk -v name=${n} '{print $0,name}' $f > ${n}.bed4; done
 cat *.bed4 | sort -k1,1 -k2,2n > highRepro.allRegions
 bedOverlapMerge highRepro.allRegions | tawk '{print $0,0,".",$2,$2,"0,0,0",gensub(/\//, ",", "g",$4)}' > highRepro.merged
 bedToBigBed -type=bed9+1 -tab -as=highRepro.as highRepro.merged ../../../chrom.sizes highRepro.bb
+
+#############################################################################
+# problematicGIAB: Difficult regions from GIAB via NCBI - Megna/Gerardo
+# Redmine #34253
+
+# Megna workflow for v3.3:
+cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/genomes.txt /cluster/home/mchalama/public_html/tracks/difficult
+cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hub.txt /cluster/home/mchalama/public_html/tracks/difficult
+cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hg38/CoLoRSdb.html /cluster/home/mchalama/public_html/tracks/difficult
+cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hg38/trackDb.txt /cluster/home/mchalama/public_html/tracks/difficult
+mkdir hg38
+mv /cluster/home/mchalama/public_html/tracks/difficult/CoLoRSdb.html /cluster/home/mchalama/public_html/tracks/difficult/hg38
+mv /cluster/home/mchalama/public_html/tracks/difficult/trackDb.txt /cluster/home/mchalama/public_html/tracks/difficult/hg38
+# edit genomes.txt
+# edit hub.txt; got contact of PI from here: https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/README.md
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_alldifficultregions.bed.gz
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_alllowmapandsegdupregions.bed.gz
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_notinalldifficultregions.bed.gz
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_notinalllowmapandsegdupregions.bed.gz
+#edit trackDb.txt to include the 4 datasets
+#Bed to BigBed conversion because I realized you can't see beds properly on the hub
+bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_alldifficultregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_alldifficultregions.bb
+bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_alllowmapandsegdupregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_alllowmapandsegdupregions.bb
+bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_notinalldifficultregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_notinalldifficultregions.bb
+bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_notinalllowmapandsegdupregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_notinalllowmapandsegdupregions.bb
+# edit the html file to include information about the track
+#open genome browser
+#navigate to custom hub and paste the following URL
+#https://hgwdev.gi.ucsc.edu/~mchalama/tracks/difficult/hub.txt
+
+
+# Gerardo workflow for v3.5:
+# Copied the trackDb.txt into the human/hg38/problematic.ra and copied html description into problematic html
+# Downloaded the v3.5 data
+cd /hive/data/genomes/hg38/bed/
+mkdir problematic; cd problematic
+mkdir GIAB; cd GIAB
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_alldifficultregions.bb
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_alllowmapandsegdupregions.bb
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_notinalldifficultregions.bb
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_notinalllowmapandsegdupregions.bb
+cd /gbdb/hg38/
+mkdir problematic; cd problematic
+mkdir GIAB; cd GIAB
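+# Made symlinks (NOTE(review): targets below lack the GRCh38_ prefix of the downloaded .bb files; presumably the files were renamed first -- confirm)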
+ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/alldifficultregions.bb
+ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/notinalldifficultregions.bb
+ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/alllowmapandsegdupregions.bb
+ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/notinalllowmapandsegdupregions.bb
+# Updated the bigDataUrl in problematic.ra and problematic.html
+#############################################################################