226f4f99c439d7f22200b0f28bf30a8816442e9d
gperez2
  Sun Nov 3 14:21:21 2024 -0800
Adding the makedocs for the difficult regions from GIAB track, refs #32715

diff --git src/hg/makeDb/doc/hg38/problematicGIAB.txt src/hg/makeDb/doc/hg38/problematicGIAB.txt
new file mode 100644
index 0000000..2496204
--- /dev/null
+++ src/hg/makeDb/doc/hg38/problematicGIAB.txt
@@ -0,0 +1,49 @@
+#############################################################################
+# problematicGIAB: Difficult regions from GIAB via NCBI - Megna/Gerardo
+# Redmine #34253
+
+# Megna workflow for v3.3:
+cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/genomes.txt /cluster/home/mchalama/public_html/tracks/difficult
+cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hub.txt /cluster/home/mchalama/public_html/tracks/difficult
+cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hg38/CoLoRSdb.html /cluster/home/mchalama/public_html/tracks/difficult
+cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hg38/trackDb.txt /cluster/home/mchalama/public_html/tracks/difficult
+mkdir /cluster/home/mchalama/public_html/tracks/difficult/hg38
+mv /cluster/home/mchalama/public_html/tracks/difficult/CoLoRSdb.html /cluster/home/mchalama/public_html/tracks/difficult/hg38
+mv /cluster/home/mchalama/public_html/tracks/difficult/trackDb.txt /cluster/home/mchalama/public_html/tracks/difficult/hg38
+# edit genomes.txt
+# edit hub.txt; got the PI's contact information from: https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/README.md
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_alldifficultregions.bed.gz
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_alllowmapandsegdupregions.bed.gz
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_notinalldifficultregions.bed.gz
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_notinalllowmapandsegdupregions.bed.gz
+#edit trackDb.txt to include the 4 datasets
+#Bed to BigBed conversion because I realized you can't see beds properly on the hub
+bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_alldifficultregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_alldifficultregions.bb
+bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_alllowmapandsegdupregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_alllowmapandsegdupregions.bb
+bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_notinalldifficultregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_notinalldifficultregions.bb
+bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_notinalllowmapandsegdupregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_notinalllowmapandsegdupregions.bb
+# edit the html file to include information about the track
+#open genome browser
+#navigate to custom hub and paste the following URL
+#https://hgwdev.gi.ucsc.edu/~mchalama/tracks/difficult/hub.txt
+
+
+# Gerardo workflow for v3.5:
+# Copied the trackDb.txt stanzas into human/hg38/problematic.ra and copied the html description into problematic.html
+# Downloaded the v3.5 data
+cd /hive/data/genomes/hg38/bed/
+mkdir problematic; cd problematic
+mkdir GIAB; cd GIAB
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_alldifficultregions.bb
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_alllowmapandsegdupregions.bb
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_notinalldifficultregions.bb
+wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_notinalllowmapandsegdupregions.bb
+cd /gbdb/hg38/
+mkdir problematic; cd problematic
+mkdir GIAB; cd GIAB
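+# Made symlinks (the downloaded files carry a GRCh38_ prefix; the link names drop it)
+ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/GRCh38_alldifficultregions.bb alldifficultregions.bb
+ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/GRCh38_notinalldifficultregions.bb notinalldifficultregions.bb
+ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/GRCh38_alllowmapandsegdupregions.bb alllowmapandsegdupregions.bb
+ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/GRCh38_notinalllowmapandsegdupregions.bb notinalllowmapandsegdupregions.bb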
+# Updated the bigDataUrl settings in problematic.ra and updated problematic.html