226f4f99c439d7f22200b0f28bf30a8816442e9d gperez2 Sun Nov 3 14:21:21 2024 -0800 Adding the makedocs for the difficult regions from GIAB track, refs #32715 diff --git src/hg/makeDb/doc/hg38/problematicGIAB.txt src/hg/makeDb/doc/hg38/problematicGIAB.txt new file mode 100644 index 0000000..2496204 --- /dev/null +++ src/hg/makeDb/doc/hg38/problematicGIAB.txt @@ -0,0 +1,49 @@ +############################################################################# +# problematicGIAB: Difficult regions from GIAB via NCBI - Megna/Gerardo +# Redmine #34253 + +# Megna workflow for v3.3: +cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/genomes.txt /cluster/home/mchalama/public_html/tracks/difficult +cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hub.txt /cluster/home/mchalama/public_html/tracks/difficult +cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hg38/CoLoRSdb.html /cluster/home/mchalama/public_html/tracks/difficult +cp /cluster/home/mchalama/public_html/tracks/CoLoRSdb/hg38/trackDb.txt /cluster/home/mchalama/public_html/tracks/difficult +mkdir /cluster/home/mchalama/public_html/tracks/difficult/hg38 +mv /cluster/home/mchalama/public_html/tracks/difficult/CoLoRSdb.html /cluster/home/mchalama/public_html/tracks/difficult/hg38 +mv /cluster/home/mchalama/public_html/tracks/difficult/trackDb.txt /cluster/home/mchalama/public_html/tracks/difficult/hg38 +# edit genomes.txt +# edit hub.txt; got the PI's contact information from here: https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/README.md +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_alldifficultregions.bed.gz +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_alllowmapandsegdupregions.bed.gz +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_notinalldifficultregions.bed.gz +wget 
https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.3/GRCh38@all/Union/GRCh38_notinalllowmapandsegdupregions.bed.gz +#edit trackDb.txt to include the 4 datasets +#Converted BED to bigBed because plain BED files cannot be displayed properly on the hub +bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_alldifficultregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_alldifficultregions.bb +bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_alllowmapandsegdupregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_alllowmapandsegdupregions.bb +bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_notinalldifficultregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_notinalldifficultregions.bb +bedToBigBed /cluster/home/mchalama/public_html/tracks/difficult/hg38/GRCh38_notinalllowmapandsegdupregions.bed.gz /cluster/home/mchalama/public_html/tracks/difficult/hg38.chrom.sizes GRCh38_notinalllowmapandsegdupregions.bb +#edit the html file to include information about the track +#open genome browser +#navigate to custom hub and paste the following URL +#https://hgwdev.gi.ucsc.edu/~mchalama/tracks/difficult/hub.txt + + +# Gerardo workflow for v3.5: +# Copied the trackDb.txt into the human/hg38/problematic.ra and copied the html description into problematic.html +# Downloaded the v3.5 data +cd /hive/data/genomes/hg38/bed/ +mkdir problematic; cd problematic +mkdir GIAB; cd GIAB +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_alldifficultregions.bb +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_alllowmapandsegdupregions.bb +wget 
https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_notinalldifficultregions.bb +wget https://ftp-trace.ncbi.nlm.nih.gov/ReferenceSamples/giab/release/genome-stratifications/v3.5/GRCh38@all/Union/GRCh38_notinalllowmapandsegdupregions.bb +cd /gbdb/hg38/ +mkdir problematic; cd problematic +mkdir GIAB; cd GIAB +# Made symlinks (link names drop the GRCh38_ prefix of the downloaded files) +ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/GRCh38_alldifficultregions.bb alldifficultregions.bb +ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/GRCh38_notinalldifficultregions.bb notinalldifficultregions.bb +ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/GRCh38_alllowmapandsegdupregions.bb alllowmapandsegdupregions.bb +ln -s /hive/data/genomes/hg38/bed/problematic/GIAB/GRCh38_notinalllowmapandsegdupregions.bb notinalllowmapandsegdupregions.bb +# Updated the bigDataUrl settings in problematic.ra and updated problematic.html