2574b18caca333a5d52d6d6b8ecc6424d403c7d7 galt Tue Oct 5 22:01:49 2021 -0700 more p13 made properly versioned gc5Base files for top, initial, p12, p13, latest bigZips downloads dirs. refs #25091 diff --git src/hg/makeDb/doc/hg38/patchUpdate.13.txt src/hg/makeDb/doc/hg38/patchUpdate.13.txt index f23a669..f101205 100644 --- src/hg/makeDb/doc/hg38/patchUpdate.13.txt +++ src/hg/makeDb/doc/hg38/patchUpdate.13.txt @@ -152,30 +152,107 @@ rm md5sum.txt md5sum hg38* > md5sum.txt echo GRCh38.p13 > LATEST_VERSION rm -f /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/p13 ln -s /hive/data/genomes/hg38/goldenPath/bigZips/p13 \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/p13 rm -f /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/latest ln -s /hive/data/genomes/hg38/goldenPath/bigZips/latest \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/latest ln -sf /hive/data/genomes/hg38/chrom.sizes.p13 \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/p13/hg38.p13.chrom.sizes ############################################################################# +# Put correct gc5Base files in downloads (DONE 2021-10-05 galt) +# I found that there were nice versioned files made by the patch process, +# but that they had never been correctly used, and in fact, the latest one w + +cd /hive/data/genomes/hg38/goldenPath/bigZips/initial + +ln -s /hive/data/genomes/hg38/bed/gc5Base/hg38.initial.gc5Base.bw hg38.gc5Base.bw +ln -s /hive/data/genomes/hg38/bed/gc5Base/hg38.initial.gc5Base.wigVarStep.gz hg38.gc5Base.wigVarStep.gz + +md5sum hg38.* > md5sum.txt +diff md5sum.txt md5sum.txt2 +md5sum hg38.* > md5sum.txt2 +rm md5ssum.txt2 + +cd /hive/data/genomes/hg38/goldenPath/bigZips +ln -s initial/hg38.chrom.sizes hg38.chrom.sizes +ln -s initial/hg38.gc5Base.bw hg38.gc5Base.bw +ln -s initial/hg38.gc5Base.wigVarStep.gz hg38.gc5Base.wigVarStep.gz + +cd /hive/data/genomes/hg38/goldenPath/bigZips/p11 + +ln -s 
/hive/data/genomes/hg38/bed/gc5Base/hg38.p11.gc5Base.bw hg38.p11.gc5Base.bw +ln -s /hive/data/genomes/hg38/bed/gc5Base/hg38.p11.gc5Base.wigVarStep.gz hg38.p11.gc5Base.wigVarStep.gz +ln -s /hive/data/genomes/hg38/chrom.sizes.p11 hg38.p11.chrom.sizes + +md5sum hg38.p11.gc5Base.* >> md5sum.txt2 +diff md5sum.txt md5sum.txt2 +md5sum hg38.* > md5sum.txt2 +rm md5ssum.txt2 + +cd /hive/data/genomes/hg38/goldenPath/bigZips/p12 + +ln -s /hive/data/genomes/hg38/bed/gc5Base/hg38.p12.gc5Base.bw hg38.p12.gc5Base.bw +ln -s /hive/data/genomes/hg38/bed/gc5Base/hg38.p12.gc5Base.wigVarStep.gz hg38.p12.gc5Base.wigVarStep.gz + +md5sum hg38.p12.gc5Base.* >> md5sum.txt + +md5sum hg38.* > md5sum.txt2 +diff md5sum.txt md5sum.txt2 +rm md5sum.txt2 + +cd /hive/data/genomes/hg38/goldenPath/bigZips/p13 + +ln -s /hive/data/genomes/hg38/bed/gc5Base/hg38.p13.gc5Base.bw hg38.p13.gc5Base.bw +ln -s /hive/data/genomes/hg38/bed/gc5Base/hg38.p13.gc5Base.wigVarStep.gz hg38.p13.gc5Base.wigVarStep.gz + +md5sum hg38.p13.gc5Base.* >> md5sum.txt + +md5sum hg38.* > md5sum.txt2 +diff md5sum.txt md5sum.txt2 +rm md5sum.txt2 + +cd /hive/data/genomes/hg38/goldenPath/bigZips/latest + +ln -s ../p13/hg38.p13.chrom.sizes hg38.chrom.sizes + +ln -s ../p13/hg38.p13.gc5Base.bw hg38.gc5Base.bw +ln -s ../p13/hg38.p13.gc5Base.wigVarStep.gz hg38.gc5Base.wigVarStep.gz + +md5sum hg38.* > md5sum.txt2 + +sed -e 's/.p13//' ../p13/md5sum.txt > md5sum.p13 + +diff md5sum.p13 md5sum.txt2 +rm md5sum.p13 +rm md5sum.txt2 + +- + +cd /data/apache/htdocs-hgdownload/goldenPath/hg38/bigZips +rm hg38.chrom.sizes +ln -s /hive/data/genomes/hg38/goldenPath/bigZips/hg38.chrom.sizes hg38.chrom.sizes +ln -s /hive/data/genomes/hg38/goldenPath/bigZips/hg38.gc5Base.bw hg38.gc5Base.bw +ln -s /hive/data/genomes/hg38/goldenPath/bigZips/hg38.gc5Base.wigVarStep.gz hg38.gc5Base.wigVarStep.gz + + +############################################################################# # Build perSeqMax file for gfServer (hgBlat) (DONE 2021-08-26 galt) # When the blat 
server is restarted with the updated hg38.2bit file, # hg38.altsAndFixes needs to be copied over along with the new hg38.2bit file, # and gfServer needs to be restarted with -perSeqMax=hg38.altsAndFixes. cd /hive/data/genomes/hg38 cut -f 1 chrom.sizes.p13 \ | grep -E '_(alt|fix)$' \ | sed -re 's/^/hg38.2bit:/;' \ > hg38.altsAndFixes.p13 # Link for blat server installation convenience: ln -sf hg38.altsAndFixes.p13 altsAndFixes ######################################################################### # Regenerate idKeys with extended hg38 (DONE 2021-08-26 galt) @@ -327,32 +404,34 @@ ok="OK" if [ "$c0" -ne "$c1" ]; then ok="ERROR" fi printf "# checking $t: $c0 =? $c1 $ok\n" done # checking refseq: 639 =? 639 OK # checking genbank: 640 =? 640 OK # checking assembly: 640 =? 640 OK # Note how there's one fewer refseq, consistent with featureBits above. hgLoadSqlTab hg38 chromAlias $HOME/kent/src/hg/lib/chromAlias.sql ${db}.chromAlias.tab ############################################################################## -# UCSC to Ensembl (TODO 2020-08-10 galt) -# doc?? +# UCSC to Ensembl (TODO 2021-09-18 galt) +# Ask Hiram to update ensembleToUcsc and ensemblLift tables. +# FYI ensemblLift offset shows how many Ns were inserted by Ensembl to give the right coordinate to alts and fixes. 
+# ############################################################################ # altLocations and patchLocations (DONE 2021-08-27 galt) # indicate corresponding locations between haplotypes and reference mkdir /hive/data/genomes/hg38/bed/altLocations.p13 cd /hive/data/genomes/hg38/bed/altLocations.p13 ~/kent/src/hg/utils/automation/altScaffoldPlacementToBed.pl \ /hive/data/genomes/grcH38P13/genbank/GCA_000001405.28_GRCh38.p13_assembly_structure/{ALT_*,PATCHES}/alt_scaffolds/alt_scaffold_placement.txt \ | sort -k1,1 -k2n,2n \ > altAndFixLocations.bed wc -l altAndFixLocations.bed #892 altAndFixLocations.bed grep _alt altAndFixLocations.bed > altLocations.bed grep _fix altAndFixLocations.bed > fixLocations.bed