620c6d4617dc2b1fe159566b20c5e902821adb76 galt Wed Oct 6 16:31:09 2021 -0700 adding correctly versioned hg38.chromAlias.txt to the various hg38 bigZips top, initial, p11, p12, p13 final dirs. refs #25091 diff --git src/hg/makeDb/doc/hg38/patchUpdate.13.txt src/hg/makeDb/doc/hg38/patchUpdate.13.txt index f101205..c238378 100644 --- src/hg/makeDb/doc/hg38/patchUpdate.13.txt +++ src/hg/makeDb/doc/hg38/patchUpdate.13.txt @@ -59,30 +59,38 @@ /hive/data/genomes/grcH38P13/bed/gc5Base/grcH38P13.gc5Base.wigVarStep.gz \ | gzip -c \ > hg38.p13.gc5Base.wigVarStep.gz) #real 6m39.885s # Make a new gc5BaseBw.bw time wigToBigWig hg38.p13.gc5Base.wigVarStep.gz ../../chrom.sizes.p13 \ hg38.p13.gc5Base.bw #real 11m38.366s # Install cd /hive/data/genomes/hg38/bed/gc5Base/ ln -sf hg38.p13.gc5Base.wigVarStep.gz hg38.gc5Base.wigVarStep.gz ln -sf hg38.p13.gc5Base.bw hg38.gc5Base.bw +######################################## +# +# BIGZIPS POLICY +# Note about downloads directory policy under bigZips/ +# We want the top-level bigZips/ files to be the same as the bigZips/initial/ files. +# We do not want the top-level to have a mix of some newer files plus old files. +# Even if the "initial" dir is redundant, at least people will know what it means. +# ############################################################################## # Extend main database download files (DONE - 2021-04-08 - Angie) cd /hive/data/genomes/hg38/goldenPath/bigZips mkdir p13 # hg38.2bit was already extended above. 
ln -sf /hive/data/genomes/hg38/hg38.p13.2bit p13/ # AGP: zcat p12/hg38.p12.agp.gz \ /hive/data/genomes/grcH38P13/goldenPath/bigZips/grcH38P13.agp.gz \ | grep -v ^# \ | gzip -c > p13/hg38.p13.agp.gz # FASTA @@ -154,31 +162,32 @@ echo GRCh38.p13 > LATEST_VERSION rm -f /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/p13 ln -s /hive/data/genomes/hg38/goldenPath/bigZips/p13 \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/p13 rm -f /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/latest ln -s /hive/data/genomes/hg38/goldenPath/bigZips/latest \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/latest ln -sf /hive/data/genomes/hg38/chrom.sizes.p13 \ /usr/local/apache/htdocs-hgdownload/goldenPath/hg38/bigZips/p13/hg38.p13.chrom.sizes ############################################################################# # Put correct gc5Base files in downloads (DONE 2021-10-05 galt) # I found that there were nice versioned files made by the patch process, -# but that they had never been correctly used, and in fact, the lastest one w +# but that they had never been correctly used, and in fact, the latest one +# was accidentally in the top level. 
cd /hive/data/genomes/hg38/goldenPath/bigZips/initial ln -s /hive/data/genomes/hg38/bed/gc5Base/hg38.initial.gc5Base.bw hg38.gc5Base.bw ln -s /hive/data/genomes/hg38/bed/gc5Base/hg38.initial.gc5Base.wigVarStep.gz hg38.gc5Base.wigVarStep.gz md5sum hg38.* > md5sum.txt diff md5sum.txt md5sum.txt2 md5sum hg38.* > md5sum.txt2 rm md5ssum.txt2 cd /hive/data/genomes/hg38/goldenPath/bigZips ln -s initial/hg38.chrom.sizes hg38.chrom.sizes ln -s initial/hg38.gc5Base.bw hg38.gc5Base.bw ln -s initial/hg38.gc5Base.wigVarStep.gz hg38.gc5Base.wigVarStep.gz @@ -229,30 +238,109 @@ diff md5sum.p13 md5sum.txt2 rm md5sum.p13 rm md5sum.txt2 - cd /data/apache/htdocs-hgdownload/goldenPath/hg38/bigZips rm hg38.chrom.sizes ln -s /hive/data/genomes/hg38/goldenPath/bigZips/hg38.chrom.sizes hg38.chrom.sizes ln -s /hive/data/genomes/hg38/goldenPath/bigZips/hg38.gc5Base.bw hg38.gc5Base.bw ln -s /hive/data/genomes/hg38/goldenPath/bigZips/hg38.gc5Base.wigVarStep.gz hg38.gc5Base.wigVarStep.gz ############################################################################# +# Correctly versioned hg38.chromAlias.txt files in downloads (DONE 2021-10-06 galt) +# I made nice versioned files and installed them in the right location. +# Now we no longer have the problem where the latest one was in the top level dir. 
+ +cd /hive/data/genomes/hg38/goldenPath/bigZips + +# do not do this in the future, beyond p13 since it has already been done +mv hg38.chromAlias.txt hg38.p12.chromAlias.txt.old + +~/kent/src/hg/utils/automation/chromAliasToTxt.pl hg38 > hg38.p13.chromAlias.txt + +# do not do this in the future, beyond p13 since it has already been done +hgsql grcH38P13 -BNe 'select chrom from chromInfo' > p13.chroms +hgsql grcH38P12 -BNe 'select chrom from chromInfo' > p12.chroms +hgsql grcH38P11 -BNe 'select chrom from chromInfo' > p11.chroms + +# do not do this in the future, beyond p13 since it has already been done +grep -v --file=p13.chroms hg38.p13.chromAlias.txt > hg38.p12.chromAlias.txt +grep -v --file=p12.chroms hg38.p12.chromAlias.txt > hg38.p11.chromAlias.txt +grep -v --file=p11.chroms hg38.p11.chromAlias.txt > hg38.initial.chromAlias.txt + +# do not do this in the future, beyond p13 since it has already been done +diff hg38.p12.chromAlias.txt hg38.p12.chromAlias.txt.old +#580a581 +#> chrUn_KI270752v1 HSCHRUN_RANDOM_CTG29 KI270752.1 + +# FYI KI270752.1 is the discontinued non-human contig + +# added this to the README.txt +KI270752.1 is no longer part of the RefSeq assembly it's hamster sequence +derived from the human-hamster CHO cell line. 
+https://www.ncbi.nlm.nih.gov/grc/human/issues/HG-2587 + +wc -l hg38.*.chromAlias.txt* + 455 hg38.initial.chromAlias.txt + 578 hg38.p11.chromAlias.txt + 595 hg38.p12.chromAlias.txt + 596 hg38.p12.chromAlias.txt.old + 640 hg38.p13.chromAlias.txt + +--------- + +# do not do this in the future, beyond p13 since it has already been done +cd /hive/data/genomes/hg38/goldenPath/bigZips/initial +ln -s ../hg38.initial.chromAlias.txt hg38.chromAlias.txt +md5sum hg38.chromAlias.txt >> md5sum.txt + +# do not do this in the future, beyond p13 since it has already been done +cd /hive/data/genomes/hg38/goldenPath/bigZips +ln -s initial/hg38.chromAlias.txt hg38.chromAlias.txt + +# do not do this in the future, beyond p13 since it has already been done +cd /hive/data/genomes/hg38/goldenPath/bigZips/p11 +ln -s ../hg38.p11.chromAlias.txt hg38.p11.chromAlias.txt +md5sum hg38.p11.chromAlias.txt >> md5sum.txt + +# do not do this in the future, beyond p13 since it has already been done +cd /hive/data/genomes/hg38/goldenPath/bigZips/p12 +ln -s ../hg38.p12.chromAlias.txt hg38.p12.chromAlias.txt +md5sum hg38.p12.chromAlias.txt >> md5sum.txt + +# do not do this in the future, beyond p13 since it has already been done +# but in future make a copy of this block and rename stuff for p14 etc. 
+cd /hive/data/genomes/hg38/goldenPath/bigZips/p13 +ln -s ../hg38.p13.chromAlias.txt hg38.p13.chromAlias.txt +md5sum hg38.p13.chromAlias.txt >> md5sum.txt + +cd /hive/data/genomes/hg38/goldenPath/bigZips/latest +# adapt to whatever the most recent patch is +ln -s ../p13/hg38.p13.chromAlias.txt hg38.chromAlias.txt +md5sum hg38.chromAlias.txt >> md5sum.txt + +# do not do this in the future, beyond p13 since it has already been done +cd /data/apache/htdocs-hgdownload/goldenPath/hg38/bigZips +rm hg38.chromAlias.txt +ln -s /hive/data/genomes/hg38/goldenPath/bigZips/hg38.chromAlias.txt hg38.chromAlias.txt + + +############################################################################# # Build perSeqMax file for gfServer (hgBlat) (DONE 2021-08-26 galt) # When the blat server is restarted with the updated hg38.2bit file, # hg38.altsAndFixes needs to be copied over along with the new hg38.2bit file, # and gfServer needs to be restarted with -perSeqMax=hg38.altsAndFixes. cd /hive/data/genomes/hg38 cut -f 1 chrom.sizes.p13 \ | grep -E '_(alt|fix)$' \ | sed -re 's/^/hg38.2bit:/;' \ > hg38.altsAndFixes.p13 # Link for blat server installation convenience: ln -sf hg38.altsAndFixes.p13 altsAndFixes ######################################################################### # Regenerate idKeys with extended hg38 (DONE 2021-08-26 galt)