d0bcef580cd0c19bbe20a2c401e0baf62410790c hiram Thu Oct 26 12:52:03 2023 -0700 add sanityCheck to prevent duplicate accession IDs from leaking into the tsv orderList refs #29545 diff --git src/hg/makeDb/doc/asmHubs/asmHubs.mk src/hg/makeDb/doc/asmHubs/asmHubs.mk index 392773c..d4b6881 100644 --- src/hg/makeDb/doc/asmHubs/asmHubs.mk +++ src/hg/makeDb/doc/asmHubs/asmHubs.mk @@ -1,135 +1,144 @@ # generic makefile to construct the index pages and symlinks # for any assembly hub # # will be included by those individual build directories with the # following variables defined to customize the resulting files: # # destDir, srcDir, orderList, indexName, testIndexName, # statsName, testStatsName, dataName, testDataName, genomesTxt, hubFile # testHubFile, Name and name toolsDir=${HOME}/kent/src/hg/makeDb/doc/asmHubs htdocsHgDownload=/usr/local/apache/htdocs-hgdownload hubsDownload=${htdocsHgDownload}/hubs/${name} asmHubSrc=/hive/data/genomes/asmHubs/${name} -all:: makeDirs mkGenomes symLinks hubIndex asmStats trackData hubTxt groupsTxt +all:: sanityCheck makeDirs mkGenomes symLinks hubIndex asmStats trackData hubTxt groupsTxt makeDirs: mkdir -p ${destDir} +sanityCheck: + @goodBad=$$(cut -d'_' -f1-2 ${orderList} | sort | uniq -c | awk '$$1 > 1' | wc -l); \ + if [ $$goodBad -ne 0 ]; then \ + tsvFile=$$(basename ${orderList}); \ + echo "ERROR: duplicate accession in '$$tsvFile'"; \ + cut -d'_' -f1-2 ${orderList} | sort | uniq -c | awk '$$1 > 1'; \ + exit 255; \ + fi + sshKeyDownload: ssh -o PasswordAuthentication=no qateam@hgdownload date sshKeyDynablat: ssh -o PasswordAuthentication=no qateam@dynablat-01 date sshKeyCheck: sshKeyDownload sshKeyDynablat @printf "# ssh keys to hgdownload and dynablat-01 are good\n" mkGenomes:: @printf "# starting mkGenomes " 1>&2 @date "+%s %F %T" 1>&2 ${toolsDir}/mkGenomes.pl dynablat-01 4040 ${orderList} > ${destDir}/${genomesTxt}.txt rm -f ${destDir}/download.${genomesTxt}.txt cp -p ${destDir}/${genomesTxt}.txt ${destDir}/download.${genomesTxt}.txt @printf "# finished mkGenomes " 1>&2 @date "+%s %F %T" 1>&2 symLinks:: ${toolsDir}/mkSymLinks.pl ${orderList} @[ -d ${hubsDownload} ] && true || mkdir ${hubsDownload} @for html in ${indexName} ${statsName} ${dataName} ; do \ [ -L ${hubsDownload}/$${html}.html ] && true || ln -s ${asmHubSrc}/$${html}.html ${hubsDownload} ; \ [ -L ${hubsDownload}/download.$${html}.html ] && true || ln -s ${asmHubSrc}/download.$${html}.html ${hubsDownload} ; \ done @for txt in groups hub genomes download.genomes ; do \ [ -L ${hubsDownload}/$${txt}.txt ] && true || ln -s ${asmHubSrc}/$${txt}.txt ${hubsDownload} ; \ done hubIndex:: rm -f ${destDir}/${testIndexName}.html ${destDir}/${indexName}.html ${destDir}/download.${indexName}.html ${toolsDir}/mkHubIndex.pl ${Name} ${name} ${defaultAssembly} ${orderList} | sed -e 's#${name}/hub.txt#${name}/${hubFile}.txt#;' > ${destDir}/download.${indexName}.html sed -e "s#genome.ucsc.edu/h/#genome-test.gi.ucsc.edu/h/#g; s/hgdownload.soe/hgdownload-test.gi/g; s#genome.ucsc.edu#genome-test.gi.ucsc.edu#;" ${destDir}/download.${indexName}.html > ${destDir}/${indexName}.html chmod +x ${destDir}/${indexName}.html ${destDir}/download.${indexName}.html asmStats:: rm -f ${destDir}/download.${statsName}.html ${destDir}/${statsName}.html ${destDir}/${testStatsName}.html ${toolsDir}/mkAsmStats.pl ${Name} ${name} ${orderList} > ${destDir}/download.${statsName}.html sed -e "s#genome.ucsc.edu/h/#genome-test.gi.ucsc.edu/h/#g; s/hgdownload.soe/hgdownload-test.gi/g;" ${destDir}/download.${statsName}.html > ${destDir}/${statsName}.html chmod +x ${destDir}/${statsName}.html ${destDir}/download.${statsName}.html # trackData makes different tables for the test vs. production version # mkHubIndex.pl and mkAsmStats.pl should do this too . . . TBD trackData:: rm -f ${destDir}/${testDataName}.html ${destDir}/${dataName}.html ${destDir}/download.${dataName}.html ${toolsDir}/trackData.pl ${Name} ${name} ${orderList} > ${destDir}/download.${dataName}.html ${toolsDir}/trackData.pl -test ${Name} ${name} ${orderList} > ${destDir}/${dataName}.html chmod +x ${destDir}/${dataName}.html chmod +x ${destDir}/download.${dataName}.html indexPages: hubIndex asmStats trackData echo indexPages done hubTxt: rm -f ${destDir}/${testHubFile}.txt ${destDir}/${hubFile}.txt sed -e "s#index.html#${indexName}.html#; s#genomes.txt#${genomesTxt}.txt#;" ${srcDir}/${hubTxtFile} > ${destDir}/${hubFile}.txt # all hubs have the same set of groups, no need for any name customization groupsTxt: rm -f ${destDir}/groups.txt rm -f ${destDir}/groups.txt cp -p ${toolsDir}/groups.txt ${destDir}/groups.txt clean:: rm -f ${destDir}/${hubFile}.txt rm -f ${destDir}/${testHubFile}.txt rm -f ${destDir}/groups.txt rm -f ${destDir}/${genomesTxt}.txt rm -f ${destDir}/${indexName}.html rm -f ${destDir}/${testIndexName}.html rm -f ${destDir}/${statsName}.html rm -f ${destDir}/${testStatsName}.html sendDownload:: sshKeyCheck ${toolsDir}/mkSendList.pl ${orderList} | while read F; do \ ${toolsDir}/sendToHgdownload.sh $$F < /dev/null; done rsync -L -a -P \ /usr/local/apache/htdocs-hgdownload/hubs/${name}/groups.txt \ qateam@hgdownload:/mirrordata/hubs/${name}/ rsync -L -a -P \ /usr/local/apache/htdocs-hgdownload/hubs/${name}/${hubFile}.txt \ qateam@hgdownload:/mirrordata/hubs/${name}/ rsync -L -a -P \ /usr/local/apache/htdocs-hgdownload/hubs/${name}/download.${indexName}.html \ qateam@hgdownload:/mirrordata/hubs/${name}/${indexName}.html rsync -L -a -P \ /usr/local/apache/htdocs-hgdownload/hubs/${name}/download.${statsName}.html \ qateam@hgdownload:/mirrordata/hubs/${name}/${statsName}.html rsync -L -a -P \ /usr/local/apache/htdocs-hgdownload/hubs/${name}/download.${dataName}.html \ qateam@hgdownload:/mirrordata/hubs/${name}/${dataName}.html rsync -L -a -P \ /usr/local/apache/htdocs-hgdownload/hubs/${name}/download.${genomesTxt}.txt \ qateam@hgdownload:/mirrordata/hubs/${name}/${genomesTxt}.txt verifyTestDownload: ${toolsDir}/verifyOnDownload.sh api-test.gi.ucsc.edu ${orderList} verifyDownload: ${toolsDir}/verifyOnDownload.sh apibeta.soe.ucsc.edu ${orderList} verifyDynamicBlat: grep -v "^#" ${orderList} | cut -d'_' -f1-2 | while read asmId; do \ ${toolsDir}/testDynBlat.sh $$asmId < /dev/null; done sendIndexes:: rsync -L -a -P \ /usr/local/apache/htdocs-hgdownload/hubs/${name}/download.${indexName}.html \ qateam@hgdownload:/mirrordata/hubs/${name}/${indexName}.html rsync -L -a -P \ /usr/local/apache/htdocs-hgdownload/hubs/${name}/download.${statsName}.html \ qateam@hgdownload:/mirrordata/hubs/${name}/${statsName}.html rsync -L -a -P \ /usr/local/apache/htdocs-hgdownload/hubs/${name}/download.${dataName}.html \ qateam@hgdownload:/mirrordata/hubs/${name}/${dataName}.html