d0bcef580cd0c19bbe20a2c401e0baf62410790c
hiram
Thu Oct 26 12:52:03 2023 -0700
add sanityCheck to prevent duplicate accession IDs from leaking into the tsv orderList refs #29545

diff --git src/hg/makeDb/doc/asmHubs/asmHubs.mk src/hg/makeDb/doc/asmHubs/asmHubs.mk
index 392773c..d4b6881 100644
--- src/hg/makeDb/doc/asmHubs/asmHubs.mk
+++ src/hg/makeDb/doc/asmHubs/asmHubs.mk
@@ -1,35 +1,44 @@
 # generic makefile to construct the index pages and symlinks
 # for any assembly hub
 #
 # will be included by those individual build directories with the
 # following variables defined to customize the resulting files:
 #
 # destDir, srcDir, orderList, indexName, testIndexName,
 # statsName, testStatsName, dataName, testDataName, genomesTxt, hubFile
 # testHubFile, Name and name
 
 toolsDir=${HOME}/kent/src/hg/makeDb/doc/asmHubs
 htdocsHgDownload=/usr/local/apache/htdocs-hgdownload
 hubsDownload=${htdocsHgDownload}/hubs/${name}
 asmHubSrc=/hive/data/genomes/asmHubs/${name}
 
-all:: makeDirs mkGenomes symLinks hubIndex asmStats trackData hubTxt groupsTxt
+all:: sanityCheck makeDirs mkGenomes symLinks hubIndex asmStats trackData hubTxt groupsTxt
 
 makeDirs:
 	mkdir -p ${destDir}
 
+sanityCheck:
+	@goodBad=$$(cut -d'_' -f1-2 ${orderList} | sort | uniq -c | awk '$$1 > 1' | wc -l); \
+	if [ $$goodBad -ne 0 ]; then \
+	  tsvFile=$$(basename ${orderList}); \
+	  echo "ERROR: duplicate accession in '$$tsvFile'"; \
+	  cut -d'_' -f1-2 ${orderList} | sort | uniq -c | awk '$$1 > 1'; \
+	  exit 255; \
+	fi
+
 sshKeyDownload:
 	ssh -o PasswordAuthentication=no qateam@hgdownload date
 
 sshKeyDynablat:
 	ssh -o PasswordAuthentication=no qateam@dynablat-01 date
 
 sshKeyCheck: sshKeyDownload sshKeyDynablat
 	@printf "# ssh keys to hgdownload and dynablat-01 are good\n"
 
 mkGenomes::
 	@printf "# starting mkGenomes " 1>&2
 	@date "+%s %F %T" 1>&2
 	${toolsDir}/mkGenomes.pl dynablat-01 4040 ${orderList} > ${destDir}/${genomesTxt}.txt
 	rm -f ${destDir}/download.${genomesTxt}.txt
 	cp -p ${destDir}/${genomesTxt}.txt ${destDir}/download.${genomesTxt}.txt