d0bcef580cd0c19bbe20a2c401e0baf62410790c
hiram
  Thu Oct 26 12:52:03 2023 -0700
add sanityCheck to prevent duplicate accession IDs from leaking into the tsv orderList refs #29545

diff --git src/hg/makeDb/doc/asmHubs/asmHubs.mk src/hg/makeDb/doc/asmHubs/asmHubs.mk
index 392773c..d4b6881 100644
--- src/hg/makeDb/doc/asmHubs/asmHubs.mk
+++ src/hg/makeDb/doc/asmHubs/asmHubs.mk
@@ -1,35 +1,44 @@
 # generic makefile to construct the index pages and symlinks
 # for any assembly hub
 #
 # will be included by those individual build directories with the
 # following variables defined to customize the resulting files:
 #
 # destDir, srcDir, orderList, indexName, testIndexName,
 # statsName, testStatsName, dataName, testDataName, genomesTxt, hubFile
 # testHubFile, Name and name
 
 toolsDir=${HOME}/kent/src/hg/makeDb/doc/asmHubs
 htdocsHgDownload=/usr/local/apache/htdocs-hgdownload
 hubsDownload=${htdocsHgDownload}/hubs/${name}
 asmHubSrc=/hive/data/genomes/asmHubs/${name}
 
-all:: makeDirs mkGenomes symLinks hubIndex asmStats trackData hubTxt groupsTxt
+all:: sanityCheck makeDirs mkGenomes symLinks hubIndex asmStats trackData hubTxt groupsTxt  # sanityCheck is listed first so a serial make aborts before anything is built; make -j does not guarantee this ordering
 
 makeDirs:
 	mkdir -p ${destDir}
 
+sanityCheck:  # abort (exit 255) when the first two '_'-separated fields of any orderList line occur more than once
+	@dups=$$(cut -d'_' -f1-2 ${orderList} | sort | uniq -c | awk '$$1 > 1'); \
+	if [ -n "$$dups" ]; then \
+	    tsvFile=$$(basename ${orderList}); \
+	    echo "ERROR: duplicate accession in '$$tsvFile'" 1>&2; \
+	    printf '%s\n' "$$dups" 1>&2; \
+	    exit 255; \
+	fi
+
 sshKeyDownload:
 	ssh -o PasswordAuthentication=no qateam@hgdownload date
 
 sshKeyDynablat:
 	ssh -o PasswordAuthentication=no qateam@dynablat-01 date
 
 sshKeyCheck: sshKeyDownload sshKeyDynablat
 	@printf "# ssh keys to hgdownload and dynablat-01 are good\n"
 
 mkGenomes::
 	@printf "# starting mkGenomes " 1>&2
 	@date "+%s %F %T" 1>&2
 	${toolsDir}/mkGenomes.pl dynablat-01 4040 ${orderList} > ${destDir}/${genomesTxt}.txt
 	rm -f ${destDir}/download.${genomesTxt}.txt
 	cp -p ${destDir}/${genomesTxt}.txt ${destDir}/download.${genomesTxt}.txt