8f226d9ae51d5aa0907e717fb310cab91a54cf87
hiram
  Wed Jul 20 17:21:05 2022 -0700
better choice of common names and manage lower valid track count for viruses refs #29484

diff --git src/hg/makeDb/doc/asmHubs/verifyOnDownload.sh src/hg/makeDb/doc/asmHubs/verifyOnDownload.sh
index defb6e6..50eab40 100755
--- src/hg/makeDb/doc/asmHubs/verifyOnDownload.sh
+++ src/hg/makeDb/doc/asmHubs/verifyOnDownload.sh
@@ -1,74 +1,88 @@
 #!/bin/bash
+#
+# verifyOnDownload.sh - for every assembly that mkSendList.pl lists from the
+#   given orderList, ask the API on <host> for its track list and report
+#   whether the number of tracks found exceeds the minimum expected
+#
 
 # set -beEu -o pipefail
 
 if [ $# -ne 2 ]; then
   printf "usage: ./verifyOnDownload.sh <host> <subset.orderList.tsv>\n" 1>&2
   printf "where <host> is something like:\n" 1>&2
   printf "api-test.gi.ucsc.edu - to use the hgwdev server\n" 1>&2
   printf "apibeta.soe.ucsc.edu - to use the hgwbeta server\n" 1>&2
   exit 255
 fi
 
 #  printf "usage: ./verifyOnDownload.sh <GCF/012/345/678/GCF_012345678.nn>\n" 1>&2
 #	${toolsDir}/mkSendList.pl ${orderList} | while read F; do \
 #	  ${toolsDir}/verifyOnDownload.sh $$F < /dev/null; done
 
 export host=$1
 export orderList=$2
 export successCount=0
 export doneCount=0
+# subset name is the orderList file name without the .orderList.tsv suffix
+export fileName=$(basename "${orderList}")
+export subset=${fileName%.orderList.tsv}
 
 export minTrackCount=12
+# the viral subset is checked against a lower minimum track count
+if [ "${subset}" == "viral" ]; then
+  minTrackCount=7
+fi
+
+# printf "# DBG subset '%s' min: %d\n" "${subset}" "${minTrackCount}" 1>&2
 
 export dbHost="localhost"
 export hubSource="hgdownload-test.gi.ucsc.edu"
 if [ "${host}" = "apibeta.soe.ucsc.edu" ]; then
   hubSource="hgdownload.soe.ucsc.edu"
 fi
 
 export totalTrackCount=0
 
 for dirPath in `~/kent/src/hg/makeDb/doc/asmHubs/mkSendList.pl "${orderList}"`
 do
   ((doneCount=doneCount+1))
 
   export genome=`basename $dirPath`
 
   case $genome in
      GC*)
   trackCount=`curl -L "https://$host/list/tracks?genome=$genome;trackLeavesOnly=1;hubUrl=https://$hubSource/hubs/${dirPath}/hub.txt" \
       2> /dev/null | python -mjson.tool | egrep ": {$" \
        | tr -d '"' | sed -e 's/^ \+//; s/ {//;' | xargs echo | wc -w`
   if [ "${trackCount}" -gt "${minTrackCount}" ]; then
     ((successCount=successCount+1))
     printf "%03d\t%s\t%d tracks:\t" "${doneCount}" "${genome}" "${trackCount}"
   else
     printf "%03d\t%s\t%d (error <= %d) tracks:\t" "${doneCount}" "${genome}" "${trackCount}" "${minTrackCount}"
   fi
   totalTrackCount=`echo $totalTrackCount $trackCount | awk '{print $1+$2}'`
   curl -L "https://$host/list/hubGenomes?hubUrl=https://$hubSource/hubs/${dirPath}/hub.txt" 2> /dev/null \
      | python -mjson.tool | egrep "organism\":|description\":" | sed -e "s/'/_/g;" \
        | tr -d '"'  | xargs echo \
           | sed -e 's/genomes: //; s/description: //; s/organism: //; s/{ //g;'
        ;;
      *)
        db=`echo $genome | tr -d '_'`
  trackCount=`curl -L "https://$host/list/tracks?genome=$db;trackLeavesOnly=1" \
            2> /dev/null | python -mjson.tool | egrep ": {$" \
                | egrep -v '"'$db'":' | tr -d '"' \
                  | sed -e 's/^ \+//; s/ {//;' | xargs echo | wc -w`
   if [ "${trackCount}" -gt "${minTrackCount}" ]; then
     ((successCount=successCount+1))
     printf "%03d\t%s\t%d tracks:\t" "${doneCount}" "${db}" "${trackCount}"
   else
-    printf "%03d\t%s\t%d (error < %d) tracks:\t" "${doneCount}" "${db}" "${trackCount}" "${minTrackCount}"
+    printf "%03d\t%s\t%d (error <= %d) tracks:\t" "${doneCount}" "${db}" "${trackCount}" "${minTrackCount}"
   fi
   totalTrackCount=`echo $totalTrackCount $trackCount | awk '{print $1+$2}'`
 hgsql -N -e "select organism,description,\",\",scientificName from dbDb where name=\"$db\";" hgcentraltest | tr "'" '_' | xargs echo | sed -e 's/ ,/,/;'
        ;;
   esac
 
 done
 export failCount=`echo $doneCount $successCount | awk '{printf "%d", $1-$2}'`
 printf "# checked %3d hubs, %3d success, %3d fail, total tracks: %d\n" "${doneCount}" "${successCount}" "${failCount}" "${totalTrackCount}"