5581e1884c54c583fe68844f8271a3baac1617ae
angie
  Fri Dec 20 15:21:56 2024 -0800
Accumulated minor updates.

diff --git src/hg/utils/otto/mpxv/getNcbiMpxv.sh src/hg/utils/otto/mpxv/getNcbiMpxv.sh
index e72c0f3..7370869 100755
--- src/hg/utils/otto/mpxv/getNcbiMpxv.sh
+++ src/hg/utils/otto/mpxv/getNcbiMpxv.sh
@@ -22,46 +22,52 @@
 attempt=0
 maxAttempts=5
 retryDelay=300
 while [[ $((++attempt)) -le $maxAttempts ]]; do
     echo "metadata attempt $attempt"
     if curl -fSs $metadataUrl | csvToTab \
         | tawk '$7 >= '$minSize' && $1 !~ /^NC_/' \
         | sed -re 's/\tUNVERIFIED: /\t/;' \
         | sed -re 's/\tMonkeypox virus /\t/;' \
         | sed -re 's/\tisolate /\t/;' \
         | sed -re 's/\tstrain /\t/;' \
         | sed -re 's/, (complete|partial) (genome|cds)\t/\t/;' \
         | sed -re 's/\tMPXV[_-]/\t/g;' \
         | sed -re 's@\t(hMPX|hMPXV|hMpxV|MpxV|MPxV|MPXV|MpxV|MPX|Monkeypox|MPXV22)/@\t@g;' \
         | sed -re 's@\t[Hh]uman/@\t@g;' \
+        | sed -re 's@RNA genome assembly, complete genome: monopartite@@;' \
         > metadata.tsv; then
         break;
     else
         echo "FAILED metadata; will try again after $retryDelay seconds"
         rm -f metadata.tsv
         sleep $retryDelay
         # Double the delay to give NCBI progressively more time
         retryDelay=$(($retryDelay * 2))
     fi
 done
 if [[ ! -f metadata.tsv ]]; then
-    echo "datasets command failed $maxAttempts times; quitting."
-#    exit 1
+    echo "metadata query failed $maxAttempts times; quitting."
+    exit 1
 fi
 wc -l metadata.tsv
 
+if [[ ! -s metadata.tsv ]]; then
+    echo "metadata query appeared to succeed but gave 0-length output"
+    exit 1
+fi
+
 attempt=0
 maxAttempts=5
 retryDelay=300
 while [[ $((++attempt)) -le $maxAttempts ]]; do
     echo "fasta attempt $attempt"
     if datasets download virus genome taxon $taxId --include genome,biosample; then
         break;
     else
         echo "FAILED fasta; will try again after $retryDelay seconds"
         rm -f ncbi_dataset.zip
         sleep $retryDelay
         # Double the delay to give NCBI progressively more time
         retryDelay=$(($retryDelay * 2))
     fi
 done