d80e5aa50badf8d4b1e0fbadfb5cd346b812d18e
angie
  Fri Dec 2 11:09:52 2022 -0800
Remove outdated options from datasets command; don't waste time sleeping after final failure.

diff --git src/hg/utils/otto/sarscov2phylo/getNcbi.sh src/hg/utils/otto/sarscov2phylo/getNcbi.sh
index a7312b1..89be6d8 100755
--- src/hg/utils/otto/sarscov2phylo/getNcbi.sh
+++ src/hg/utils/otto/sarscov2phylo/getNcbi.sh
@@ -15,41 +15,41 @@
 
 mkdir -p $ottoDir/ncbi.$today
 cd $ottoDir/ncbi.$today
 
 attempt=0
 maxAttempts=5
 retryDelay=300
 #*** From Eric Cox 1/25/22 when download failed and they were debugging firewall issues:
 #             --proxy https://www.st-va.ncbi.nlm.nih.gov/datasets/v1 \
 #*** From Mirian Tsuchiya 6/3/22: add --debug; if there's a problem send Ncbi-Phid.
 while [[ $((++attempt)) -le $maxAttempts ]]; do
     echo "datasets attempt $attempt"
     if datasets download virus genome taxon 2697049 \
             --exclude-cds \
             --exclude-protein \
-            --exclude-gpff \
-            --exclude-pdb \
             --filename ncbi_dataset.zip \
             --no-progressbar \
             --debug \
             >& datasets.log.$attempt; then
         break;
     else
         echo "FAILED; will try again after $retryDelay seconds"
-        rm -f ncbi_dataset.zip
+        mv ncbi_dataset.zip{,.fail.$attempt}
+        if [[ $attempt -lt $maxAttempts ]]; then
             sleep $retryDelay
+        fi
         # Double the delay to give NCBI progressively more time
         retryDelay=$(($retryDelay * 2))
     fi
 done
 if [[ ! -f ncbi_dataset.zip ]]; then
     echo "datasets command failed $maxAttempts times; quitting."
     exit 1
 fi
 rm -rf ncbi_dataset
 unzip -o ncbi_dataset.zip
 # Creates ./ncbi_dataset/
 
 # This makes something just like ncbi.datasets.tsv from the /table/ API query:
 time jq -c -r '[.accession, .biosample, .isolate.collectionDate, .location.geographicLocation, .host.sciName, .isolate.name, .completeness, (.length|tostring)] | join("\t")' \
     ncbi_dataset/data/data_report.jsonl \