5581e1884c54c583fe68844f8271a3baac1617ae
angie
Fri Dec 20 15:21:56 2024 -0800
Accumulated minor updates.

diff --git src/hg/utils/otto/mpxv/getNcbiMpxv.sh src/hg/utils/otto/mpxv/getNcbiMpxv.sh
index e72c0f3..7370869 100755
--- src/hg/utils/otto/mpxv/getNcbiMpxv.sh
+++ src/hg/utils/otto/mpxv/getNcbiMpxv.sh
@@ -22,46 +22,52 @@
 attempt=0
 maxAttempts=5
 retryDelay=300
 while [[ $((++attempt)) -le $maxAttempts ]]; do
     echo "metadata attempt $attempt"
     if curl -fSs $metadataUrl | csvToTab \
         | tawk '$7 >= '$minSize' && $1 !~ /^NC_/' \
         | sed -re 's/\tUNVERIFIED: /\t/;' \
         | sed -re 's/\tMonkeypox virus /\t/;' \
         | sed -re 's/\tisolate /\t/;' \
         | sed -re 's/\tstrain /\t/;' \
         | sed -re 's/, (complete|partial) (genome|cds)\t/\t/;' \
         | sed -re 's/\tMPXV[_-]/\t/g;' \
         | sed -re 's@\t(hMPX|hMPXV|hMpxV|MpxV|MPxV|MPXV|MpxV|MPX|Monkeypox|MPXV22)/@\t@g;' \
         | sed -re 's@\t[Hh]uman/@\t@g;' \
+        | sed -re 's@RNA genome assembly, complete genome: monopartite@@;' \
         > metadata.tsv; then
         break;
     else
         echo "FAILED metadata; will try again after $retryDelay seconds"
         rm -f metadata.tsv
         sleep $retryDelay
         # Double the delay to give NCBI progressively more time
         retryDelay=$(($retryDelay * 2))
     fi
 done
 if [[ ! -f metadata.tsv ]]; then
-    echo "datasets command failed $maxAttempts times; quitting."
-#    exit 1
+    echo "metadata query failed $maxAttempts times; quitting."
+    exit 1
 fi
 wc -l metadata.tsv
 
+if [[ ! -s metadata.tsv ]]; then
+    echo "metadata query appeared to succeed but gave 0-length output"
+    exit 1
+fi
+
 attempt=0
 maxAttempts=5
 retryDelay=300
 while [[ $((++attempt)) -le $maxAttempts ]]; do
     echo "fasta attempt $attempt"
     if datasets download virus genome taxon $taxId --include genome,biosample; then
         break;
     else
         echo "FAILED fasta; will try again after $retryDelay seconds"
         rm -f ncbi_dataset.zip
         sleep $retryDelay
         # Double the delay to give NCBI progressively more time
         retryDelay=$(($retryDelay * 2))
     fi
 done