265430be8faea69b3c41caa789e9a58a0ac4ebb8
angie
Fri Dec 20 15:25:04 2024 -0800
Suppress progressbar when downloading, clean up when done.

diff --git src/hg/utils/otto/dengue/getNcbiDengue.sh src/hg/utils/otto/dengue/getNcbiDengue.sh
index f591004..ba7c5c2 100755
--- src/hg/utils/otto/dengue/getNcbiDengue.sh
+++ src/hg/utils/otto/dengue/getNcbiDengue.sh
@@ -38,44 +38,46 @@
         # Double the delay to give NCBI progressively more time
         retryDelay=$(($retryDelay * 2))
     fi
 done
 if [[ ! -f metadata.tsv ]]; then
     echo "datasets command failed $maxAttempts times; quitting."
     exit 1
 fi
 wc -l metadata.tsv
 
 attempt=0
 maxAttempts=5
 retryDelay=300
 while [[ $((++attempt)) -le $maxAttempts ]]; do
     echo "fasta attempt $attempt"
-    if datasets download virus genome taxon $taxId --include genome,biosample; then
+    if datasets download virus --no-progressbar genome taxon $taxId --include genome,biosample; then
         break;
     else
         echo "FAILED fasta; will try again after $retryDelay seconds"
         rm -f ncbi_dataset.zip
         sleep $retryDelay
         # Double the delay to give NCBI progressively more time
         retryDelay=$(($retryDelay * 2))
     fi
 done
 if [[ ! -s ncbi_dataset.zip ]]; then
     echo "fasta query failed $maxAttempts times; quitting."
     exit 1
 fi
 unzip ncbi_dataset.zip
 faFilter -minSize=$minSize ncbi_dataset/data/genomic.fna stdout \
     | xz -T 20 > genbank.fa.xz
 faSize <(xzcat genbank.fa.xz)
 
 # Make sure the download wasn't truncated without reporting an error:
 count=$(wc -l < metadata.tsv)
 minSamples=12000
 if (( $count < $minSamples )); then
     echo "*** Too few samples ($count)! Expected at least $minSamples. Halting. ***"
     exit 1
 fi
 
+rm -rf ncbi_dataset ncbi_dataset.zip
+
 rm -f $dengueDir/ncbi/ncbi.latest
 ln -s ncbi.$today $dengueDir/ncbi/ncbi.latest