67217b614063c0bf07f4b5b69f0705d0125e32b4 angie Mon Aug 16 15:31:07 2021 -0700 Wow, 1 million seemed like such a high ceiling once... but GenBank has more than that number of SARS-CoV-2 genomes now! diff --git src/hg/utils/otto/sarscov2phylo/searchAllSarsCov2BioSample.sh src/hg/utils/otto/sarscov2phylo/searchAllSarsCov2BioSample.sh index 10eecb2..6c18a74 100755 --- src/hg/utils/otto/sarscov2phylo/searchAllSarsCov2BioSample.sh +++ src/hg/utils/otto/sarscov2phylo/searchAllSarsCov2BioSample.sh @@ -1,19 +1,19 @@ #!/bin/bash set -beEu -o pipefail # Entrez search for all SARS-CoV-2 sequences with length >= 29,000 # "Severe acute respiratory syndrome coronavirus 2"[Organism] AND ("29000"[SLEN] : "35000"[SLEN]) query='%22Severe%20acute%20respiratory%20syndrome%20coronavirus%202%22%5BOrganism%5D' tool=searchAllSarsCov2.sh email="$USER%40soe.ucsc.edu" # Assemble the esearch URL base="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" -url="${base}esearch.fcgi?db=biosample&term=$query&tool=$tool&email=$email&retmax=1000000" +url="${base}esearch.fcgi?db=biosample&term=$query&tool=$tool&email=$email&retmax=10000000" curl -s -S "$url" \ | grep "<Id>" \ | sed -re 's@\s*<Id>([0-9]+)</Id>@\1@;' \ > all.biosample.gids.txt