#!/bin/bash
#
# searchAllSarsCov2BioSample.sh
#
# Provenance: src/hg/utils/otto/sarscov2phylo/searchAllSarsCov2BioSample.sh,
# added as a new file (mode 100755, index 0000000..10eecb2) in commit
# 06d7be056190c14b85e71bc12523f18ea6815b5e by markd,
# Mon Dec 7 00:50:29 2020 -0800, "BLAT mmap index support merge with master".
#
# Purpose: run an NCBI Entrez esearch against the BioSample database for
# records whose organism is SARS-CoV-2 and write the numeric IDs found in
# the XML response, one per line, to all.biosample.gids.txt in the current
# directory.
#
# Exit status: non-zero if curl fails (including HTTP errors), or if the
# response contains no <Id> elements (grep finds nothing and pipefail
# propagates that).

set -beEu -o pipefail

# Entrez search term, URL-encoded.  Decoded it reads:
#   "Severe acute respiratory syndrome coronavirus 2"[Organism]
# No sequence-length (SLEN) clause is sent; this query targets BioSample
# records, not nucleotide sequences.
query='%22Severe%20acute%20respiratory%20syndrome%20coronavirus%202%22%5BOrganism%5D'

# Identification parameters passed to E-utilities.
tool=searchAllSarsCov2.sh
# USER may be unset in some non-interactive environments; under `set -u`
# that would abort the script, so fall back to the effective user name.
user="${USER:-$(id -un)}"
email="${user}%40soe.ucsc.edu"

# Assemble the esearch URL
# NOTE(review): confirm the server honors a retmax this large in one request.
base="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
url="${base}esearch.fcgi?db=biosample&term=$query&tool=$tool&email=$email&retmax=1000000"

# Fetch the XML result and keep only the numeric content of <Id> elements.
#  - --fail turns HTTP errors into a non-zero curl exit instead of letting an
#    error page flow down the pipe.
#  - grep -o emits each <Id>...</Id> element on its own line, so the result
#    does not depend on how the response is line-wrapped.
#  - sed strips the surrounding tags, leaving just the ID.
curl -s -S --fail "$url" \
| grep -oE '<Id>[0-9]+</Id>' \
| sed -re 's@</?Id>@@g' \
    > all.biosample.gids.txt