#!/bin/bash

set -beEu -o pipefail

# Recovered from the diff of commit 06d7be056190c14b85e71bc12523f18ea6815b5e,
# which adds src/hg/utils/otto/sarscov2phylo/bioSampleIdToText.sh.
#
# stdin:  series of BioSample GI# IDs (numeric IDs, *not* accessions), one per line
# stdout: full text record for each BioSample
#
# IDs are POSTed to NCBI efetch in batches of $batchSize: the request body is
# the fixed base parameters followed by one "&id=..." per input ID.

readonly url="https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
readonly db="biosample"
readonly retmode="text"
readonly tool="bioSampleIdToText"

# With set -u, a bare $USER aborts the script when USER is unset, so fall back
# to LOGNAME and finally to id(1).
user="${USER:-${LOGNAME:-}}"
if [[ -z "$user" ]]; then
  user=$(id -un)
fi
# %40 is the URL-encoded '@'.
readonly email="${user}%40soe.ucsc.edu"
readonly baseParams="db=${db}&retmode=${retmode}&tool=${tool}&email=${email}"

readonly batchSize=100

# Put the parameter file in /dev/shm when it is usable. The directory is passed
# to mktemp explicitly because an unexported TMPDIR shell variable is not
# visible to the mktemp process.
tmpRoot="${TMPDIR:-/tmp}"
if [[ -d /dev/shm && -w /dev/shm ]]; then
  tmpRoot=/dev/shm
fi
paramFile=$(mktemp "${tmpRoot}/bioSampleIdToText.XXXXXX")

# Remove the parameter file on every exit path, including a set -e abort
# caused by a failing curl.
cleanup() {
  rm -f -- "$paramFile"
}
trap cleanup EXIT

# Start a new batch: reset the ID counter and truncate the parameter file to
# just the base parameters.
# Globals: count (written), baseParams (read), paramFile (read)
initBatch() {
  count=0
  printf '%s' "$baseParams" > "$paramFile"
}

# POST the accumulated parameters to efetch; the response goes to stdout.
# Globals: paramFile (read), url (read)
# NOTE(review): without --fail curl exits 0 on HTTP error statuses, so an error
# response body would be written to stdout like a normal record -- confirm
# whether downstream wants a hard failure instead.
sendBatch() {
  curl -s -S -X POST -d "@${paramFile}" "$url"
  # Give NCBI a rest
  sleep 1
}

initBatch

# "|| [[ -n $id ]]" keeps a final ID that is not terminated by a newline.
while read -r id || [[ -n "$id" ]]; do
  # Skip blank lines so they neither add an empty id= parameter nor count
  # toward the batch size.
  [[ -n "$id" ]] || continue
  printf '&id=%s' "$id" >> "$paramFile"
  count=$((count + 1))
  if (( count == batchSize )); then
    sendBatch
    initBatch
  fi
done

# Flush the final, partially filled batch.
if (( count != 0 )); then
  sendBatch
fi