a501a677b1027e7103b547e54b0e2ee286ca09e7 angie Fri Mar 5 15:39:35 2021 -0800 Make getNcbi.sh more efficient: fetch only new IDs from BioSample; run nextclade and pangolin only on new sequences. diff --git src/hg/utils/otto/sarscov2phylo/updatePublic.sh src/hg/utils/otto/sarscov2phylo/updatePublic.sh index 19b36e1..bd7710f 100755 --- src/hg/utils/otto/sarscov2phylo/updatePublic.sh +++ src/hg/utils/otto/sarscov2phylo/updatePublic.sh @@ -23,26 +23,23 @@ today=$(date +%F) scriptDir=$(dirname "${BASH_SOURCE[0]}") $scriptDir/gisaidFromChunks.sh cogUkDir=$ottoDir/cogUk.$today mkdir -p $cogUkDir cd $cogUkDir $scriptDir/getCogUk.sh >& getCogUk.log ncbiDir=$ottoDir/ncbi.$today mkdir -p $ncbiDir cd $ncbiDir $scriptDir/getNcbi.sh >& getNcbi.log -$scriptDir/nextcladeNcbi.sh & -$scriptDir/pangolinNcbi.sh - $scriptDir/updateIdMapping.sh $gisaidDir/{metadata_batch_$today.tsv.gz,sequences_batch_$today.fa.xz} buildDir=$ottoDir/$today mkdir -p $buildDir cd $buildDir $scriptDir/updatePublicTree.sh $prevDate $problematicSitesVcf >& updatePublicTree.log cat hgPhyloPlace.description.txt