aa0dc230ac3da4b9ba3ab771afb381397da71c05
angie
Tue Oct 31 09:57:35 2023 -0700
Use compressed CNCB fasta.
diff --git src/hg/utils/otto/sarscov2phylo/updateIdMapping.sh src/hg/utils/otto/sarscov2phylo/updateIdMapping.sh
index 296a339..69f2a00 100755
--- src/hg/utils/otto/sarscov2phylo/updateIdMapping.sh
+++ src/hg/utils/otto/sarscov2phylo/updateIdMapping.sh
@@ -23,31 +23,31 @@
 ottoDir=/hive/data/outside/otto/sarscov2phylo
 mapScriptDir=~angie/chris_ncov
 # Should use a better location than this...
 installDir=/hive/users/angie/gisaid
 ncbiDir=$ottoDir/ncbi.$today
 cogUkDir=$ottoDir/cogUk.$today
 # Last time I checked, CNCB had not updated since September, just keep using what we have
 cncbDir=$ottoDir/cncb.latest
 # Set up input files for Chris's scripts to map GISAID <--> public sequences
 cd $mapScriptDir
 rm -rf input/$today
 mkdir input/$today
 cd input/$today
-ln -sf $cncbDir/cncb.nonGenBank.fasta .
+ln -sf $cncbDir/cncb.nonGenBank.fasta.xz .
 ln -sf $ncbiDir/genbank.fa.xz .
 ln -sf $cogUkDir/cog_all.fasta.xz .
 ln -sf $nextfasta .
 xcat $nextmeta | tail -n+2 | cut -f1,3 | uniq > seqToEpi
 cd $mapScriptDir
 time ./build.sh -t $today
 cd $installDir
 gbToDate=$ncbiDir/gbToDate
 cogUkToDate=$cogUkDir/cogUkToDate
 cncbToDate=$cncbDir/cncbToDate
 join -t$'\t' -a 1 -1 2 -o 1.1,1.2,1.3,2.2 \