aa0dc230ac3da4b9ba3ab771afb381397da71c05
angie
  Tue Oct 31 09:57:35 2023 -0700
Use the compressed CNCB fasta (cncb.nonGenBank.fasta.xz) as mapping input instead of the uncompressed file.

diff --git src/hg/utils/otto/sarscov2phylo/updateIdMapping.sh src/hg/utils/otto/sarscov2phylo/updateIdMapping.sh
index 296a339..69f2a00 100755
--- src/hg/utils/otto/sarscov2phylo/updateIdMapping.sh
+++ src/hg/utils/otto/sarscov2phylo/updateIdMapping.sh
@@ -23,31 +23,31 @@
 ottoDir=/hive/data/outside/otto/sarscov2phylo
 mapScriptDir=~angie/chris_ncov
 # Should use a better location than this...
 installDir=/hive/users/angie/gisaid
 
 ncbiDir=$ottoDir/ncbi.$today
 cogUkDir=$ottoDir/cogUk.$today
 # Last time I checked, CNCB had not updated since September, just keep using what we have
 cncbDir=$ottoDir/cncb.latest
 
 # Set up input files for Chris's scripts to map GISAID <--> public sequences
 cd $mapScriptDir
 rm -rf input/$today
 mkdir input/$today
 cd input/$today
-ln -sf $cncbDir/cncb.nonGenBank.fasta .
+ln -sf $cncbDir/cncb.nonGenBank.fasta.xz .
 ln -sf $ncbiDir/genbank.fa.xz .
 ln -sf $cogUkDir/cog_all.fasta.xz .
 ln -sf $nextfasta .
 xcat $nextmeta | tail -n+2 | cut -f1,3 | uniq > seqToEpi
 
 cd $mapScriptDir
 time ./build.sh -t $today
 
 cd $installDir
 
 gbToDate=$ncbiDir/gbToDate
 cogUkToDate=$cogUkDir/cogUkToDate
 cncbToDate=$cncbDir/cncbToDate
 
 join -t$'\t' -a 1 -1 2 -o 1.1,1.2,1.3,2.2 \