a90606c0cc1d4650334172bdc0b193b2c19c1dbe
angie
  Mon May 11 17:38:27 2020 -0700
With Max's approval, convert the bedGraph Counts track from raw alternate allele counts (AC) to alternate allele frequency (AC/AN), which is easier to explain and is normalized to [0, 1].  Also, stop including ncov.json in the archive directories -- Trevor Bedford of Nextstrain said that GISAID doesn't want it available for public download.  :(  refs #25481, #25188

diff --git src/hg/utils/otto/nextstrainNcov/doUpdate.sh src/hg/utils/otto/nextstrainNcov/doUpdate.sh
index 8084fe6..8492ccd 100755
--- src/hg/utils/otto/nextstrainNcov/doUpdate.sh
+++ src/hg/utils/otto/nextstrainNcov/doUpdate.sh
@@ -61,51 +61,55 @@
 bedToBigBed -type=bed4 -tab -verbose=0 nextstrainDiscarded.bed $chromSizes \
     nextstrainDiscarded.bb
 
 bedToBigBed -type=bed4 -tab -verbose=0 nextstrainBlacklisted.bed $chromSizes \
     nextstrainBlacklisted.bb
 
 bedToBigBed -type=bed4 -tab -verbose=0 nextstrainInformative.bed $chromSizes \
     nextstrainInformative.bb
 
 # bigWig for the tree parsimony scores track for David
 bedGraphToBigWig nextstrainParsimony.bedGraph $chromSizes nextstrainParsimony.bw
 
 # Max's nextstrainSamples*.bedGraph allele count bigWigs:
 for i in nextstrainSamples*.vcf.gz; do
     base=`basename $i .vcf.gz`
-    zcat $i | cut -f1,2,8 | cut -d';' -f1 | grep -v '#' | sed -e 's/AC=//g' | cut -f1 -d, \
-        | tawk '{print $1, $2, $2+1, $3}' > $base.bedGraph
+    zcat $i \
+    | grep -v '#' \
+    | perl -wne '@w=split("\t");
+                 $w[7] =~ m/AC=(\d+)[\d,]*;AN=(\d+)/ ||
+                          die "Cant find AC and AN in |$w[7]|";
+                 print join("\t", $w[0], $w[1]-1, $w[1], (sprintf "%.06f", $1 / $2)) . "\n";' \
+      > $base.bedGraph
     bedGraphToBigWig $base.bedGraph $chromSizes $base.bigWig
 done
 
 # Install
 mkdir -p $ottoDir/current
 cp -pf $runDir/nextstrainGene.bb $runDir/nextstrainClade.bb \
     $runDir/nextstrain*.vcf.gz{,.tbi} \
     $runDir/nextstrain*.nh \
     $runDir/nextstrainSamples*.bigWig \
     $ottoDir/current/
 ln -sf $ottoDir/current/nextstrainGene.bb $ottoDir/current/nextstrainClade.bb \
     $ottoDir/current/nextstrain*.vcf.gz{,.tbi} \
     $ottoDir/current/nextstrain*.nh \
     $ottoDir/current/nextstrainSamples*.bigWig \
     $gbdbDir/
 
 # Install but don't archive (for now) the experimental tracks for David.
 cp -pf $runDir/nextstrain{Discarded,Blacklisted,Informative}.bb \
     $runDir/nextstrainParsimony.bw \
     $ottoDir/current/
 ln -sf $ottoDir/current/nextstrain{Discarded,Blacklisted,Informative}.bb \
     $ottoDir/current/nextstrainParsimony.bw \
     $gbdbDir/
 
 # Daily archive (may overwrite files from earlier today)
 mkdir -p $ottoDir/archive/$today
 cp -pf $runDir/nextstrainGene.bb $runDir/nextstrainClade.bb \
     $runDir/nextstrain*.vcf.gz{,.tbi} \
     $runDir/nextstrain*.nh \
     $runDir/nextstrainSamples*.bigWig \
-    $runDir/ncov.json \
     $ottoDir/archive/$today
 
 echo "Updated nextstrain/ncov `date` (ncov.json date $latestDate)"