a90606c0cc1d4650334172bdc0b193b2c19c1dbe angie Mon May 11 17:38:27 2020 -0700 With Max's approval, convert the bedGraph Counts track to alt allele frequency (easier to explain, normalized to [0, 1]). Also, don't include ncov.json in the archive directories -- Trevor Bedford of Nextstrain said that GISAID doesn't want it available for public download. :( refs #25481, #25188 diff --git src/hg/utils/otto/nextstrainNcov/doUpdate.sh src/hg/utils/otto/nextstrainNcov/doUpdate.sh index 8084fe6..8492ccd 100755 --- src/hg/utils/otto/nextstrainNcov/doUpdate.sh +++ src/hg/utils/otto/nextstrainNcov/doUpdate.sh @@ -61,51 +61,55 @@ bedToBigBed -type=bed4 -tab -verbose=0 nextstrainDiscarded.bed $chromSizes \ nextstrainDiscarded.bb bedToBigBed -type=bed4 -tab -verbose=0 nextstrainBlacklisted.bed $chromSizes \ nextstrainBlacklisted.bb bedToBigBed -type=bed4 -tab -verbose=0 nextstrainInformative.bed $chromSizes \ nextstrainInformative.bb # bigWig for the tree parsimony scores track for David bedGraphToBigWig nextstrainParsimony.bedGraph $chromSizes nextstrainParsimony.bw # Max's nextstrainSamples*.bedGraph allele count bigWigs: for i in nextstrainSamples*.vcf.gz; do base=`basename $i .vcf.gz` - zcat $i | cut -f1,2,8 | cut -d';' -f1 | grep -v '#' | sed -e 's/AC=//g' | cut -f1 -d, \ - | tawk '{print $1, $2, $2+1, $3}' > $base.bedGraph + zcat $i \ + | grep -v '#' \ + | perl -wne '@w=split("\t"); + $w[7] =~ m/AC=(\d+)[\d,]*;AN=(\d+)/ || + die "Cant find AC and AN in |$w[7]|"; + print join("\t", $w[0], $w[1]-1, $w[1], (sprintf "%.06f", $1 / $2)) . "\n";' \ + > $base.bedGraph bedGraphToBigWig $base.bedGraph $chromSizes $base.bigWig done # Install mkdir -p $ottoDir/current cp -pf $runDir/nextstrainGene.bb $runDir/nextstrainClade.bb \ $runDir/nextstrain*.vcf.gz{,.tbi} \ $runDir/nextstrain*.nh \ $runDir/nextstrainSamples*.bigWig \ $ottoDir/current/ ln -sf $ottoDir/current/nextstrainGene.bb $ottoDir/current/nextstrainClade.bb \ $ottoDir/current/nextstrain*.vcf.gz{,.tbi} \ $ottoDir/current/nextstrain*.nh \ $ottoDir/current/nextstrainSamples*.bigWig \ $gbdbDir/ # Install but don't archive (for now) the experimental tracks for David. cp -pf $runDir/nextstrain{Discarded,Blacklisted,Informative}.bb \ $runDir/nextstrainParsimony.bw \ $ottoDir/current/ ln -sf $ottoDir/current/nextstrain{Discarded,Blacklisted,Informative}.bb \ $ottoDir/current/nextstrainParsimony.bw \ $gbdbDir/ # Daily archive (may overwrite files from earlier today) mkdir -p $ottoDir/archive/$today cp -pf $runDir/nextstrainGene.bb $runDir/nextstrainClade.bb \ $runDir/nextstrain*.vcf.gz{,.tbi} \ $runDir/nextstrain*.nh \ $runDir/nextstrainSamples*.bigWig \ - $runDir/ncov.json \ $ottoDir/archive/$today echo "Updated nextstrain/ncov `date` (ncov.json date $latestDate)"