914384264d6d019d05db34ceec0e0ce8ddc9d8e3
max
  Fri Sep 30 02:09:13 2022 -0700
fixing monkeyprimer primer 102 manually, arg, refs #30010

diff --git src/hg/makeDb/doc/mpxvRivers/pcrAmplicon.txt src/hg/makeDb/doc/mpxvRivers/pcrAmplicon.txt
index d66d524..bdaa336 100644
--- src/hg/makeDb/doc/mpxvRivers/pcrAmplicon.txt
+++ src/hg/makeDb/doc/mpxvRivers/pcrAmplicon.txt
@@ -1,33 +1,36 @@
 # by Braney, for the Chen et al protocol
 mkdir /hive/data/genomes/mpxvRivers/bed/pcrAmplicon
 cd /hive/data/genomes/mpxvRivers/bed/pcrAmplicon
 
 # grab this file:     https://www.protocols.io/view/monkeypox-virus-multiplexed-pcr-amplicon-sequencin-5qpvob1nbl4o/v2/materials/MPXV-primer_genome-positions.tsv
 
 dos2unix MPXV-primer_genome-positions.tsv
 
 # convert left and right primers on separate lines to the one line formation isPcr expects
 grep LEFT MPXV-primer_genome-positions.tsv > left
 grep RIGHT MPXV-primer_genome-positions.tsv > right
 paste left right | tawk '{print $1, $3, $12}' > query.txt
 
 # do the isPcr
 isPcr ../../mpxvRivers.2bit query.txt ispcr.out
 
 # convert isPcr fasta output to bed12 with left and right as two "exons" plus two extra fields with sequence in them
 grep ">" ispcr.out | tr -d '>' | tr ':' ' ' | awk '/+/ {strand="PS"} /-/ {strand="MS"} {print $1, $2, $3, strand, $4,$5,$6}' | tr '-' ' ' | tr '+' ' ' | sed 's/bp//' | awk '{$2=$2-1;print $1, $2, $3, $4, 0, $5, $2,$3, "0,0,0", 2, length($7) "," length($8), 0 "," $3 - length($8) - $2,$7,$8}' | sed 's/MS/-/' | sed 's/PS/+/' | sed 's/_LEFT//' | sort -k1,1 -k2,2n > pcrAmplicon.bed
 
+# Sep 30: fixed up pcrAmplicon manually: added the _102 pair, because its left primer cannot be found by isPcr (there is a mutation in the genome).
+# Should have changed the isPcr options instead of adding the pair manually, but noticed this too late.
+
 bedToBigBed pcrAmplicon.bed ../../chrom.sizes pcrAmplicon.bb -as=pcrAmplicon.as -type=bed12+2
 
 # for the Welkers et al protocol
 # Max, Tue Sep 20 08:05:32 PDT 2022
 cd /hive/data/genomes/mpxvRivers/bed/pcrWelkers
 # downloaded pool1.tsv and pool2.tsv manually
 cat pool1.tsv pool2.tsv | tawk '{print ">"$1"\n"$3}' > primers.fa
 cat pool*.tsv | grep -v name | grep LEFT | cut -f1,3 > left.tmp
 cat pool*.tsv | grep -v name | grep RIGHT | cut -f1,3 > right.tmp
 paste left.tmp right.tmp | cut -f1,2,4 | sed -e 's/_LEFT//g' > query.txt
 isPcr ../../mpxvRivers.2bit query.txt ispcr.out
 cat ispcr.out | grep \>  | tr -d '>' | sed -re 's/(NC_063383.1):/\1\t/g' | sed -re 's/([0-9]+)[+-]([0-9]+)/\1\t\2/' | tr ' ' '\t' | tawk '{$8=$4; print }' | sed -e 's/monkeypox-2500_//' | tawk '{size=$5; prim1=$6; prim2=$7; name=$8; $5="1000"; $6="+"; $7=$2; $8=$3; $9="0"; $10="2"; $11=length(prim1)","length(prim2); $12="0,"($3-$2-length(prim2)); $13=name; $14=size; $15=prim1; $16=prim2; print}' > pcrWelkers.bed
 bedSort pcrWelkers.bed pcrWelkers.bed 
 bedToBigBed pcrWelkers.bed ../../chrom.sizes pcrWelkers.bb -tab -as=~/kent/src/hg/makeDb/doc/mpxvRivers/pcrPrimers.as -type=bed4+