3f76453673b9825c8057d3f16f2c4dd12800daab braney Tue Sep 13 14:25:13 2022 -0700 added PCR amplicon track to mpxvRivers diff --git src/hg/makeDb/doc/mpxvRivers/pcrAmplicon.txt src/hg/makeDb/doc/mpxvRivers/pcrAmplicon.txt new file mode 100644 index 0000000..e30e1b2 --- /dev/null +++ src/hg/makeDb/doc/mpxvRivers/pcrAmplicon.txt @@ -0,0 +1,19 @@ +mkdir /hive/data/genomes/mpxvRivers/bed/pcrAmplicon +cd /hive/data/genomes/mpxvRivers/bed/pcrAmplicon + +# grab this file: https://www.protocols.io/view/monkeypox-virus-multiplexed-pcr-amplicon-sequencin-5qpvob1nbl4o/v2/materials/MPXV-primer_genome-positions.tsv + +dos2unix MPXV-primer_genome-positions.tsv + +# convert left and right primers on separate lines to the one-line format isPcr expects +grep LEFT MPXV-primer_genome-positions.tsv > left +grep RIGHT MPXV-primer_genome-positions.tsv > right +paste left right | tawk '{print $1, $3, $12}' > query.txt + +# do the isPcr +isPcr ../../mpxvRivers.2bit query.txt ispcr.out + +# convert isPcr fasta output to bed12 with left and right as two "exons" plus two extra fields with sequence in them +grep ">" ispcr.out | tr -d '>' | tr ':' ' ' | awk '/+/ {strand="PS"} /-/ {strand="MS"} {print $1, $2, $3, strand, $4,$5,$6}' | tr '-' ' ' | tr '+' ' ' | sed 's/bp//' | awk '{$2=$2-1;print $1, $2, $3, $4, 0, $5, $2,$3, "0,0,0", 2, length($7) "," length($8), 0 "," $3 - length($8) - $2,$7,$8}' | sed 's/MS/-/' | sed 's/PS/+/' | sed 's/_LEFT//' | sort -k1,1 -k2,2n > pcrAmplicon.bed + +bedToBigBed pcrAmplicon.bed ../../chrom.sizes pcrAmplicon.bb -as=pcrAmplicon.as -type=bed12+2