e2b40d37a4575d5df1665e85214a387050a1f094
lrnassar
  Thu May 12 14:40:19 2022 -0700
Building gnomAD pLI track on hg38 (prev only on hg19) refs #29358

diff --git src/hg/makeDb/doc/hg38/gnomadPLI.txt src/hg/makeDb/doc/hg38/gnomadPLI.txt
new file mode 100644
index 0000000..dcb3d19
--- /dev/null
+++ src/hg/makeDb/doc/hg38/gnomadPLI.txt
@@ -0,0 +1,42 @@
+###Lou 5/12/2022 refs #29358
+# Porting the gnomAD pLI track (gnomadPLI) from hg19 to hg38
+
+mkdir /hive/data/genomes/hg38/bed/gnomAD.2.1.1
+cd /hive/data/genomes/hg38/bed/gnomAD.2.1.1
+ln -s /hive/data/outside/gnomAD.2/constraint/*.bgz /hive/data/genomes/hg38/bed/gnomAD.2.1.1/
+cp /hive/data/outside/gnomAD.2/*.as .
+
+hgsql -Ne "select name from wgEncodeGencodeCompV40" hg38 | tr '.' '\t' | cut -f1 > hg38.gencodeV40.transcripts
+
+transcriptFile=gnomad.v2.1.1.lof_metrics.by_transcript.txt.bgz
+geneFile=gnomad.v2.1.1.lof_metrics.by_gene.txt.bgz
+
+hgsql -Ne "select name from wgEncodeGencodeCompV40" hg38 | tr '.' '\t' | cut -f1 > hg38.gencodeV40.transcripts
+hgsql -Ne "select * from wgEncodeGencodeCompV40" hg38 | cut -f2- | genePredToBed stdin stdout | sed -Ee 's/\.[0-9]+//' | sort -k4 > hg38.gencodeV40.bed12
+
+gzip -cd $geneFile | tail -n +2 \
+    | tawk '{print $75,$76,$77,$64,$65,$1,$2,$3,$4,$5,$33,$12,$13,$14,$32,$17,$20,$21,$24,$25,$26,$27,$28,$29,$30}' \
+    | sort -k7 | join -t $'\t' -1 4 -2 7 hg38.gencodeV40.bed12 - \
+    | ~/kent/src/hg/makeDb/gnomad/combine.awk -v doTranscripts=false 2>genes.chromMismatches
+
+gzip -cd $transcriptFile | tail -n +2 \
+    | tawk '{print $76,$77,$78,$65,$66,$1,$2,$4,$5,$6,$34,$13,$14,$15,$33,$18,$21,$22,$25,$26,$27,$28,$29,$30,$31}' \
+    | sort -k7 | join -t $'\t' -1 4 -2 7 hg38.gencodeV40.bed12 - \
+    | ~/kent/src/hg/makeDb/gnomad/combine.awk -v doTranscripts=true 2>transcripts.chromMismatches
+
+sort -k1,1 -k2,2n pliByTranscript.tab > pliByTranscript.bed
+sort -k1,1 -k2,2n missenseByTranscript.tab > missenseByTranscript.bed
+sort -k1,1 -k2,2n pliByGene.tab > pliByGene.bed
+sort -k1,1 -k2,2n missenseByGene.tab > missenseByGene.bed
+
+sizes=/hive/data/genomes/hg38/chrom.sizes
+bedToBigBed -type=bed12+6 -as=pliMetrics.as -tab -extraIndex=name,geneName pliByGene.bed $sizes pliByGene.bb
+bedToBigBed -type=bed12+6 -as=pliMetrics.as -tab -extraIndex=name,geneName pliByTranscript.bed $sizes pliByTranscript.bb
+bedToBigBed -type=bed12+5 -as=missenseMetrics.as -tab -extraIndex=name,geneName missenseByGene.bed $sizes missenseByGene.bb
+bedToBigBed -type=bed12+5 -as=missenseMetrics.as -tab -extraIndex=name,geneName missenseByTranscript.bed $sizes missenseByTranscript.bb
+
+cd /gbdb/hg38/gnomAD/pLI/
+ln -s /hive/data/genomes/hg38/bed/gnomAD.2.1.1/pliByGene.bb
+ln -s /hive/data/genomes/hg38/bed/gnomAD.2.1.1/pliByTranscript.bb
+ln -s /hive/data/genomes/hg38/bed/gnomAD.2.1.1/missenseByGene.bb
+ln -s /hive/data/genomes/hg38/bed/gnomAD.2.1.1/missenseByTranscript.bb