f0026f13ab0b15e4eff7093bcc7ee31e1c34770c
chmalee
  Thu Jan 16 10:02:12 2020 -0800
Last gnomAD related scripts to add

diff --git src/hg/makeDb/gnomad/combine.awk src/hg/makeDb/gnomad/combine.awk
new file mode 100755
index 0000000..3c2fdae
--- /dev/null
+++ src/hg/makeDb/gnomad/combine.awk
@@ -0,0 +1,80 @@
+#!/usr/bin/awk -f
+### # re-organizes the output of a join command into a valid bed12+
+# should be run in a pipe, for example:
+# join -t $'\t' -1 4 -2 7 gencode.bed12 pliByTranscript.trimmed | combineGencodePli.awk -v doTranscripts=true | sort -k1,1 -k2,2n > pliByTranscript.bed
+#
+# The doTranscripts argument tells the script which accession to use as the name field
+# in the final bed, the ENST accession for by_transcript, and the ENSG accession for
+# by_gene.
+###
+BEGIN {
+    FS="\t";
+    OFS="\t";
+    isTranscripts=doTranscripts
+}
+
+{
+chrom=$2
+gnomadChrom = sprintf("chr%s", $13)
+if (chrom != gnomadChrom) {
+    # so far just the multiple mapping PAR regions
+    printf "bad join: %s\n", $0 > "/dev/stderr"
+    next
+}
+
+chromStart=$3
+chromEnd=$4
+if (isTranscripts == "true")
+    name=$1
+else
+    name=$16
+
+if ($29 == "NA") {
+    score = -1
+    if ($28 != "NA" && $27 != "NA" && $29 != "NA" && $30 != "NA" && $35 != "NA" && $36 != "NA") {
+        # doesn't come up with this version but you never know
+        printf "error: 'NA' value for pLI but not other metrics, line: %d\n", NR > "/dev/stderr"
+        next
+    }
+    pLof=sprintf("pLoF exp: NA, obs: NA, pLI = NA, o/e = NA (NA)")
+    mouseOver=sprintf("LOEUF: NA, pLI: NA")
+} else {
+    score=$29
+    pLof=sprintf("pLoF exp: %.1f, obs: %d, pLI = %.2f, o/e = %.2f (%.2f - %.2f)", $28,$27,$29,$30,$35,$36)
+    mouseOver=sprintf("LOEUF: %.2f, pLI: %.2f", $36, $29)
+}
+strand=$6
+thickStart=$7
+thickEnd=$8
+itemRgb=""
+if (score == -1) {itemRgb = "0,0,0"}
+else if (score >= 0 && score < 0.1) {itemRgb = "0,244,153"}
+else if (score >= 0.1 && score < 0.2) {itemRgb = "74,240,94"}
+else if (score >= 0.2 && score < 0.3) {itemRgb = "127,233,58"}
+else if (score >= 0.3 && score < 0.4) {itemRgb = "165,224,26"}
+else if (score >= 0.4 && score < 0.5) {itemRgb = "191,210,22"}
+else if (score >= 0.5 && score < 0.6) {itemRgb = "210,191,13"}
+else if (score >= 0.6 && score < 0.7) {itemRgb = "224,165,8"}
+else if (score >= 0.7 && score < 0.8) {itemRgb = "233,127,5"}
+else if (score >= 0.8 && score < 0.9) {itemRgb = "240,74,3"}
+else if (score >= 0.9 && score <= 1) {itemRgb = "244,0,2"}
+else {
+    printf "error: score '%s' out of range for gene/transcript: %s\n", score, name > "/dev/stderr"
+}
+
+if (score == -1)
+    bedScore = 0
+else {
+    score=sprintf("%.2f", $29)
+    bedScore=sprintf("%d",score*1000)
+}
+
+blockCount=$10
+blockSizes=$11
+blockStarts=$12
+geneName=$18
+missense=sprintf("Missense exp: %.1f, obs: %d, Z = %.2f, o/e = %.2f (%.2f - %.2f)", $20,$19,$22,$21,$33,$34)
+synonymous=sprintf("Synonymous exp: %.1f, obs: %d, Z = %.2f, o/e = %.2f (%.2f - %.2f)", $24,$23,$26,$25,$31,$32)
+
+print chrom, chromStart, chromEnd, name, bedScore, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts, mouseOver, score, geneName, synonymous, missense, pLof
+}