be4311c07e14feb728abc6425ee606ffaa611a58 markd Fri Jan 22 06:46:58 2021 -0800 merge with master diff --git src/hg/makeDb/gnomad/combine.awk src/hg/makeDb/gnomad/combine.awk index c348079..3ab87e2 100755 --- src/hg/makeDb/gnomad/combine.awk +++ src/hg/makeDb/gnomad/combine.awk @@ -1,80 +1,82 @@ #!/usr/bin/awk -f ### # re-organizes the output of a join command into a valid bed12+ # should be run in a pipe, for example: # join -t $'\t' -1 4 -2 7 gencode.bed12 pliByTranscript.trimmed | combineGencodePli.awk -v doTranscripts=true | sort -k1,1 -k2,2n > pliByTranscript.bed # # The doTranscripts argument tells the script which accession to use as the name field -# in the final bed, the ENST accession for by_transcript, and the ENSG accession for +# in the final bed, which is the ENST accession for by_transcript, and the ENSG accession for # by_gene. ### BEGIN { FS="\t"; OFS="\t"; isTranscripts=doTranscripts } { chrom=$2 gnomadChrom = sprintf("chr%s", $13) if (chrom != gnomadChrom) { # so far just the multiple mapping PAR regions printf "bad join: %s\n", $0 > "/dev/stderr" next } chromStart=$3 chromEnd=$4 if (isTranscripts == "true") name=$1 else name=$16 if ($29 == "NA") { - score = -1 + pLI = -1 if ($28 != "NA" && $27 != "NA" && $29 != "NA" && $30 != "NA" && $35 != "NA" && $36 != "NA") { # doesn't come up with this version but you never know printf "error: 'NA' value for pLI but not other metrics, line: %d\n", NR > "/dev/stderr" next } pLof=sprintf("pLoF exp: NA, obs: NA, pLI = NA, o/e = NA (NA)") mouseOver=sprintf("LOEUF: NA, pLI: NA") } else { - score=$29 + pLI=sprintf("%0.2f", $29) pLof=sprintf("pLoF exp: %.1f, obs: %d, pLI = %.2f, o/e = %.2f (%.2f - %.2f)", $28,$27,$29,$30,$35,$36) mouseOver=sprintf("LOEUF: %.2f, pLI: %.2f", $36, $29) + loeuf=sprintf("%0.2f", $36) } strand=$6 thickStart=$7 thickEnd=$8 itemRgb="" -if (score == -1) {itemRgb = "160,160,160"} -else if (score >= 0 && score < 0.1) {itemRgb = "0,244,153"} -else if (score >= 0.1 && score < 0.2) {itemRgb = "74,240,94"} -else if (score >= 0.2 && score < 0.3) {itemRgb = "127,233,58"} -else if (score >= 0.3 && score < 0.4) {itemRgb = "165,224,26"} -else if (score >= 0.4 && score < 0.5) {itemRgb = "191,210,22"} -else if (score >= 0.5 && score < 0.6) {itemRgb = "210,191,13"} -else if (score >= 0.6 && score < 0.7) {itemRgb = "224,165,8"} -else if (score >= 0.7 && score < 0.8) {itemRgb = "233,127,5"} -else if (score >= 0.8 && score < 0.9) {itemRgb = "240,74,3"} -else if (score >= 0.9 && score <= 1) {itemRgb = "244,0,2"} + +if (loeuf == -1) {itemRgb = "160,160,160"} +else if (loeuf >= 0 && loeuf < 0.1) {itemRgb = "244,0,2"} +else if (loeuf >= 0.1 && loeuf < 0.2) {itemRgb = "240,74,3"} +else if (loeuf >= 0.2 && loeuf < 0.3) {itemRgb = "233,127,5"} +else if (loeuf >= 0.3 && loeuf < 0.4) {itemRgb = "224,165,8"} +else if (loeuf >= 0.4 && loeuf < 0.5) {itemRgb = "210,191,13"} +else if (loeuf >= 0.5 && loeuf < 0.6) {itemRgb = "191,210,22"} +else if (loeuf >= 0.6 && loeuf < 0.7) {itemRgb = "165,224,26"} +else if (loeuf >= 0.7 && loeuf < 0.8) {itemRgb = "127,233,58"} +else if (loeuf >= 0.8 && loeuf < 0.9) {itemRgb = "74,240,94"} +else if (loeuf >= 0.9) {itemRgb = "0,244,153"} else { - printf "error: score '%s' out of range for gene/transcript: %s\n", score, name > "/dev/stderr" + printf "error: loeuf '%s' out of range for gene/transcript: %s\n", loeuf, name > "/dev/stderr" } -if (score == -1) +if (pLI == -1) bedScore = 0 else { - score=sprintf("%.2f", $29) + score=sprintf("%0.2f",pLI) bedScore=sprintf("%d",score*1000) } blockCount=$10 blockSizes=$11 blockStarts=$12 geneName=$18 missense=sprintf("Missense exp: %.1f, obs: %d, Z = %.2f, o/e = %.2f (%.2f - %.2f)", $20,$19,$22,$21,$33,$34) synonymous=sprintf("Synonymous exp: %.1f, obs: %d, Z = %.2f, o/e = %.2f (%.2f - %.2f)", $24,$23,$26,$25,$31,$32) -print chrom, chromStart, chromEnd, name, bedScore, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts, mouseOver, score, geneName, synonymous, missense, pLof +print chrom, chromStart, chromEnd, name, bedScore, strand, thickStart, thickEnd, itemRgb, blockCount, blockSizes, blockStarts, mouseOver, loeuf, pLI, geneName, synonymous, missense, pLof }