#!/usr/bin/awk -f

# Turn a gnomAD SV file into UCSC bigBed 9+ input lines.
#
# History: commit 9592c56977d703a9f84162e8a4ed35105b93eede (chmalee,
# Wed Apr 1 10:11:28 2020 -0700), refs #24179 -- get a short list of genes
# into the gnomAD structural variants mouseover, or a message that there are
# too many genes if the list is too long.
#
# Input: tab-separated records, columns as read below:
#    $1-$3    chrom, start, end
#    $4       original item name (underscore-separated)
#    $5-$15   PROTEIN_CODING__* annotation columns (comma-separated values,
#             "NA", or "True"/"False")
#    $16      SV length (anything not > 0 is reported as "NA")
#    $17      SV type/class
#    $18      allele number
#    $19      allele count(s), comma-separated
#    $20      allele frequency(ies), comma-separated
#    $21-$22  nhet, nhomalt
#
# Output: one tab-separated line per input record: the nine standard bed
# fields followed by the extra fields and, last, the mouseOver text.

BEGIN {
    FS = "\t"
    OFS = "\t"

    # Item color per SV class. A lookup table is used instead of the
    # gawk-only "switch" statement so the script runs under any awk.
    defaultColor = "154,182,160"
    svColor["BND"] = "154,182,160"
    svColor["CPX"] = "182,239,195"
    svColor["CTX"] = "154,182,160"
    svColor["DEL"] = "231,154,144"
    svColor["DUP"] = "143,184,214"
    svColor["INS"] = "231,183,237"
    svColor["INV"] = "250,199,140"
    svColor["MCNV"] = "183,170,214"

    # More genes than this and the mouseOver shows a hint instead of names.
    maxMouseOverGenes = 2
}

# Re-join the comma-separated list in "raw" using ", " as the separator.
# When "fmt" is non-empty every element is passed through sprintf(fmt, ...).
# An empty "raw" is treated as a single empty element, so an empty count
# stays "" and an empty frequency still prints as "0".
function formatList(raw, fmt,    parts, n, i, item, out)
{
    # use split()'s return value: length(array) is not portable awk
    n = split(raw, parts, ",")
    if (n == 0) {
        parts[1] = ""
        n = 1
    }
    out = ""
    for (i = 1; i <= n; i++) {
        item = (fmt == "") ? parts[i] : sprintf(fmt, parts[i])
        out = out (i > 1 ? ", " : "") item
    }
    return out
}

# Summarize the genes named in fields first..last of the current record.
# Fields holding "NA", "True" or "False" carry no gene names and are skipped.
# Returns "NA" when no gene is found, the genes joined with ", " when there
# are at most "limit" of them, and a fixed hint message otherwise.
function geneSummary(first, last, limit,    col, k, n, genes, total, out)
{
    total = 0
    out = ""
    for (col = first; col <= last; col++) {
        if ($col == "NA" || $col == "True" || $col == "False")
            continue
        n = split($col, genes, ",")
        for (k = 1; k <= n; k++) {
            total++
            if (total > limit)
                return "Too many genes affected, click on item for full list."
            out = (total == 1) ? genes[k] : (out ", " genes[k])
        }
    }
    return (total == 0) ? "NA" : out
}

{
    chrom = $1
    start = $2
    end = $3

    # Get the list of genes affected for the mouseOver. This must happen
    # before the gsub() below, while the fields are still split on bare commas.
    geneListStr = geneSummary(5, 12, maxMouseOverGenes)

    # make the list of affected genes for each type print nicely by adding a
    # space after the commas
    for (i = 5; i <= 15; i++)
        gsub(",", ", ", $i)

    PROTEIN_CODING__COPY_GAIN = $5
    PROTEIN_CODING__DUP_LOF = $6
    PROTEIN_CODING__DUP_PARTIAL = $7
    PROTEIN_CODING__INTERGENIC = $8
    PROTEIN_CODING__INTRONIC = $9
    PROTEIN_CODING__INV_SPAN = $10
    PROTEIN_CODING__LOF = $11
    PROTEIN_CODING__MSV_EXON_OVR = $12
    PROTEIN_CODING__NEAREST_TSS = $13
    PROTEIN_CODING__PROMOTER = $14
    PROTEIN_CODING__UTR = $15

    # size of NA when variant is a breakend or something else
    if ($16 > 0)
        svlen = $16
    else
        svlen = "NA"

    svtype = $17
    an = $18
    nhet = $21
    nhomalt = $22

    # Every allele count / frequency is kept; the previous loop bound
    # (i < length(arr) - 1) silently dropped the second-to-last value.
    ac = formatList($19, "")
    af = formatList($20, "%0.2g")

    # the item name is the 3rd through 5th underscore-separated pieces of $4
    split($4, nameParts, "_")
    name = nameParts[3] "_" nameParts[4] "_" nameParts[5]

    # the position is shown with start + 1
    # NOTE(review): presumably 0-based bed start -> 1-based display; confirm
    mouseOver = "Gene(s) affected: " geneListStr ", Position: " chrom ":" (start + 1) "-" end ", Size: " svlen ", Class: " svtype ", Allele Count: " ac ", Allele Number: " an ", Allele Frequency: " af

    # unknown classes fall back to the default color
    color = (svtype in svColor) ? svColor[svtype] : defaultColor

    print chrom, start, end, name, 0, ".", start, end, color, svlen, svtype, ac, an, af, nhet, nhomalt, PROTEIN_CODING__COPY_GAIN, PROTEIN_CODING__DUP_LOF, PROTEIN_CODING__DUP_PARTIAL, PROTEIN_CODING__INTERGENIC, PROTEIN_CODING__INTRONIC, PROTEIN_CODING__INV_SPAN, PROTEIN_CODING__LOF, PROTEIN_CODING__MSV_EXON_OVR, PROTEIN_CODING__NEAREST_TSS, PROTEIN_CODING__PROMOTER, PROTEIN_CODING__UTR, mouseOver
}