36f5fda1377cc3f22da3db6ed52823186632ff15 angie Wed Aug 1 12:34:56 2012 -0700 Feature #8551 (Sequence Ontology terms for predicted functional effects on SNP details pages):Adding splice_region_variant, defined by SO as "A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron." diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index c663c97..c0cb99d 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -16547,124 +16547,141 @@ char *indivAlleles[64]; int alleleCount = chopString(alleleStr, "/", indivAlleles, ArraySize(indivAlleles)); int j; for (j = 0; j < alleleCount; j++) { char *al = indivAlleles[j]; boolean alIsAlpha = (isalpha(al[0]) && !sameString(al, "lengthTooLong")); if ((snpIsRc ^ geneIsRc) && alIsAlpha) reverseComplement(al, strlen(al)); char alBase = al[0]; if (alBase == '\0' || sameString(al, refAllele)) continue; int alSize = sameString(al, "-") ? 0 : alIsAlpha ? strlen(al) : -1; if (alSize != refAlleleSize && alSize >= 0 && refAlleleSize >=0) { + int diff = alSize - refAlleleSize; if ((diff % 3) != 0) - printf(firstTwoColumnsPctS "%s</TD></TR>\n", + printf(firstTwoColumnsPctS "%s\n", geneTrack, geneName, snpMisoLinkFromFunc("frameshift")); else if (diff > 0) - printf(firstTwoColumnsPctS "%s (insertion of %d codon%s)</TD></TR>\n", + printf(firstTwoColumnsPctS "%s (insertion of %d codon%s)\n", geneTrack, geneName, snpMisoLinkFromFunc("inframe_insertion"), (int)(diff/3), (diff > 3) ? "s" : ""); else - printf(firstTwoColumnsPctS "%s (deletion of %d codon%s)</TD></TR>\n", + printf(firstTwoColumnsPctS "%s (deletion of %d codon%s)\n", geneTrack, geneName, snpMisoLinkFromFunc("inframe_deletion"), (int)(-diff/3), (diff < -3) ? "s" : ""); } else if (alSize == 1 && refIsSingleBase) { char snpCodon[4]; safecpy(snpCodon, sizeof(snpCodon), refCodon); snpCodon[snpCodonPos] = alBase; char snpAA = lookupCodon(snpCodon); if (snpAA == '\0') snpAA = '*'; char refCodonHtml[16], snpCodonHtml[16]; safecpy(refCodonHtml, sizeof(refCodonHtml), highlightCodonBase(refCodon, snpCodonPos)); safecpy(snpCodonHtml, sizeof(snpCodonHtml), highlightCodonBase(snpCodon, snpCodonPos)); if (refAA != snpAA) { if (refAA == '*') - printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n", + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n", geneTrack, geneName, snpMisoLinkFromFunc("stop-loss"), refAA, refCodonHtml, snpAA, snpCodonHtml); else if (snpAA == '*') - printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n", + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n", geneTrack, geneName, snpMisoLinkFromFunc("nonsense"), refAA, refCodonHtml, snpAA, snpCodonHtml); else - printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n", + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n", geneTrack, geneName, snpMisoLinkFromFunc("missense"), refAA, refCodonHtml, snpAA, snpCodonHtml); } else { if (refAA == '*') - printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n", + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n", geneTrack, geneName, snpMisoLinkFromFunc("stop_retained_variant"), refAA, refCodonHtml, snpAA, snpCodonHtml); else - printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n", + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n", geneTrack, geneName, snpMisoLinkFromFunc("coding-synon"), refAA, refCodonHtml, snpAA, snpCodonHtml); } } else - printf(firstTwoColumnsPctS "%s %s --> %s</TD></TR>\n", + printf(firstTwoColumnsPctS "%s %s --> %s\n", geneTrack, geneName, snpMisoLinkFromFunc("cds-synonymy-unknown"), refAllele, al); } } void printSnp125FunctionInGene(struct snp125 *snp, char *geneTable, char *geneTrack, struct genePred *gene) /* Given a SNP and a gene that overlaps it, say where in the gene it overlaps * and if in CDS, say what effect the coding alleles have. */ { int snpStart = snp->chromStart, snpEnd = snp->chromEnd; int cdsStart = gene->cdsStart, cdsEnd = gene->cdsEnd; boolean geneIsRc = sameString(gene->strand, "-"); char *geneName = getSymbolForGeneName(geneTable, gene->name); int i, iStart = 0, iEnd = gene->exonCount, iIncr = 1; if (geneIsRc) { iStart = gene->exonCount - 1; iEnd = -1; iIncr = -1; } for (i = iStart; i != iEnd; i += iIncr) { int exonStart = gene->exonStarts[i], exonEnd = gene->exonEnds[i]; if (snpEnd > exonStart && snpStart < exonEnd) { if (snpEnd > cdsStart && snpStart < cdsEnd) printSnp125FunctionInCDS(snp, geneTable, geneTrack, gene, i, geneName); else if (cdsEnd > cdsStart) { boolean is5Prime = geneIsRc ^ (snpEnd < cdsStart); - printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName, + printf(firstTwoColumnsPctS "%s\n", geneTrack, geneName, snpMisoLinkFromFunc((is5Prime) ? "untranslated-5" : "untranslated-3")); } else - printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName, + printf(firstTwoColumnsPctS "%s\n", geneTrack, geneName, snpMisoLinkFromFunc("ncRNA")); } + // SO term splice_region_variant applies to first/last 3 bases of exon + // and first/last 3-8 bases of intron + if ((i > 0 && snpStart < exonStart+3 && snpEnd > exonStart) || + (i < gene->exonCount-1 && snpStart < exonEnd && snpEnd > exonEnd-3)) + printf(", %s", snpMisoLinkFromFunc("splice_region_variant")); + puts("</TD></TR>"); if (i > 0) { int intronStart = gene->exonEnds[i-1], intronEnd = gene->exonStarts[i]; + if (snpEnd < intronStart || snpStart > intronEnd) + continue; if (snpStart < intronStart+2 && snpEnd > intronStart) printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName, snpMisoLinkFromFunc(geneIsRc ? "splice-3" : "splice-5")); - else if (snpStart < intronEnd-2 && snpEnd > intronStart+2) + else if (snpStart < intronStart+8 && snpEnd > intronStart+2) + printf(firstTwoColumnsPctS "%s, %s</TD></TR>\n", geneTrack, geneName, + snpMisoLinkFromFunc("intron_variant"), + snpMisoLinkFromFunc("splice_region_variant")); + else if (snpStart < intronEnd-8 && snpEnd > intronStart+8) printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName, snpMisoLinkFromFunc("intron")); + else if (snpStart < intronEnd-2 && snpEnd > intronEnd-8) + printf(firstTwoColumnsPctS "%s, %s</TD></TR>\n", geneTrack, geneName, + snpMisoLinkFromFunc("intron_variant"), + snpMisoLinkFromFunc("splice_region_variant")); else if (snpStart < intronEnd && snpEnd > intronEnd-2) printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName, snpMisoLinkFromFunc(geneIsRc ? "splice-5" : "splice-3")); } } } void printSnp125NearGenes(struct sqlConnection *conn, struct snp125 *snp, char *geneTable, char *geneTrack) /* Search upstream and downstream of snp for neigh */ { struct sqlResult *sr; char query[512]; char **row; int snpStart = snp->chromStart, snpEnd = snp->chromEnd;