4dcb41c179c6fae38abffb52dd2b764b74301c7c angie Mon Jul 30 16:39:17 2012 -0700 Feature #8551 (Sequence Ontology terms for predicted functional effects on SNP details pages):at Ensembl's request, instead of displaying dbSNP's predicted function terms, instead show terms from the Sequence Ontology (SO). Where possible, terms from Ensembl's list at http://staging.ensembl.org/info/docs/variation/predicted_data.html are used. diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index 9a27b7f..12b1ed4 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -16565,150 +16565,172 @@ int j; for (j = 0; j < alleleCount; j++) { char *al = indivAlleles[j]; boolean alIsAlpha = (isalpha(al[0]) && !sameString(al, "lengthTooLong")); if ((snpIsRc ^ geneIsRc) && alIsAlpha) reverseComplement(al, strlen(al)); char alBase = al[0]; if (alBase == '\0' || sameString(al, refAllele)) continue; int alSize = sameString(al, "-") ? 0 : alIsAlpha ? strlen(al) : -1; if (alSize != refAlleleSize && alSize >= 0 && refAlleleSize >=0) { int diff = alSize - refAlleleSize; if ((diff % 3) != 0) - printf(firstTwoColumnsPctS "frameshift</TD></TR>\n", - geneTrack, geneName); + printf(firstTwoColumnsPctS "%s</TD></TR>\n", + geneTrack, geneName, snpMisoLinkFromFunc("frameshift")); else if (diff > 0) - printf(firstTwoColumnsPctS "%sinsertion of %d codon%s</TD></TR>\n", - (snpCodonPos == 0 ? "" : "frameshift and"), - geneTrack, geneName, (int)(diff/3), (diff > 3) ? "s" : ""); + printf(firstTwoColumnsPctS "%s (insertion of %d codon%s)</TD></TR>\n", + geneTrack, geneName, snpMisoLinkFromFunc("inframe_insertion"), + (int)(diff/3), (diff > 3) ? "s" : ""); else - printf(firstTwoColumnsPctS "%sdeletion of %d codon%s</TD></TR>\n", - (snpCodonPos == 0 ? "" : "frameshift and"), - geneTrack, geneName, (int)(-diff/3), (diff < -3) ? "s" : ""); + printf(firstTwoColumnsPctS "%s (deletion of %d codon%s)</TD></TR>\n", + geneTrack, geneName, snpMisoLinkFromFunc("inframe_deletion"), + (int)(-diff/3), (diff < -3) ? "s" : ""); } else if (alSize == 1 && refIsSingleBase) { char snpCodon[4]; safecpy(snpCodon, sizeof(snpCodon), refCodon); snpCodon[snpCodonPos] = alBase; char snpAA = lookupCodon(snpCodon); if (snpAA == '\0') snpAA = '*'; char refCodonHtml[16], snpCodonHtml[16]; safecpy(refCodonHtml, sizeof(refCodonHtml), highlightCodonBase(refCodon, snpCodonPos)); safecpy(snpCodonHtml, sizeof(snpCodonHtml), highlightCodonBase(snpCodon, snpCodonPos)); if (refAA != snpAA) - printf(firstTwoColumnsPctS "%ssense %c (%s) --> %c (%s)</TD></TR>\n", - geneTrack, geneName, - ((refAA == '*' || snpAA == '*') ? "non" : "mis"), + { + if (refAA == '*') + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n", + geneTrack, geneName, snpMisoLinkFromFunc("stop-loss"), + refAA, refCodonHtml, snpAA, snpCodonHtml); + else if (snpAA == '*') + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n", + geneTrack, geneName, snpMisoLinkFromFunc("nonsense"), refAA, refCodonHtml, snpAA, snpCodonHtml); else - printf(firstTwoColumnsPctS - "coding-synon %c (%s) --> %c (%s)</TD></TR>\n", - geneTrack, geneName, refAA, refCodonHtml, snpAA, snpCodonHtml); + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n", + geneTrack, geneName, snpMisoLinkFromFunc("missense"), + refAA, refCodonHtml, snpAA, snpCodonHtml); } else - printf(firstTwoColumnsPctS "%s --> %s</TD></TR>\n", - geneTrack, geneName, refAllele, al); + { + if (refAA == '*') + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n", + geneTrack, geneName, snpMisoLinkFromFunc("stop_retained_variant"), + refAA, refCodonHtml, snpAA, snpCodonHtml); + else + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n", + geneTrack, geneName, snpMisoLinkFromFunc("coding-synon"), + refAA, refCodonHtml, snpAA, snpCodonHtml); + } + } + else + printf(firstTwoColumnsPctS "%s %s --> %s</TD></TR>\n", + geneTrack, geneName, snpMisoLinkFromFunc("cds-synonymy-unknown"), refAllele, al); } } void printSnp125FunctionInGene(struct snp125 *snp, char *geneTable, char *geneTrack, struct genePred *gene) /* Given a SNP and a gene that overlaps it, say where in the gene it overlaps * and if in CDS, say what effect the coding alleles have. */ { int snpStart = snp->chromStart, snpEnd = snp->chromEnd; int cdsStart = gene->cdsStart, cdsEnd = gene->cdsEnd; boolean geneIsRc = sameString(gene->strand, "-"); char *geneName = getSymbolForGeneName(geneTable, gene->name); int i, iStart = 0, iEnd = gene->exonCount, iIncr = 1; if (geneIsRc) { iStart = gene->exonCount - 1; iEnd = -1; iIncr = -1; } for (i = iStart; i != iEnd; i += iIncr) { int exonStart = gene->exonStarts[i], exonEnd = gene->exonEnds[i]; if (snpEnd > exonStart && snpStart < exonEnd) { if (snpEnd > cdsStart && snpStart < cdsEnd) printSnp125FunctionInCDS(snp, geneTable, geneTrack, gene, i, geneName); else if (cdsEnd > cdsStart) - printf(firstTwoColumnsPctS "untranslated-%d</TD></TR>\n", geneTrack, geneName, - (geneIsRc ^ (snpEnd < cdsStart)) ? 5 : 3); + { + boolean is5Prime = geneIsRc ^ (snpEnd < cdsStart); + printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName, + snpMisoLinkFromFunc((is5Prime) ? "untranslated-5" : "untranslated-3")); + } else - printf(firstTwoColumnsPctS "noncoding gene</TD></TR>\n", geneTrack, geneName); + printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName, + snpMisoLinkFromFunc("ncRNA")); } if (i > 0) { int intronStart = gene->exonEnds[i-1], intronEnd = gene->exonStarts[i]; if (snpStart < intronStart+2 && snpEnd > intronStart) - printf(firstTwoColumnsPctS "intron, splice-%d</TD></TR>\n", - geneTrack, geneName, - (geneIsRc ? 3 : 5)); + printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName, + snpMisoLinkFromFunc(geneIsRc ? "splice-3" : "splice-5")); else if (snpStart < intronEnd-2 && snpEnd > intronStart+2) - printf(firstTwoColumnsPctS "intron</TD></TR>\n", geneTrack, geneName); + printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName, + snpMisoLinkFromFunc("intron")); else if (snpStart < intronEnd && snpEnd > intronEnd-2) - printf(firstTwoColumnsPctS "intron, splice-%d</TD></TR>\n", - geneTrack, geneName, - (geneIsRc ? 5 : 3)); + printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName, + snpMisoLinkFromFunc(geneIsRc ? "splice-5" : "splice-3")); } } } void printSnp125NearGenes(struct sqlConnection *conn, struct snp125 *snp, char *geneTable, char *geneTrack) /* Search upstream and downstream of snp for neigh */ { struct sqlResult *sr; char query[512]; char **row; int snpStart = snp->chromStart, snpEnd = snp->chromEnd; int nearCount = 0; int maxDistance = 10000; /* query to the left: */ safef(query, sizeof(query), "select name,txEnd,strand from %s " "where chrom = '%s' and txStart < %d and txEnd > %d", geneTable, snp->chrom, snpStart, snpStart - maxDistance); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *gene = row[0]; char *geneName = getSymbolForGeneName(geneTable, gene); int end = sqlUnsigned(row[1]); char *strand = row[2]; - printf(firstTwoColumnsPctS "%d bases %sstream</TD></TR>\n", - geneTrack, geneName, (snpStart - end + 1), - (strand[0] == '-' ? "up" : "down")); + boolean isRc = strand[0] == '-'; + printf(firstTwoColumnsPctS "%s (%d bases %sstream)</TD></TR>\n", + geneTrack, geneName, snpMisoLinkFromFunc(isRc ? "near-gene-5" : "near-gene-3"), + (snpStart - end + 1), (isRc ? "up" : "down")); nearCount++; } sqlFreeResult(&sr); /* query to the right: */ safef(query, sizeof(query), "select name,txStart,strand from %s " "where chrom = '%s' and txStart < %d and txEnd > %d", geneTable, snp->chrom, snpEnd + maxDistance, snpEnd); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *gene = row[0]; char *geneName = getSymbolForGeneName(geneTable, gene); int start = sqlUnsigned(row[1]); char *strand = row[2]; - printf(firstTwoColumnsPctS "%d bases %sstream</TD></TR>\n", - geneTrack, geneName, (start - snpEnd + 1), - (strand[0] == '-' ? "down" : "up")); + boolean isRc = strand[0] == '-'; + printf(firstTwoColumnsPctS "%s (%d bases %sstream)</TD></TR>\n", + geneTrack, geneName, snpMisoLinkFromFunc(isRc ? "near-gene-3" : "near-gene-5"), + (start - snpEnd + 1), (isRc ? "down" : "up")); nearCount++; } sqlFreeResult(&sr); if (nearCount == 0) printf("<TR><TD>%s </TD><TD></TD><TD>intergenic</TD></TR>", geneTrack); } static struct genePred *getGPsWithFrames(struct sqlConnection *conn, char *geneTable, char *chrom, int start, int end) /* Given a known-to-exist genePred table name and a range, return * genePreds in range with exonFrames populated. */ { struct genePred *gpList = NULL; boolean hasBin; struct sqlResult *sr = hRangeQuery(conn, geneTable, chrom, start, end, NULL, &hasBin); @@ -16783,87 +16805,98 @@ /* Translate an integer function code from NCBI into an abbreviated description. * Do not free return value! */ // Might be a good idea to flesh this out with all codes, libify, and share with // snpNcbiToUcsc instead of partially duplicating. { switch (funcCode) { case 3: return "coding-synon"; case 8: return "cds-reference"; case 41: return "nonsense"; case 42: return "missense"; + case 43: + return "stop-loss"; case 44: return "frameshift"; + case 45: + return "cds-indel"; default: { static char buf[16]; safef(buf, sizeof(buf), "%d", funcCode); return buf; } } } void printSnp125CodingAnnotations(struct trackDb *tdb, struct snp125 *snp) /* If tdb specifies extra table(s) that contain protein-coding annotations, * show the effects of SNP on transcript coding sequences. */ { char *tables = trackDbSetting(tdb, "codingAnnotations"); if (isEmpty(tables)) return; struct sqlConnection *conn = hAllocConn(database); struct slName *tbl, *tableList = slNameListFromString(tables, ','); struct dyString *query = dyStringNew(0); for (tbl = tableList; tbl != NULL; tbl = tbl->next) { if (!sqlTableExists(conn, tbl->name)) continue; char setting[512]; safef(setting, sizeof(setting), "codingAnnoLabel_%s", tbl->name); - char *label = trackDbSettingOrDefault(tdb, setting, tbl->name); + char *label = trackDbSettingOrDefault(tdb, setting, NULL); + if (label == NULL && endsWith(tbl->name, "DbSnp")) + label = "dbSNP"; + else + label = tbl->name; boolean hasBin = hIsBinned(database, tbl->name); boolean hasCoords = (sqlFieldIndex(conn, tbl->name, "chrom") != -1); int rowOffset = hasBin + (hasCoords ? 3 : 0); dyStringClear(query); dyStringPrintf(query, "select * from %s where name = '%s'", tbl->name, snp->name); if (hasCoords) dyStringPrintf(query, " and chrom = '%s' and chromStart = %d", seqName, snp->chromStart); struct sqlResult *sr = sqlGetResult(conn, query->string); char **row; boolean first = TRUE; while ((row = sqlNextRow(sr)) != NULL) { if (first) { printf("<BR><B>Coding annotations by %s:</B><BR>\n", label); first = FALSE; } struct snp125CodingCoordless *anno = snp125CodingCoordlessLoad(row+rowOffset); int i; boolean gotRef = (anno->funcCodes[0] == 8); for (i = 0; i < anno->alleleCount; i++) { memSwapChar(anno->peptides[i], strlen(anno->peptides[i]), 'X', '*'); if (anno->funcCodes[i] == 8) continue; + char *txName = anno->transcript; + if (startsWith("NM_", anno->transcript)) + txName = getSymbolForGeneName("refGene", anno->transcript); char *func = dbSnpFuncFromInt(anno->funcCodes[i]); - printf("%s: %s ", anno->transcript, func); - if (sameString(func, "frameshift")) + printf("%s: %s ", txName, snpMisoLinkFromFunc(func)); + if (sameString(func, "frameshift") || sameString(func, "cds-indel")) { puts("<BR>"); continue; } if (gotRef) printf("%s (%s) --> ", anno->peptides[0], highlightCodonBase(anno->codons[0], anno->frame)); printf("%s (%s)<BR>\n", anno->peptides[i], highlightCodonBase(anno->codons[i], anno->frame)); } } sqlFreeResult(&sr); } hFreeConn(&conn); } @@ -16954,31 +16987,32 @@ void printSnp125Info(struct trackDb *tdb, struct snp132Ext *snp, int version) /* print info on a snp125 */ { struct snp125 *snp125 = (struct snp125 *)snp; printSnpOrthoSummary(tdb, snp->name, snp->observed); if (differentString(snp->strand,"?")) printf("<B>Strand: </B>%s<BR>\n", snp->strand); printf("<B>Observed: </B>%s<BR>\n", snp->observed); printSnpAlleleAndOrthos(tdb, snp125, version); puts("<BR><TABLE border=0 cellspacing=0 cellpadding=0>"); if (version <= 127) printf("<TR><TD><B><A HREF=\"#LocType\">Location Type</A></B></TD><TD>%s</TD></TR>\n", snp->locType); printf("<TR><TD><B><A HREF=\"#Class\">Class</A></B></TD><TD>%s</TD></TR>\n", snp->class); printf("<TR><TD><B><A HREF=\"#Valid\">Validation</A></B></TD><TD>%s</TD></TR>\n", snp->valid); -printf("<TR><TD><B><A HREF=\"#Func\">Function</A></B></TD><TD>%s</TD></TR>\n", snp->func); +printf("<TR><TD><B><A HREF=\"#Func\">Function</A></B></TD><TD>%s</TD></TR>\n", + snpMisoLinkFromFunc(snp->func)); printf("<TR><TD><B><A HREF=\"#MolType\">Molecule Type</A> </B></TD><TD>%s</TD></TR>\n", snp->molType); if (snp->avHet>0) printf("<TR><TD><B><A HREF=\"#AvHet\">Average Heterozygosity</A> </TD>" "<TD></B>%.3f +/- %.3f</TD></TR>\n", snp->avHet, snp->avHetSE); printf("<TR><TD><B><A HREF=\"#Weight\">Weight</A></B></TD><TD>%d</TD></TR>\n", snp->weight); if (version >= 132) printSnp132ExtraColumns(tdb, snp); else printf("</TABLE>\n"); printSnp125CodingAnnotations(tdb, snp125); writeSnpExceptionWithVersion(tdb, snp, version); printSnp125Function(tdb, snp125); }