4dcb41c179c6fae38abffb52dd2b764b74301c7c angie Mon Jul 30 16:39:17 2012 -0700 Feature #8551 (Sequence Ontology terms for predicted functional effects on SNP details pages):at Ensembl's request, instead of displaying dbSNP's predicted function terms, instead show terms from the Sequence Ontology (SO). Where possible, terms from Ensembl's list at http://staging.ensembl.org/info/docs/variation/predicted_data.html are used. diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c index 9a27b7f..12b1ed4 100644 --- src/hg/hgc/hgc.c +++ src/hg/hgc/hgc.c @@ -16565,150 +16565,172 @@ int j; for (j = 0; j < alleleCount; j++) { char *al = indivAlleles[j]; boolean alIsAlpha = (isalpha(al[0]) && !sameString(al, "lengthTooLong")); if ((snpIsRc ^ geneIsRc) && alIsAlpha) reverseComplement(al, strlen(al)); char alBase = al[0]; if (alBase == '\0' || sameString(al, refAllele)) continue; int alSize = sameString(al, "-") ? 0 : alIsAlpha ? strlen(al) : -1; if (alSize != refAlleleSize && alSize >= 0 && refAlleleSize >=0) { int diff = alSize - refAlleleSize; if ((diff % 3) != 0) - printf(firstTwoColumnsPctS "frameshift\n", - geneTrack, geneName); + printf(firstTwoColumnsPctS "%s\n", + geneTrack, geneName, snpMisoLinkFromFunc("frameshift")); else if (diff > 0) - printf(firstTwoColumnsPctS "%sinsertion of %d codon%s\n", - (snpCodonPos == 0 ? "" : "frameshift and"), - geneTrack, geneName, (int)(diff/3), (diff > 3) ? "s" : ""); + printf(firstTwoColumnsPctS "%s (insertion of %d codon%s)\n", + geneTrack, geneName, snpMisoLinkFromFunc("inframe_insertion"), + (int)(diff/3), (diff > 3) ? "s" : ""); else - printf(firstTwoColumnsPctS "%sdeletion of %d codon%s\n", - (snpCodonPos == 0 ? "" : "frameshift and"), - geneTrack, geneName, (int)(-diff/3), (diff < -3) ? "s" : ""); + printf(firstTwoColumnsPctS "%s (deletion of %d codon%s)\n", + geneTrack, geneName, snpMisoLinkFromFunc("inframe_deletion"), + (int)(-diff/3), (diff < -3) ? "s" : ""); } else if (alSize == 1 && refIsSingleBase) { char snpCodon[4]; safecpy(snpCodon, sizeof(snpCodon), refCodon); snpCodon[snpCodonPos] = alBase; char snpAA = lookupCodon(snpCodon); if (snpAA == '\0') snpAA = '*'; char refCodonHtml[16], snpCodonHtml[16]; safecpy(refCodonHtml, sizeof(refCodonHtml), highlightCodonBase(refCodon, snpCodonPos)); safecpy(snpCodonHtml, sizeof(snpCodonHtml), highlightCodonBase(snpCodon, snpCodonPos)); if (refAA != snpAA) - printf(firstTwoColumnsPctS "%ssense %c (%s) --> %c (%s)\n", - geneTrack, geneName, - ((refAA == '*' || snpAA == '*') ? "non" : "mis"), + { + if (refAA == '*') + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n", + geneTrack, geneName, snpMisoLinkFromFunc("stop-loss"), + refAA, refCodonHtml, snpAA, snpCodonHtml); + else if (snpAA == '*') + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n", + geneTrack, geneName, snpMisoLinkFromFunc("nonsense"), refAA, refCodonHtml, snpAA, snpCodonHtml); else - printf(firstTwoColumnsPctS - "coding-synon %c (%s) --> %c (%s)\n", - geneTrack, geneName, refAA, refCodonHtml, snpAA, snpCodonHtml); + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n", + geneTrack, geneName, snpMisoLinkFromFunc("missense"), + refAA, refCodonHtml, snpAA, snpCodonHtml); } else - printf(firstTwoColumnsPctS "%s --> %s\n", - geneTrack, geneName, refAllele, al); + { + if (refAA == '*') + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n", + geneTrack, geneName, snpMisoLinkFromFunc("stop_retained_variant"), + refAA, refCodonHtml, snpAA, snpCodonHtml); + else + printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n", + geneTrack, geneName, snpMisoLinkFromFunc("coding-synon"), + refAA, refCodonHtml, snpAA, snpCodonHtml); + } + } + else + printf(firstTwoColumnsPctS "%s %s --> %s\n", + geneTrack, geneName, snpMisoLinkFromFunc("cds-synonymy-unknown"), refAllele, al); } } void printSnp125FunctionInGene(struct snp125 *snp, char *geneTable, char *geneTrack, struct genePred *gene) /* Given a SNP and a gene that overlaps it, say where in the gene it overlaps * and if in CDS, say what effect the coding alleles have. */ { int snpStart = snp->chromStart, snpEnd = snp->chromEnd; int cdsStart = gene->cdsStart, cdsEnd = gene->cdsEnd; boolean geneIsRc = sameString(gene->strand, "-"); char *geneName = getSymbolForGeneName(geneTable, gene->name); int i, iStart = 0, iEnd = gene->exonCount, iIncr = 1; if (geneIsRc) { iStart = gene->exonCount - 1; iEnd = -1; iIncr = -1; } for (i = iStart; i != iEnd; i += iIncr) { int exonStart = gene->exonStarts[i], exonEnd = gene->exonEnds[i]; if (snpEnd > exonStart && snpStart < exonEnd) { if (snpEnd > cdsStart && snpStart < cdsEnd) printSnp125FunctionInCDS(snp, geneTable, geneTrack, gene, i, geneName); else if (cdsEnd > cdsStart) - printf(firstTwoColumnsPctS "untranslated-%d\n", geneTrack, geneName, - (geneIsRc ^ (snpEnd < cdsStart)) ? 5 : 3); + { + boolean is5Prime = geneIsRc ^ (snpEnd < cdsStart); + printf(firstTwoColumnsPctS "%s\n", geneTrack, geneName, + snpMisoLinkFromFunc((is5Prime) ? "untranslated-5" : "untranslated-3")); + } else - printf(firstTwoColumnsPctS "noncoding gene\n", geneTrack, geneName); + printf(firstTwoColumnsPctS "%s\n", geneTrack, geneName, + snpMisoLinkFromFunc("ncRNA")); } if (i > 0) { int intronStart = gene->exonEnds[i-1], intronEnd = gene->exonStarts[i]; if (snpStart < intronStart+2 && snpEnd > intronStart) - printf(firstTwoColumnsPctS "intron, splice-%d\n", - geneTrack, geneName, - (geneIsRc ? 3 : 5)); + printf(firstTwoColumnsPctS "%s\n", geneTrack, geneName, + snpMisoLinkFromFunc(geneIsRc ? "splice-3" : "splice-5")); else if (snpStart < intronEnd-2 && snpEnd > intronStart+2) - printf(firstTwoColumnsPctS "intron\n", geneTrack, geneName); + printf(firstTwoColumnsPctS "%s\n", geneTrack, geneName, + snpMisoLinkFromFunc("intron")); else if (snpStart < intronEnd && snpEnd > intronEnd-2) - printf(firstTwoColumnsPctS "intron, splice-%d\n", - geneTrack, geneName, - (geneIsRc ? 5 : 3)); + printf(firstTwoColumnsPctS "%s\n", geneTrack, geneName, + snpMisoLinkFromFunc(geneIsRc ? "splice-5" : "splice-3")); } } } void printSnp125NearGenes(struct sqlConnection *conn, struct snp125 *snp, char *geneTable, char *geneTrack) /* Search upstream and downstream of snp for neigh */ { struct sqlResult *sr; char query[512]; char **row; int snpStart = snp->chromStart, snpEnd = snp->chromEnd; int nearCount = 0; int maxDistance = 10000; /* query to the left: */ safef(query, sizeof(query), "select name,txEnd,strand from %s " "where chrom = '%s' and txStart < %d and txEnd > %d", geneTable, snp->chrom, snpStart, snpStart - maxDistance); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *gene = row[0]; char *geneName = getSymbolForGeneName(geneTable, gene); int end = sqlUnsigned(row[1]); char *strand = row[2]; - printf(firstTwoColumnsPctS "%d bases %sstream\n", - geneTrack, geneName, (snpStart - end + 1), - (strand[0] == '-' ? "up" : "down")); + boolean isRc = strand[0] == '-'; + printf(firstTwoColumnsPctS "%s (%d bases %sstream)\n", + geneTrack, geneName, snpMisoLinkFromFunc(isRc ? "near-gene-5" : "near-gene-3"), + (snpStart - end + 1), (isRc ? "up" : "down")); nearCount++; } sqlFreeResult(&sr); /* query to the right: */ safef(query, sizeof(query), "select name,txStart,strand from %s " "where chrom = '%s' and txStart < %d and txEnd > %d", geneTable, snp->chrom, snpEnd + maxDistance, snpEnd); sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { char *gene = row[0]; char *geneName = getSymbolForGeneName(geneTable, gene); int start = sqlUnsigned(row[1]); char *strand = row[2]; - printf(firstTwoColumnsPctS "%d bases %sstream\n", - geneTrack, geneName, (start - snpEnd + 1), - (strand[0] == '-' ? "down" : "up")); + boolean isRc = strand[0] == '-'; + printf(firstTwoColumnsPctS "%s (%d bases %sstream)\n", + geneTrack, geneName, snpMisoLinkFromFunc(isRc ? "near-gene-3" : "near-gene-5"), + (start - snpEnd + 1), (isRc ? "down" : "up")); nearCount++; } sqlFreeResult(&sr); if (nearCount == 0) printf("
Location Type | %s |
Class | %s |
Validation | %s |
Function | %s |
Function | %s |
Molecule Type | %s |
Average Heterozygosity | " "%.3f +/- %.3f |
Weight | %d |