4dcb41c179c6fae38abffb52dd2b764b74301c7c
angie
  Mon Jul 30 16:39:17 2012 -0700
Feature #8551 (Sequence Ontology terms for predicted functional effects on SNP details pages): at Ensembl's request, instead of displaying dbSNP's predicted function terms, show
terms from the Sequence Ontology (SO).  Where possible, terms from Ensembl's list at
http://staging.ensembl.org/info/docs/variation/predicted_data.html are used.

diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c
index 9a27b7f..12b1ed4 100644
--- src/hg/hgc/hgc.c
+++ src/hg/hgc/hgc.c
@@ -16565,150 +16565,172 @@
 int j;
 for (j = 0;  j < alleleCount;  j++)
     {
     char *al = indivAlleles[j];
     boolean alIsAlpha = (isalpha(al[0]) && !sameString(al, "lengthTooLong"));
     if ((snpIsRc ^ geneIsRc) && alIsAlpha)
 	reverseComplement(al, strlen(al));
     char alBase = al[0];
     if (alBase == '\0' || sameString(al, refAllele))
 	continue;
     int alSize = sameString(al, "-") ? 0 : alIsAlpha ? strlen(al) : -1;
     if (alSize != refAlleleSize && alSize >= 0 && refAlleleSize >=0)
 	{
 	int diff = alSize - refAlleleSize;
 	if ((diff % 3) != 0)
-	    printf(firstTwoColumnsPctS "frameshift</TD></TR>\n",
-		   geneTrack, geneName);
+	    printf(firstTwoColumnsPctS "%s</TD></TR>\n",
+		   geneTrack, geneName, snpMisoLinkFromFunc("frameshift"));
 	else if (diff > 0)
-	    printf(firstTwoColumnsPctS "%sinsertion of %d codon%s</TD></TR>\n",
-		   (snpCodonPos == 0 ? "" : "frameshift and"),
-		   geneTrack, geneName, (int)(diff/3), (diff > 3) ?  "s" : "");
+	    printf(firstTwoColumnsPctS "%s (insertion of %d codon%s)</TD></TR>\n",
+		   geneTrack, geneName, snpMisoLinkFromFunc("inframe_insertion"),
+		   (int)(diff/3), (diff > 3) ?  "s" : "");
 	else
-	    printf(firstTwoColumnsPctS "%sdeletion of %d codon%s</TD></TR>\n",
-		   (snpCodonPos == 0 ? "" : "frameshift and"),
-		   geneTrack, geneName, (int)(-diff/3), (diff < -3) ?  "s" : "");
+	    printf(firstTwoColumnsPctS "%s (deletion of %d codon%s)</TD></TR>\n",
+		   geneTrack, geneName, snpMisoLinkFromFunc("inframe_deletion"),
+		   (int)(-diff/3), (diff < -3) ?  "s" : "");
 	}
     else if (alSize == 1 && refIsSingleBase)
 	{
 	char snpCodon[4];
 	safecpy(snpCodon, sizeof(snpCodon), refCodon);
 	snpCodon[snpCodonPos] = alBase;
 	char snpAA = lookupCodon(snpCodon);
 	if (snpAA == '\0') snpAA = '*';
 	char refCodonHtml[16], snpCodonHtml[16];
 	safecpy(refCodonHtml, sizeof(refCodonHtml), highlightCodonBase(refCodon, snpCodonPos));
 	safecpy(snpCodonHtml, sizeof(snpCodonHtml), highlightCodonBase(snpCodon, snpCodonPos));
 	if (refAA != snpAA)
-	    printf(firstTwoColumnsPctS "%ssense %c (%s) --> %c (%s)</TD></TR>\n",
-		   geneTrack, geneName,
-		   ((refAA == '*' || snpAA == '*') ? "non" : "mis"),
+	    {
+	    if (refAA == '*')
+		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n",
+		       geneTrack, geneName, snpMisoLinkFromFunc("stop-loss"),
+		       refAA, refCodonHtml, snpAA, snpCodonHtml);
+	    else if (snpAA == '*')
+		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n",
+		       geneTrack, geneName, snpMisoLinkFromFunc("nonsense"),
 		   refAA, refCodonHtml, snpAA, snpCodonHtml);
 	else
-	    printf(firstTwoColumnsPctS
-		   "coding-synon %c (%s) --> %c (%s)</TD></TR>\n",
-		   geneTrack, geneName, refAA, refCodonHtml, snpAA, snpCodonHtml);
+		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n",
+		       geneTrack, geneName, snpMisoLinkFromFunc("missense"),
+		       refAA, refCodonHtml, snpAA, snpCodonHtml);
 	}
     else
-	printf(firstTwoColumnsPctS "%s --> %s</TD></TR>\n",
-	       geneTrack, geneName, refAllele, al);
+	    {
+	    if (refAA == '*')
+		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n",
+		       geneTrack, geneName, snpMisoLinkFromFunc("stop_retained_variant"),
+		       refAA, refCodonHtml, snpAA, snpCodonHtml);
+	    else
+		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n",
+		       geneTrack, geneName, snpMisoLinkFromFunc("coding-synon"),
+		       refAA, refCodonHtml, snpAA, snpCodonHtml);
+	    }
+	}
+    else
+	printf(firstTwoColumnsPctS "%s %s --> %s</TD></TR>\n",
+	       geneTrack, geneName, snpMisoLinkFromFunc("cds-synonymy-unknown"), refAllele, al);
     }
 }
 
 void printSnp125FunctionInGene(struct snp125 *snp, char *geneTable, char *geneTrack,
 			       struct genePred *gene)
 /* Given a SNP and a gene that overlaps it, say where in the gene it overlaps
  * and if in CDS, say what effect the coding alleles have. */
 {
 int snpStart = snp->chromStart, snpEnd = snp->chromEnd;
 int cdsStart = gene->cdsStart, cdsEnd = gene->cdsEnd;
 boolean geneIsRc = sameString(gene->strand, "-");
 char *geneName = getSymbolForGeneName(geneTable, gene->name);
 int i, iStart = 0, iEnd = gene->exonCount, iIncr = 1;
 if (geneIsRc)
     { iStart = gene->exonCount - 1;  iEnd = -1;  iIncr = -1; }
 for (i = iStart;  i != iEnd;  i += iIncr)
     {
     int exonStart = gene->exonStarts[i], exonEnd = gene->exonEnds[i];
     if (snpEnd > exonStart && snpStart < exonEnd)
 	{
 	if (snpEnd > cdsStart && snpStart < cdsEnd)
 	    printSnp125FunctionInCDS(snp, geneTable, geneTrack, gene, i, geneName);
 	else if (cdsEnd > cdsStart)
-	    printf(firstTwoColumnsPctS "untranslated-%d</TD></TR>\n", geneTrack, geneName,
-		   (geneIsRc ^ (snpEnd < cdsStart)) ? 5 : 3);
+	    {
+	    boolean is5Prime = geneIsRc ^ (snpEnd < cdsStart);
+	    printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName,
+		   snpMisoLinkFromFunc((is5Prime) ? "untranslated-5" : "untranslated-3"));
+	    }
 	else
-	    printf(firstTwoColumnsPctS "noncoding gene</TD></TR>\n", geneTrack, geneName);
+	    printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName,
+		   snpMisoLinkFromFunc("ncRNA"));
 	}
     if (i > 0)
 	{
 	int intronStart = gene->exonEnds[i-1], intronEnd = gene->exonStarts[i];
 	if (snpStart < intronStart+2 && snpEnd > intronStart)
-	    printf(firstTwoColumnsPctS "intron, splice-%d</TD></TR>\n",
-		   geneTrack, geneName,
-		   (geneIsRc ? 3 : 5));
+	    printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName,
+		   snpMisoLinkFromFunc(geneIsRc ? "splice-3" : "splice-5"));
 	else if (snpStart < intronEnd-2 && snpEnd > intronStart+2)
-	    printf(firstTwoColumnsPctS "intron</TD></TR>\n", geneTrack, geneName);
+	    printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName,
+		   snpMisoLinkFromFunc("intron"));
 	else if (snpStart < intronEnd && snpEnd > intronEnd-2)
-	    printf(firstTwoColumnsPctS "intron, splice-%d</TD></TR>\n",
-		   geneTrack, geneName,
-		   (geneIsRc ? 5 : 3));
+	    printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName,
+		   snpMisoLinkFromFunc(geneIsRc ? "splice-5" : "splice-3"));
 	}
     }
 }
 
 void printSnp125NearGenes(struct sqlConnection *conn, struct snp125 *snp, char *geneTable,
 			  char *geneTrack)
 /* Search upstream and downstream of snp for neigh */
 {
 struct sqlResult *sr;
 char query[512];
 char **row;
 int snpStart = snp->chromStart, snpEnd = snp->chromEnd;
 int nearCount = 0;
 int maxDistance = 10000;
 /* query to the left: */
 safef(query, sizeof(query), "select name,txEnd,strand from %s "
       "where chrom = '%s' and txStart < %d and txEnd > %d",
       geneTable, snp->chrom, snpStart, snpStart - maxDistance);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char *gene = row[0];
     char *geneName = getSymbolForGeneName(geneTable, gene);
     int end = sqlUnsigned(row[1]);
     char *strand = row[2];
-    printf(firstTwoColumnsPctS "%d bases %sstream</TD></TR>\n",
-	   geneTrack, geneName, (snpStart - end + 1),
-	   (strand[0] == '-' ? "up" : "down"));
+    boolean isRc = strand[0] == '-';
+    printf(firstTwoColumnsPctS "%s (%d bases %sstream)</TD></TR>\n",
+	   geneTrack, geneName, snpMisoLinkFromFunc(isRc ? "near-gene-5" : "near-gene-3"),
+	   (snpStart - end + 1), (isRc ? "up" : "down"));
     nearCount++;
     }
 sqlFreeResult(&sr);
 /* query to the right: */
 safef(query, sizeof(query), "select name,txStart,strand from %s "
       "where chrom = '%s' and txStart < %d and txEnd > %d",
       geneTable, snp->chrom, snpEnd + maxDistance, snpEnd);
 sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     char *gene = row[0];
     char *geneName = getSymbolForGeneName(geneTable, gene);
     int start = sqlUnsigned(row[1]);
     char *strand = row[2];
-    printf(firstTwoColumnsPctS "%d bases %sstream</TD></TR>\n",
-	   geneTrack, geneName, (start - snpEnd + 1),
-	   (strand[0] == '-' ? "down" : "up"));
+    boolean isRc = strand[0] == '-';
+    printf(firstTwoColumnsPctS "%s (%d bases %sstream)</TD></TR>\n",
+	   geneTrack, geneName, snpMisoLinkFromFunc(isRc ? "near-gene-3" : "near-gene-5"),
+	   (start - snpEnd + 1), (isRc ? "down" : "up"));
     nearCount++;
     }
 sqlFreeResult(&sr);
 if (nearCount == 0)
     printf("<TR><TD>%s&nbsp;&nbsp;</TD><TD></TD><TD>intergenic</TD></TR>", geneTrack);
 }
 
 static struct genePred *getGPsWithFrames(struct sqlConnection *conn, char *geneTable,
 					 char *chrom, int start, int end)
 /* Given a known-to-exist genePred table name and a range, return
  * genePreds in range with exonFrames populated. */
 {
 struct genePred *gpList = NULL;
 boolean hasBin;
 struct sqlResult *sr = hRangeQuery(conn, geneTable, chrom, start, end, NULL, &hasBin);
@@ -16783,87 +16805,98 @@
 /* Translate an integer function code from NCBI into an abbreviated description.
  * Do not free return value! */
 // Might be a good idea to flesh this out with all codes, libify, and share with
 // snpNcbiToUcsc instead of partially duplicating.
 {
 switch (funcCode)
     {
     case 3:
 	return "coding-synon";
     case 8:
 	return "cds-reference";
     case 41:
 	return "nonsense";
     case 42:
 	return "missense";
+    case 43:
+	return "stop-loss";
     case 44:
 	return "frameshift";
+    case 45:
+	return "cds-indel";
     default:
 	{
 	static char buf[16];
 	safef(buf, sizeof(buf), "%d", funcCode);
 	return buf;
 	}
     }
 
 }
 
 void printSnp125CodingAnnotations(struct trackDb *tdb, struct snp125 *snp)
 /* If tdb specifies extra table(s) that contain protein-coding annotations,
  * show the effects of SNP on transcript coding sequences. */
 {
 char *tables = trackDbSetting(tdb, "codingAnnotations");
 if (isEmpty(tables))
     return;
 struct sqlConnection *conn = hAllocConn(database);
 struct slName *tbl, *tableList = slNameListFromString(tables, ',');
 struct dyString *query = dyStringNew(0);
 for (tbl = tableList;  tbl != NULL;  tbl = tbl->next)
     {
     if (!sqlTableExists(conn, tbl->name))
 	continue;
     char setting[512];
     safef(setting, sizeof(setting), "codingAnnoLabel_%s", tbl->name);
-    char *label = trackDbSettingOrDefault(tdb, setting, tbl->name);
+    char *label = trackDbSettingOrDefault(tdb, setting, NULL);
+    if (label == NULL && endsWith(tbl->name, "DbSnp"))
+	label = "dbSNP";
+    else
+	label = tbl->name;
     boolean hasBin = hIsBinned(database, tbl->name);
     boolean hasCoords = (sqlFieldIndex(conn, tbl->name, "chrom") != -1);
     int rowOffset = hasBin + (hasCoords ? 3 : 0);
     dyStringClear(query);
     dyStringPrintf(query, "select * from %s where name = '%s'", tbl->name, snp->name);
     if (hasCoords)
 	dyStringPrintf(query, " and chrom = '%s' and chromStart = %d", seqName, snp->chromStart);
     struct sqlResult *sr = sqlGetResult(conn, query->string);
     char **row;
     boolean first = TRUE;
     while ((row = sqlNextRow(sr)) != NULL)
 	{
 	if (first)
 	    {
 	    printf("<BR><B>Coding annotations by %s:</B><BR>\n", label);
 	    first = FALSE;
 	    }
 	struct snp125CodingCoordless *anno = snp125CodingCoordlessLoad(row+rowOffset);
 	int i;
 	boolean gotRef = (anno->funcCodes[0] == 8);
 	for (i = 0;  i < anno->alleleCount;  i++)
 	    {
 	    memSwapChar(anno->peptides[i], strlen(anno->peptides[i]), 'X', '*');
 	    if (anno->funcCodes[i] == 8)
 		continue;
+	    char *txName = anno->transcript;
+	    if (startsWith("NM_", anno->transcript))
+		txName = getSymbolForGeneName("refGene", anno->transcript);
 	    char *func = dbSnpFuncFromInt(anno->funcCodes[i]);
-	    printf("%s: %s ", anno->transcript, func);
-	    if (sameString(func, "frameshift"))
+	    printf("%s: %s ", txName, snpMisoLinkFromFunc(func));
+	    if (sameString(func, "frameshift") || sameString(func, "cds-indel"))
 		{
 		puts("<BR>");
 		continue;
 		}
 	    if (gotRef)
 		printf("%s (%s) --> ", anno->peptides[0],
 		       highlightCodonBase(anno->codons[0], anno->frame));
 	    printf("%s (%s)<BR>\n", anno->peptides[i],
 		   highlightCodonBase(anno->codons[i], anno->frame));
 	    }
 	}
     sqlFreeResult(&sr);
     }
 hFreeConn(&conn);
 }
@@ -16954,31 +16987,32 @@
 void printSnp125Info(struct trackDb *tdb, struct snp132Ext *snp, int version)
 /* print info on a snp125 */
 {
 struct snp125 *snp125 = (struct snp125 *)snp;
 printSnpOrthoSummary(tdb, snp->name, snp->observed);
 if (differentString(snp->strand,"?"))
     printf("<B>Strand: </B>%s<BR>\n", snp->strand);
 printf("<B>Observed: </B>%s<BR>\n", snp->observed);
 printSnpAlleleAndOrthos(tdb, snp125, version);
 puts("<BR><TABLE border=0 cellspacing=0 cellpadding=0>");
 if (version <= 127)
     printf("<TR><TD><B><A HREF=\"#LocType\">Location Type</A></B></TD><TD>%s</TD></TR>\n",
 	   snp->locType);
 printf("<TR><TD><B><A HREF=\"#Class\">Class</A></B></TD><TD>%s</TD></TR>\n", snp->class);
 printf("<TR><TD><B><A HREF=\"#Valid\">Validation</A></B></TD><TD>%s</TD></TR>\n", snp->valid);
-printf("<TR><TD><B><A HREF=\"#Func\">Function</A></B></TD><TD>%s</TD></TR>\n", snp->func);
+printf("<TR><TD><B><A HREF=\"#Func\">Function</A></B></TD><TD>%s</TD></TR>\n",
+       snpMisoLinkFromFunc(snp->func));
 printf("<TR><TD><B><A HREF=\"#MolType\">Molecule Type</A>&nbsp;&nbsp;</B></TD><TD>%s</TD></TR>\n",
        snp->molType);
 if (snp->avHet>0)
     printf("<TR><TD><B><A HREF=\"#AvHet\">Average Heterozygosity</A>&nbsp;&nbsp;</TD>"
 	   "<TD></B>%.3f +/- %.3f</TD></TR>\n", snp->avHet, snp->avHetSE);
 printf("<TR><TD><B><A HREF=\"#Weight\">Weight</A></B></TD><TD>%d</TD></TR>\n", snp->weight);
 if (version >= 132)
     printSnp132ExtraColumns(tdb, snp);
 else
     printf("</TABLE>\n");
 printSnp125CodingAnnotations(tdb, snp125);
 writeSnpExceptionWithVersion(tdb, snp, version);
 printSnp125Function(tdb, snp125);
 }