src/hg/hgc/hgc.c 1.1548

1.1548 2009/06/04 20:15:07 angie
Added printSnp125CodingAnnotations, to display coding effects of SNPs from a table specified by a new trackDb setting (e.g. dbSNP's annotations).
Index: src/hg/hgc/hgc.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/hgc/hgc.c,v
retrieving revision 1.1547
retrieving revision 1.1548
diff -b -B -U 4 -r1.1547 -r1.1548
--- src/hg/hgc/hgc.c	28 May 2009 16:48:30 -0000	1.1547
+++ src/hg/hgc/hgc.c	4 Jun 2009 20:15:07 -0000	1.1548
@@ -75,8 +75,9 @@
 #include "snp.h"
 #include "snpMap.h"
 #include "snpExceptions.h"
 #include "snp125Exceptions.h"
+#include "snp125CodingCoordless.h"
 #include "cnpIafrate.h"
 #include "cnpIafrate2.h"
 #include "cnpLocke.h"
 #include "cnpSebat.h"
@@ -14841,8 +14842,30 @@
     if (*pRefAA == '\0') *pRefAA = '*';
     }
 }
 
+static char *highlightCodonBase(char *codon, int offset)
+/* Wrap the base at position offset (0, 1 or 2) of a three-letter codon in <B></B>.
+ * Any other codon length or offset yields the codon text unchanged.  The result is
+ * held in a static buffer that each call overwrites, so don't free it! */
+{
+static struct dyString *html = NULL;
+int i;
+if (html == NULL)
+    html = dyStringNew(0);
+dyStringClear(html);
+if (strlen(codon) != 3 || offset < 0 || offset > 2)
+    {
+    /* Nothing to highlight: hand back a plain copy. */
+    dyStringAppend(html, codon);
+    return html->string;
+    }
+for (i = 0;  i < 3;  i++)
+    /* Only the base at the requested offset gets the bold tags. */
+    dyStringPrintf(html, (i == offset) ? "<B>%c</B>" : "%c", codon[i]);
+return html->string;
+}
+
 void printSnp125FunctionInCDS(struct snp125 *snp, char *geneTable, char *geneTrack,
 			      struct genePred *gene, int exonIx, char *geneName)
 /* Show the effect of each observed allele of snp on the given exon of gene. */
 {
@@ -14876,15 +14899,17 @@
     if (alSize != refAlleleSize && alSize >= 0 && refAlleleSize >=0)
 	{
 	int diff = alSize - refAlleleSize;
 	if ((diff % 3) != 0)
-	    printf(firstTwoColumnsPctS "cds-reference, frameshift</TD></TR>\n",
+	    printf(firstTwoColumnsPctS "frameshift</TD></TR>\n",
 		   geneTrack, geneName);
 	else if (diff > 0)
-	    printf(firstTwoColumnsPctS "cds-reference, insertion of %d codon%s</TD></TR>\n",
-		   geneTrack, geneName, (int)(diff/3), (diff > 3) ?  "s" : "");
+	    printf(firstTwoColumnsPctS "%sinsertion of %d codon%s</TD></TR>\n",
+		   geneTrack, geneName, (snpCodonPos == 0 ? "" : "frameshift and "), /* prefix %s */
+		   (int)(diff/3), (diff > 3) ?  "s" : "");
 	else
-	    printf(firstTwoColumnsPctS "cds-reference, deletion of %d codon%s</TD></TR>\n",
-		   geneTrack, geneName, (int)(-diff/3), (diff < -3) ?  "s" : "");
+	    printf(firstTwoColumnsPctS "%sdeletion of %d codon%s</TD></TR>\n",
+		   geneTrack, geneName, (snpCodonPos == 0 ? "" : "frameshift and "), /* prefix %s */
+		   (int)(-diff/3), (diff < -3) ?  "s" : "");
 	}
     else if (alSize == 1 && refIsSingleBase)
 	{
@@ -14892,20 +14917,23 @@
 	safecpy(snpCodon, sizeof(snpCodon), refCodon);
 	snpCodon[snpCodonPos] = alBase;
 	char snpAA = lookupCodon(snpCodon);
 	if (snpAA == '\0') snpAA = '*';
+	char refCodonHtml[16], snpCodonHtml[16];
+	safecpy(refCodonHtml, sizeof(refCodonHtml), highlightCodonBase(refCodon, snpCodonPos));
+	safecpy(snpCodonHtml, sizeof(snpCodonHtml), highlightCodonBase(snpCodon, snpCodonPos));
 	if (refAA != snpAA)
-	    printf(firstTwoColumnsPctS "cds-reference, %ssense %c (%s) --> %c (%s)</TD></TR>\n",
+	    printf(firstTwoColumnsPctS "%ssense %c (%s) --> %c (%s)</TD></TR>\n",
 		   geneTrack, geneName,
 		   ((refAA == '*' || snpAA == '*') ? "non" : "mis"),
-		   refAA, refCodon, snpAA, snpCodon);
+		   refAA, refCodonHtml, snpAA, snpCodonHtml);
 	else
 	    printf(firstTwoColumnsPctS
-		   "cds-reference, coding-synon %c (%s) --> %c (%s)</TD></TR>\n",
-		   geneTrack, geneName, refAA, refCodon, snpAA, snpCodon);
+		   "coding-synon %c (%s) --> %c (%s)</TD></TR>\n",
+		   geneTrack, geneName, refAA, refCodonHtml, snpAA, snpCodonHtml);
 	}
     else
-	printf(firstTwoColumnsPctS "cds-reference, %s -> %s</TD></TR>\n",
+	printf(firstTwoColumnsPctS "%s --> %s</TD></TR>\n",
 	       geneTrack, geneName, refAllele, al);
     }
 }
 
@@ -15059,8 +15087,95 @@
 printf("</TABLE>\n");
 hFreeConn(&conn);
 }
 
+char *dbSnpFuncFromInt(unsigned char funcCode)
+/* Return a short description for one of NCBI's numeric function codes.  A code with
+ * no entry in the table below is returned as its decimal digits in a static buffer
+ * that the next such call overwrites.  Do not free return value! */
+// Might be a good idea to flesh this out with all codes, libify, and share with
+// snpNcbiToUcsc instead of partially duplicating.
+{
+/* Codes this function can name; anything else falls through to the numeric form. */
+static struct { unsigned char code; char *desc; } known[] =
+    {
+    { 3, "coding-synon" },
+    { 8, "cds-reference" },
+    { 41, "nonsense" },
+    { 42, "missense" },
+    { 44, "frameshift" },
+    };
+static char numBuf[16];
+int i, knownCount = sizeof(known) / sizeof(known[0]);
+for (i = 0;  i < knownCount;  i++)
+    {
+    if (known[i].code == funcCode)
+	return known[i].desc;
+    }
+/* Unrecognized code: report the number itself. */
+safef(numBuf, sizeof(numBuf), "%d", funcCode);
+return numBuf;
+}
+
+void printSnp125CodingAnnotations(struct trackDb *tdb, struct snp125 *snp)
+/* For each table listed in tdb's comma-separated "codingAnnotations" setting, query
+ * the rows for snp and print, per transcript and allele, the function description and
+ * the peptide and codon change.  Prints nothing when the setting is empty. */
+{
+char *tableNames = trackDbSetting(tdb, "codingAnnotations");
+if (isEmpty(tableNames))
+    return;
+struct sqlConnection *conn = hAllocConn(database);
+struct slName *table, *tables = slNameListFromString(tableNames, ',');
+struct dyString *sql = dyStringNew(0);
+for (table = tables;  table != NULL;  table = table->next)
+    {
+    char labelSetting[512];
+    safef(labelSetting, sizeof(labelSetting), "codingAnnoLabel_%s", table->name);
+    char *label = trackDbSettingOrDefault(tdb, labelSetting, table->name);
+    boolean hasBin = hIsBinned(database, table->name);
+    boolean hasCoords = (sqlFieldIndex(conn, table->name, "chrom") != -1);
+    int rowOffset = hasBin + (hasCoords ? 3 : 0);  /* leading columns to skip */
+    dyStringClear(sql);
+    dyStringPrintf(sql, "select * from %s where name = '%s'", table->name, snp->name);
+    if (hasCoords)
+	dyStringPrintf(sql, " and chrom = '%s' and chromStart = %d", seqName, snp->chromStart);
+    struct sqlResult *sr = sqlGetResult(conn, sql->string);
+    boolean headerDone = FALSE;
+    char **row;
+    for (row = sqlNextRow(sr);  row != NULL;  row = sqlNextRow(sr))
+	{
+	if (!headerDone)
+	    printf("<BR><B>Coding annotations by %s:</B><BR>\n", label);
+	headerDone = TRUE;
+	struct snp125CodingCoordless *anno = snp125CodingCoordlessLoad(row+rowOffset);
+	/* Code 8 is "cds-reference"; when it comes first, allele 0 is shown as the "from" side. */
+	boolean gotRef = (anno->funcCodes[0] == 8);
+	int i;
+	for (i = 0;  i < anno->alleleCount;  i++)
+	    {
+	    memSwapChar(anno->peptides[i], strlen(anno->peptides[i]), 'X', '*');
+	    if (anno->funcCodes[i] == 8)
+		continue;
+	    char *func = dbSnpFuncFromInt(anno->funcCodes[i]);
+	    printf("%s: %s ", anno->transcript, func);
+	    if (sameString(func, "frameshift"))
+		puts("<BR>");
+	    else
+		{
+		if (gotRef)
+		    printf("%s (%s) --> ", anno->peptides[0],
+			   highlightCodonBase(anno->codons[0], anno->frame));
+		printf("%s (%s)<BR>\n", anno->peptides[i],
+		       highlightCodonBase(anno->codons[i], anno->frame));
+		}
+	    }
+	}
+    sqlFreeResult(&sr);
+    }
+hFreeConn(&conn);
+}
+
 void printSnp125Info(struct trackDb *tdb, struct snp125 snp, int version)
 /* print info on a snp125 */
 {
 printSnpOrthoSummary(tdb, snp.name, snp.observed);
@@ -15078,8 +15193,9 @@
 if (snp.avHet>0)
     printf("<BR><B><A HREF=\"#AvHet\">Average Heterozygosity</A>: </B>%.3f +/- %.3f", snp.avHet, snp.avHetSE);
 printf("<BR><B><A HREF=\"#Weight\">Weight</A>: </B>%d",           snp.weight);
 printf("<BR>\n");
+printSnp125CodingAnnotations(tdb, &snp);
 printSnp125Function(tdb, &snp);
 }
 
 void writeSnpExceptionWithVersion(char *table, char *itemName, int version)