36f5fda1377cc3f22da3db6ed52823186632ff15
angie
  Wed Aug 1 12:34:56 2012 -0700
Feature #8551 (Sequence Ontology terms for predicted functional effects on SNP details pages): Adding splice_region_variant, defined by SO as "A sequence variant in
which a change has occurred within the region of the splice site,
either within 1-3 bases of the exon or 3-8 bases of the intron."

diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c
index c663c97..c0cb99d 100644
--- src/hg/hgc/hgc.c
+++ src/hg/hgc/hgc.c
@@ -16547,124 +16547,141 @@
 char *indivAlleles[64];
 int alleleCount = chopString(alleleStr, "/", indivAlleles, ArraySize(indivAlleles));
 int j;
 for (j = 0;  j < alleleCount;  j++)
     {
     char *al = indivAlleles[j];
     boolean alIsAlpha = (isalpha(al[0]) && !sameString(al, "lengthTooLong"));
     if ((snpIsRc ^ geneIsRc) && alIsAlpha)
 	reverseComplement(al, strlen(al));
     char alBase = al[0];
     if (alBase == '\0' || sameString(al, refAllele))
 	continue;
     int alSize = sameString(al, "-") ? 0 : alIsAlpha ? strlen(al) : -1;
     if (alSize != refAlleleSize && alSize >= 0 && refAlleleSize >=0)
 	{
+
 	int diff = alSize - refAlleleSize;
 	if ((diff % 3) != 0)
-	    printf(firstTwoColumnsPctS "%s</TD></TR>\n",
+	    printf(firstTwoColumnsPctS "%s\n",
 		   geneTrack, geneName, snpMisoLinkFromFunc("frameshift"));
 	else if (diff > 0)
-	    printf(firstTwoColumnsPctS "%s (insertion of %d codon%s)</TD></TR>\n",
+	    printf(firstTwoColumnsPctS "%s (insertion of %d codon%s)\n",
 		   geneTrack, geneName, snpMisoLinkFromFunc("inframe_insertion"),
 		   (int)(diff/3), (diff > 3) ?  "s" : "");
 	else
-	    printf(firstTwoColumnsPctS "%s (deletion of %d codon%s)</TD></TR>\n",
+	    printf(firstTwoColumnsPctS "%s (deletion of %d codon%s)\n",
 		   geneTrack, geneName, snpMisoLinkFromFunc("inframe_deletion"),
 		   (int)(-diff/3), (diff < -3) ?  "s" : "");
 	}
     else if (alSize == 1 && refIsSingleBase)
 	{
 	char snpCodon[4];
 	safecpy(snpCodon, sizeof(snpCodon), refCodon);
 	snpCodon[snpCodonPos] = alBase;
 	char snpAA = lookupCodon(snpCodon);
 	if (snpAA == '\0') snpAA = '*';
 	char refCodonHtml[16], snpCodonHtml[16];
 	safecpy(refCodonHtml, sizeof(refCodonHtml), highlightCodonBase(refCodon, snpCodonPos));
 	safecpy(snpCodonHtml, sizeof(snpCodonHtml), highlightCodonBase(snpCodon, snpCodonPos));
 	if (refAA != snpAA)
 	    {
 	    if (refAA == '*')
-		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n",
+		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n",
 		       geneTrack, geneName, snpMisoLinkFromFunc("stop-loss"),
 		       refAA, refCodonHtml, snpAA, snpCodonHtml);
 	    else if (snpAA == '*')
-		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n",
+		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n",
 		       geneTrack, geneName, snpMisoLinkFromFunc("nonsense"),
 		       refAA, refCodonHtml, snpAA, snpCodonHtml);
 	    else
-		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n",
+		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n",
 		       geneTrack, geneName, snpMisoLinkFromFunc("missense"),
 		       refAA, refCodonHtml, snpAA, snpCodonHtml);
 	    }
 	else
 	    {
 	    if (refAA == '*')
-		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n",
+		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n",
 		       geneTrack, geneName, snpMisoLinkFromFunc("stop_retained_variant"),
 		       refAA, refCodonHtml, snpAA, snpCodonHtml);
 	    else
-		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)</TD></TR>\n",
+		printf(firstTwoColumnsPctS "%s %c (%s) --> %c (%s)\n",
 		       geneTrack, geneName, snpMisoLinkFromFunc("coding-synon"),
 		       refAA, refCodonHtml, snpAA, snpCodonHtml);
 	    }
 	}
     else
-	printf(firstTwoColumnsPctS "%s %s --> %s</TD></TR>\n",
+	printf(firstTwoColumnsPctS "%s %s --> %s\n",
 	       geneTrack, geneName, snpMisoLinkFromFunc("cds-synonymy-unknown"), refAllele, al);
     }
 }
 
 void printSnp125FunctionInGene(struct snp125 *snp, char *geneTable, char *geneTrack,
 			       struct genePred *gene)
 /* Given a SNP and a gene that overlaps it, say where in the gene it overlaps
  * and if in CDS, say what effect the coding alleles have. */
 {
 int snpStart = snp->chromStart, snpEnd = snp->chromEnd;
 int cdsStart = gene->cdsStart, cdsEnd = gene->cdsEnd;
 boolean geneIsRc = sameString(gene->strand, "-");
 char *geneName = getSymbolForGeneName(geneTable, gene->name);
 int i, iStart = 0, iEnd = gene->exonCount, iIncr = 1;
 if (geneIsRc)
     { iStart = gene->exonCount - 1;  iEnd = -1;  iIncr = -1; }
 for (i = iStart;  i != iEnd;  i += iIncr)
     {
     int exonStart = gene->exonStarts[i], exonEnd = gene->exonEnds[i];
     if (snpEnd > exonStart && snpStart < exonEnd)
 	{
 	if (snpEnd > cdsStart && snpStart < cdsEnd)
 	    printSnp125FunctionInCDS(snp, geneTable, geneTrack, gene, i, geneName);
 	else if (cdsEnd > cdsStart)
 	    {
 	    boolean is5Prime = geneIsRc ^ (snpEnd < cdsStart);
-	    printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName,
+	    printf(firstTwoColumnsPctS "%s\n", geneTrack, geneName,
 		   snpMisoLinkFromFunc((is5Prime) ? "untranslated-5" : "untranslated-3"));
 	    }
 	else
-	    printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName,
+	    printf(firstTwoColumnsPctS "%s\n", geneTrack, geneName,
 		   snpMisoLinkFromFunc("ncRNA"));
 	}
+    // SO term splice_region_variant applies to first/last 3 bases of exon
+    // and first/last 3-8 bases of intron
+    if ((i > 0 && snpStart < exonStart+3 && snpEnd > exonStart) ||
+	(i < gene->exonCount-1 && snpStart < exonEnd && snpEnd > exonEnd-3))
+	printf(", %s", snpMisoLinkFromFunc("splice_region_variant"));
+    puts("</TD></TR>");
     if (i > 0)
 	{
 	int intronStart = gene->exonEnds[i-1], intronEnd = gene->exonStarts[i];
+	if (snpEnd < intronStart || snpStart > intronEnd)
+	    continue;
 	if (snpStart < intronStart+2 && snpEnd > intronStart)
 	    printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName,
 		   snpMisoLinkFromFunc(geneIsRc ? "splice-3" : "splice-5"));
-	else if (snpStart < intronEnd-2 && snpEnd > intronStart+2)
+	else if (snpStart < intronStart+8 && snpEnd > intronStart+2)
+	    printf(firstTwoColumnsPctS "%s, %s</TD></TR>\n", geneTrack, geneName,
+		   snpMisoLinkFromFunc("intron_variant"),
+		   snpMisoLinkFromFunc("splice_region_variant"));
+	else if (snpStart < intronEnd-8 && snpEnd > intronStart+8)
 	    printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName,
 		   snpMisoLinkFromFunc("intron"));
+	else if (snpStart < intronEnd-2 && snpEnd > intronEnd-8)
+	    printf(firstTwoColumnsPctS "%s, %s</TD></TR>\n", geneTrack, geneName,
+		   snpMisoLinkFromFunc("intron_variant"),
+		   snpMisoLinkFromFunc("splice_region_variant"));
 	else if (snpStart < intronEnd && snpEnd > intronEnd-2)
 	    printf(firstTwoColumnsPctS "%s</TD></TR>\n", geneTrack, geneName,
 		   snpMisoLinkFromFunc(geneIsRc ? "splice-5" : "splice-3"));
 	}
     }
 }
 
 void printSnp125NearGenes(struct sqlConnection *conn, struct snp125 *snp, char *geneTable,
 			  char *geneTrack)
 /* Search upstream and downstream of snp for neigh */
 {
 struct sqlResult *sr;
 char query[512];
 char **row;
 int snpStart = snp->chromStart, snpEnd = snp->chromEnd;