4dcb41c179c6fae38abffb52dd2b764b74301c7c
angie
  Mon Jul 30 16:39:17 2012 -0700
Feature #8551 (Sequence Ontology terms for predicted functional effects on SNP details pages): at Ensembl's request, instead of displaying dbSNP's predicted function terms, show
terms from the Sequence Ontology (SO).  Where possible, terms from Ensembl's list at
http://staging.ensembl.org/info/docs/variation/predicted_data.html are used.

diff --git src/hg/lib/snp125Ui.c src/hg/lib/snp125Ui.c
index e30e03a..e43c91e 100644
--- src/hg/lib/snp125Ui.c
+++ src/hg/lib/snp125Ui.c
@@ -275,60 +275,144 @@
     "blue",   // untranslated
     "black",  // intron
     "red",    // splice-site
     "black",  // cds-reference
 };
 
 /* NCBI has added some new, more specific function types that map onto 
  * pre-existing simpler function classes.  This mapping is an array of 
  * arrays, each of which has the simpler type (from snp125FuncDataName
  * above) followed by more specific subtypes, if any.  All arrays are
  * NULL-terminated. */
 static char *locusSyn[] =
     {"locus",		"gene-segment", "near-gene-3", "near-gene-5", NULL};
 static char *nonsynonSyn[] =
     {"coding-nonsynon",	"nonsense", "missense", "frameshift", "stop-loss", "cds-indel",
-     "coding-synonymy-unknown", NULL};
+     "coding-synonymy-unknown", "cds-synonymy-unknown", NULL};
 static char *untranslatedSyn[] =
     {"untranslated",	"untranslated-3", "untranslated-5", NULL};
 static char *spliceSyn[] =
     {"splice-site",	"splice-3", "splice-5", NULL};
 static char *cdsRefSyn[] =
     {"cds-reference",	"coding",
      NULL};
 char **snp125FuncDataSynonyms[] = {
     locusSyn,
     nonsynonSyn,
     untranslatedSyn,
     spliceSyn,
     cdsRefSyn,
     NULL
 };
 
 static char *snp125FuncOldIncludeVars[] = {
     "snp125FuncUnknownInclude",
     "snp125FuncLocusInclude",
     "snp125FuncSynonInclude",
     "snp125FuncNonSynonInclude",
     "snp125FuncUntranslatedInclude",
     "snp125FuncIntronInclude",
     "snp125FuncSpliceInclude",
     "snp125FuncReferenceInclude",
 };
 
 int snp125FuncArraySize   = ArraySize(snp125FuncLabels);
 
+// One row of the map from func terms (from all snpNNN to date) to Sequence Ontology terms and IDs:
+struct snpFuncSO
+    {
+    char *funcTerm;	// term found in snpNNN.func (lookup key)
+    char *soTerm;	// corresponding Sequence Ontology term (snpSOFromFunc also matches on it)
+    char *soId;		// corresponding Sequence Ontology accession, e.g. "SO:0001583"
+    };
+
+static struct snpFuncSO snpFuncToSO[] = {	// scanned in order by snpSOFromFunc; first match wins
+    { "locus", "feature_variant", "SO:0001878" },
+    { "locus-region", "feature_variant", "SO:0001878" },
+    { "coding", "coding_sequence_variant", "SO:0001580" },
+    { "coding-synon", "synonymous_variant", "SO:0001819" },
+    { "coding-nonsynon", "protein_altering_variant", "SO:0001818" },
+    { "untranslated", "UTR_variant", "SO:0001622" },
+    { "mrna-utr", "UTR_variant", "SO:0001622" },
+    { "intron", "intron_variant", "SO:0001627" },
+    { "splice-site", "splice_site_variant", "SO:0001629" },
+    { "cds-reference", "coding_sequence_variant", "SO:0001580" },
+    { "cds-synonymy-unknown", "coding_sequence_variant", "SO:0001580" },	// NOTE(review): no row for "coding-synonymy-unknown" -- intended?
+    { "near-gene-3", "downstream_gene_variant", "SO:0001632" },
+    { "near-gene-5", "upstream_gene_variant", "SO:0001631" },
+    { "ncRNA", "nc_transcript_variant", "SO:0001619" },
+    { "nonsense", "stop_gained", "SO:0001587" },
+    { "missense", "missense_variant", "SO:0001583" },
+    { "stop-loss", "stop_lost", "SO:0001578" },
+    { "frameshift", "frameshift_variant", "SO:0001589" },
+    { "cds-indel", "inframe_indel", "SO:0001820" },
+    { "untranslated-3", "3_prime_UTR_variant", "SO:0001624" },
+    { "untranslated-5", "5_prime_UTR_variant", "SO:0001623" },
+    { "splice-3", "splice_acceptor_variant", "SO:0001574" },
+    { "splice-5", "splice_donor_variant", "SO:0001575" },
+    // SO terms that dbSNP doesn't use at this point, but we do, to match Ensembl (funcTerm == soTerm):
+    { "inframe_insertion", "inframe_insertion", "SO:0001821" },
+    { "inframe_deletion", "inframe_deletion", "SO:0001822" },
+    { "stop_retained_variant", "stop_retained_variant", "SO:0001567" },
+    { NULL, NULL, NULL }	// sentinel: a NULL funcTerm ends the scan in snpSOFromFunc
+};
+
+static boolean snpSOFromFunc(char *funcTerm, char **retSoTerm, char **retSoId)
+/* Find the first snpFuncToSO row whose funcTerm or soTerm equals funcTerm.  On a match,
+ * store the row's SO term and accession through whichever of retSoTerm / retSoId is
+ * non-NULL and return TRUE; return FALSE if funcTerm is empty or no row matches. */
+{
+struct snpFuncSO *row;
+if (isEmpty(funcTerm))
+    return FALSE;
+// Walk the table until the row whose funcTerm is NULL, which marks its end.
+for (row = snpFuncToSO;  row->funcTerm != NULL;  row++)
+    {
+    if (!sameString(funcTerm, row->funcTerm) && !sameString(funcTerm, row->soTerm))
+	continue;	// neither column matches; try the next row
+    if (retSoTerm != NULL)
+	*retSoTerm = row->soTerm;
+    if (retSoId != NULL)
+	*retSoId = row->soId;
+    return TRUE;
+    }
+return FALSE;
+}
+
+#define MISO_BASE_URL "http://sequenceontology.org/browser/current_release/term/"
+
+char *snpMisoLinkFromFunc(char *funcTerm)
+/* If we can map funcTerm to a Sequence Ontology term, return a link to the MISO SO browser;
+ * otherwise just return the same term. funcTerm may be a comma-separated list of terms;
+ * each term is handled separately and the pieces are joined with ", ". */
+{
+char *soId = NULL, *soTerm = NULL;
+struct dyString *dy = dyStringNew(256);
+char *terms[128], *termsCopy = cloneString(funcTerm);	// chop a copy, not the caller's string
+int i, termCount = chopCommas(termsCopy, terms);
+for (i = 0;  i < termCount;  i++)
+    {
+    if (i > 0)
+	dyStringAppend(dy, ", ");
+    if (snpSOFromFunc(terms[i], &soTerm, &soId))
+	dyStringPrintf(dy, "<A HREF=\""MISO_BASE_URL"%s\" TARGET=_BLANK>%s</A>", soId, soTerm);
+    else
+	dyStringAppend(dy, terms[i]);
+    }
+freeMem(termsCopy);	// terms[] pointed into the copy; dy has its own text, so release it
+return dyStringCannibalize(&dy);	// hand dy's accumulated string to the caller
+}
 
 /****** LocType related controls *******/
 /* Types: unknown, range, exact, between,
           rangeInsertion, rangeSubstitution, rangeDeletion */
 
 char *snp125LocTypeLabels[] = {
     "Unknown",
     "Range",
     "Exact",
     "Between",
     "RangeInsertion",
     "RangeSubstitution",
     "RangeDeletion",
 };
 char *snp125LocTypeOldColorVars[] = {
@@ -381,78 +465,84 @@
     "FlankMismatchGenomeEqual",
     "FlankMismatchGenomeShorter",
     "NamedDeletionZeroSpan",
     "NamedInsertionNonzeroSpan",
     "SingleClassLongerSpan",
     "SingleClassZeroSpan",
     "SingleClassTriAllelic",
     "SingleClassQuadAllelic",
     "ObservedWrongFormat",
     "ObservedTooLong",
     "ObservedContainsIupac",
     "ObservedMismatch",
     "MultipleAlignments",
     "NonIntegerChromCount",
     "AlleleFreqSumNot1",
+    "SingleAlleleFreq",
+    "InconsistentAlleles",
 };
 
 char *snp132ExceptionVarName[] = {
     "NoExceptions",
     "RefAlleleMismatch",
     "RefAlleleRevComp",
     "DuplicateObserved",
     "MixedObserved",
     "FlankMismatchGenomeLonger",
     "FlankMismatchGenomeEqual",
     "FlankMismatchGenomeShorter",
     "NamedDeletionZeroSpan",
     "NamedInsertionNonzeroSpan",
     "SingleClassLongerSpan",
     "SingleClassZeroSpan",
     "SingleClassTriAllelic",
     "SingleClassQuadAllelic",
     "ObservedWrongFormat",
     "ObservedTooLong",
     "ObservedContainsIupac",
     "ObservedMismatch",
     "MultipleAlignments",
     "NonIntegerChromCount",
     "AlleleFreqSumNot1",
+    "SingleAlleleFreq",
+    "InconsistentAlleles",
 };
 
 char *snp132ExceptionDefault[] = {
     "black",	// NoExceptions
     "red",	// RefAlleleMismatch
     "red",	// RefAlleleRevComp
     "red",	// DuplicateObserved
     "red",	// MixedObserved
     "red",	// FlankMismatchGenomeLonger
     "red",	// FlankMismatchGenomeEqual
     "red",	// FlankMismatchGenomeShorter
     "red",	// NamedDeletionZeroSpan
     "red",	// NamedInsertionNonzeroSpan
     "red",	// SingleClassLongerSpan
     "red",	// SingleClassZeroSpan
     "gray",	// SingleClassTriAllelic
     "gray",	// SingleClassQuadAllelic
     "red",	// ObservedWrongFormat
     "gray",	// ObservedTooLong
     "gray",	// ObservedContainsIupac
     "red",	// ObservedMismatch
     "red",	// MultipleAlignments
     "gray",	// NonIntegerChromCount
     "gray",	// AlleleFreqSumNot1
+    "gray",	// SingleAlleleFreq
+    "gray",	// InconsistentAlleles
 };
 
 int snp132ExceptionArraySize = ArraySize(snp132ExceptionLabels);
 
 /****** Miscellaneous attributes (dbSNP's bitfields) related controls *******/
 
 char *snp132BitfieldLabels[] = {
     "None",
     "Clinically Associated",
     "MAF >= 5% in Some Population",
     "MAF >= 5% in All Populations",
     "Appears in OMIM/OMIA",
     "Has Microattribution/Third-Party Annotation",
     "Submitted by Locus-Specific Database",
     "Genotype Conflict",