4dcb41c179c6fae38abffb52dd2b764b74301c7c angie Mon Jul 30 16:39:17 2012 -0700 Feature #8551 (Sequence Ontology terms for predicted functional effects on SNP details pages): at Ensembl's request, instead of displaying dbSNP's predicted function terms, show terms from the Sequence Ontology (SO). Where possible, terms from Ensembl's list at http://staging.ensembl.org/info/docs/variation/predicted_data.html are used. diff --git src/hg/lib/snp125Ui.c src/hg/lib/snp125Ui.c index e30e03a..e43c91e 100644 --- src/hg/lib/snp125Ui.c +++ src/hg/lib/snp125Ui.c @@ -275,60 +275,144 @@ "blue", // untranslated "black", // intron "red", // splice-site "black", // cds-reference }; /* NCBI has added some new, more specific function types that map onto * pre-existing simpler function classes. This mapping is an array of * arrays, each of which has the simpler type (from snp125FuncDataName * above) followed by more specific subtypes, if any. All arrays are * NULL-terminated. */ static char *locusSyn[] = {"locus", "gene-segment", "near-gene-3", "near-gene-5", NULL}; static char *nonsynonSyn[] = {"coding-nonsynon", "nonsense", "missense", "frameshift", "stop-loss", "cds-indel", - "coding-synonymy-unknown", NULL}; + "coding-synonymy-unknown", "cds-synonymy-unknown", NULL}; static char *untranslatedSyn[] = {"untranslated", "untranslated-3", "untranslated-5", NULL}; static char *spliceSyn[] = {"splice-site", "splice-3", "splice-5", NULL}; static char *cdsRefSyn[] = {"cds-reference", "coding", NULL}; char **snp125FuncDataSynonyms[] = { locusSyn, nonsynonSyn, untranslatedSyn, spliceSyn, cdsRefSyn, NULL }; static char *snp125FuncOldIncludeVars[] = { "snp125FuncUnknownInclude", "snp125FuncLocusInclude", "snp125FuncSynonInclude", "snp125FuncNonSynonInclude", "snp125FuncUntranslatedInclude", "snp125FuncIntronInclude", "snp125FuncSpliceInclude", "snp125FuncReferenceInclude", }; int snp125FuncArraySize = ArraySize(snp125FuncLabels); +// Map func terms (from all snpNNN to date) to Sequence 
Ontology terms and IDs: +struct snpFuncSO + { + char *funcTerm; // term found in snpNNN.func + char *soTerm; // corresponding Sequence Ontology term + char *soId; // corresponding Sequence Ontology accession + }; + +static struct snpFuncSO snpFuncToSO[] = { + { "locus", "feature_variant", "SO:0001878" }, + { "locus-region", "feature_variant", "SO:0001878" }, + { "coding", "coding_sequence_variant", "SO:0001580" }, + { "coding-synon", "synonymous_variant", "SO:0001819" }, + { "coding-nonsynon", "protein_altering_variant", "SO:0001818" }, + { "untranslated", "UTR_variant", "SO:0001622" }, + { "mrna-utr", "UTR_variant", "SO:0001622" }, + { "intron", "intron_variant", "SO:0001627" }, + { "splice-site", "splice_site_variant", "SO:0001629" }, + { "cds-reference", "coding_sequence_variant", "SO:0001580" }, + { "cds-synonymy-unknown", "coding_sequence_variant", "SO:0001580" }, + { "near-gene-3", "downstream_gene_variant", "SO:0001632" }, + { "near-gene-5", "upstream_gene_variant", "SO:0001631" }, + { "ncRNA", "nc_transcript_variant", "SO:0001619" }, + { "nonsense", "stop_gained", "SO:0001587" }, + { "missense", "missense_variant", "SO:0001583" }, + { "stop-loss", "stop_lost", "SO:0001578" }, + { "frameshift", "frameshift_variant", "SO:0001589" }, + { "cds-indel", "inframe_indel", "SO:0001820" }, + { "untranslated-3", "3_prime_UTR_variant", "SO:0001624" }, + { "untranslated-5", "5_prime_UTR_variant", "SO:0001623" }, + { "splice-3", "splice_acceptor_variant", "SO:0001574" }, + { "splice-5", "splice_donor_variant", "SO:0001575" }, + // And some that dbSNP doesn't use at this point, but we do, to match Ensembl: + { "inframe_insertion", "inframe_insertion", "SO:0001821" }, + { "inframe_deletion", "inframe_deletion", "SO:0001822" }, + { "stop_retained_variant", "stop_retained_variant", "SO:0001567" }, + { NULL, NULL, NULL } +}; + +static boolean snpSOFromFunc(char *funcTerm, char **retSoTerm, char **retSoId) +/* Look up snpNNN.func term (or SO term) in static array snpFuncToSO 
and set + * corresponding Sequence Ontology term and accession; return TRUE if found. */ +{ +if (isEmpty(funcTerm)) + return FALSE; +int i; +for (i = 0; snpFuncToSO[i].funcTerm != NULL; i++) + { + struct snpFuncSO *info = &(snpFuncToSO[i]); + if (sameString(funcTerm, info->funcTerm) || sameString(funcTerm, info->soTerm)) + { + if (retSoTerm != NULL) + *retSoTerm = info->soTerm; + if (retSoId != NULL) + *retSoId = info->soId; + return TRUE; + } + } +return FALSE; +} + +#define MISO_BASE_URL "http://sequenceontology.org/browser/current_release/term/" + +char *snpMisoLinkFromFunc(char *funcTerm) +/* If we can map funcTerm to a Sequence Ontology term, return a link to the MISO SO browser; + * otherwise just return the same term. funcTerm may be a comma-separated list of terms. */ +{ +char *soId = NULL, *soTerm = NULL; +struct dyString *dy = dyStringNew(256); +char *terms[128]; +int termCount = chopCommas(cloneString(funcTerm), terms); +int i; +for (i = 0; i < termCount; i++) + { + if (i > 0) + dyStringAppend(dy, ", "); + boolean gotSO = snpSOFromFunc(terms[i], &soTerm, &soId); + if (gotSO) + dyStringPrintf(dy, "<A HREF=\""MISO_BASE_URL"%s\" TARGET=_BLANK>%s</A>", soId, soTerm); + else + dyStringAppend(dy, terms[i]); + } +return dyStringCannibalize(&dy); +} /****** LocType related controls *******/ /* Types: unknown, range, exact, between, rangeInsertion, rangeSubstitution, rangeDeletion */ char *snp125LocTypeLabels[] = { "Unknown", "Range", "Exact", "Between", "RangeInsertion", "RangeSubstitution", "RangeDeletion", }; char *snp125LocTypeOldColorVars[] = { @@ -381,78 +465,84 @@ "FlankMismatchGenomeEqual", "FlankMismatchGenomeShorter", "NamedDeletionZeroSpan", "NamedInsertionNonzeroSpan", "SingleClassLongerSpan", "SingleClassZeroSpan", "SingleClassTriAllelic", "SingleClassQuadAllelic", "ObservedWrongFormat", "ObservedTooLong", "ObservedContainsIupac", "ObservedMismatch", "MultipleAlignments", "NonIntegerChromCount", "AlleleFreqSumNot1", + "SingleAlleleFreq", + 
"InconsistentAlleles", }; char *snp132ExceptionVarName[] = { "NoExceptions", "RefAlleleMismatch", "RefAlleleRevComp", "DuplicateObserved", "MixedObserved", "FlankMismatchGenomeLonger", "FlankMismatchGenomeEqual", "FlankMismatchGenomeShorter", "NamedDeletionZeroSpan", "NamedInsertionNonzeroSpan", "SingleClassLongerSpan", "SingleClassZeroSpan", "SingleClassTriAllelic", "SingleClassQuadAllelic", "ObservedWrongFormat", "ObservedTooLong", "ObservedContainsIupac", "ObservedMismatch", "MultipleAlignments", "NonIntegerChromCount", "AlleleFreqSumNot1", + "SingleAlleleFreq", + "InconsistentAlleles", }; char *snp132ExceptionDefault[] = { "black", // NoExceptions "red", // RefAlleleMismatch "red", // RefAlleleRevComp "red", // DuplicateObserved "red", // MixedObserved "red", // FlankMismatchGenomeLonger "red", // FlankMismatchGenomeEqual "red", // FlankMismatchGenomeShorter "red", // NamedDeletionZeroSpan "red", // NamedInsertionNonzeroSpan "red", // SingleClassLongerSpan "red", // SingleClassZeroSpan "gray", // SingleClassTriAllelic "gray", // SingleClassQuadAllelic "red", // ObservedWrongFormat "gray", // ObservedTooLong "gray", // ObservedContainsIupac "red", // ObservedMismatch "red", // MultipleAlignments "gray", // NonIntegerChromCount "gray", // AlleleFreqSumNot1 + "gray", // SingleAlleleFreq + "gray", // InconsistentAlleles }; int snp132ExceptionArraySize = ArraySize(snp132ExceptionLabels); /****** Miscellaneous attributes (dbSNP's bitfields) related controls *******/ char *snp132BitfieldLabels[] = { "None", "Clinically Associated", "MAF >= 5% in Some Population", "MAF >= 5% in All Populations", "Appears in OMIM/OMIA", "Has Microattribution/Third-Party Annotation", "Submitted by Locus-Specific Database", "Genotype Conflict",