e49078c13bb3c9243511050a99f54455db3aa1b8
angie
Fri Aug 12 16:24:48 2016 -0700
Mapping of SO terms to SO IDs was in snp125Ui.c but really should be in soTerm.c. soTerm was updated to include intergenic_variant but snp125Ui was not (HT Christopher Lee). Now snp125Ui still maps dbSNP func terms to SO terms, but the term string to ID mapping is all in soTerm now. refs #17209
diff --git src/hg/lib/snp125Ui.c src/hg/lib/snp125Ui.c
index bbe5956..39af44f 100644
--- src/hg/lib/snp125Ui.c
+++ src/hg/lib/snp125Ui.c
@@ -1,22 +1,23 @@
/* snp125Ui.c - enums & char arrays for snp UI features and shared util code */
/* Copyright (C) 2014 The Regents of the University of California
* See README in this or parent directory for licensing information. */
#include "snp125Ui.h"
#include "snp125.h"
#include "common.h"
+#include "soTerm.h"
char *snp125OrthoTable(struct trackDb *tdb, int *retSpeciesCount)
/* Look for a setting that specifies a table with orthologous alleles.
* If retSpeciesCount is not null, set it to the number of other species
* whose alleles are in the table. Do not free the returned string. */
{
char *table = trackDbSetting(tdb, "chimpMacaqueOrthoTable");
int speciesCount = 2;
if (table == NULL)
{
table = trackDbSetting(tdb, "chimpOrangMacOrthoTable");
speciesCount = 3;
}
if (retSpeciesCount != NULL)
@@ -314,110 +315,101 @@
};
static char *snp125FuncOldIncludeVars[] = {
"snp125FuncUnknownInclude",
"snp125FuncLocusInclude",
"snp125FuncSynonInclude",
"snp125FuncNonSynonInclude",
"snp125FuncUntranslatedInclude",
"snp125FuncIntronInclude",
"snp125FuncSpliceInclude",
"snp125FuncReferenceInclude",
};
int snp125FuncArraySize = ArraySize(snp125FuncLabels);
-// Map func terms (from all snpNNN to date) to Sequence Ontology terms and IDs:
+// Map func terms (from all snpNNN to date) to Sequence Ontology terms:
struct snpFuncSO
{
char *funcTerm; // term found in snpNNN.func
char *soTerm; // corresponding Sequence Ontology term
- char *soId; // corresponding Sequence Ontology accession
};
static struct snpFuncSO snpFuncToSO[] = {
- { "locus", "feature_variant", "SO:0001878" },
- { "locus-region", "feature_variant", "SO:0001878" },
- { "coding", "coding_sequence_variant", "SO:0001580" },
- { "coding-synon", "synonymous_variant", "SO:0001819" },
- { "coding-nonsynon", "protein_altering_variant", "SO:0001818" },
- { "untranslated", "UTR_variant", "SO:0001622" },
- { "mrna-utr", "UTR_variant", "SO:0001622" },
- { "intron", "intron_variant", "SO:0001627" },
- { "splice-site", "splice_site_variant", "SO:0001629" },
- { "cds-reference", "coding_sequence_variant", "SO:0001580" },
- { "cds-synonymy-unknown", "coding_sequence_variant", "SO:0001580" },
- { "near-gene-3", "downstream_gene_variant", "SO:0001632" },
- { "near-gene-5", "upstream_gene_variant", "SO:0001631" },
- { "ncRNA", "nc_transcript_variant", "SO:0001619" },
- { "nonsense", "stop_gained", "SO:0001587" },
- { "missense", "missense_variant", "SO:0001583" },
- { "stop-loss", "stop_lost", "SO:0001578" },
- { "frameshift", "frameshift_variant", "SO:0001589" },
- { "cds-indel", "inframe_indel", "SO:0001820" },
- { "untranslated-3", "3_prime_UTR_variant", "SO:0001624" },
- { "untranslated-5", "5_prime_UTR_variant", "SO:0001623" },
- { "splice-3", "splice_acceptor_variant", "SO:0001574" },
- { "splice-5", "splice_donor_variant", "SO:0001575" },
- // And some that dbSNP doesn't use at this point, but we do, to match Ensembl:
- { "inframe_insertion", "inframe_insertion", "SO:0001821" },
- { "inframe_deletion", "inframe_deletion", "SO:0001822" },
- { "stop_retained_variant", "stop_retained_variant", "SO:0001567" },
- { "splice_region_variant", "splice_region_variant", "SO:0001630" },
- { NULL, NULL, NULL }
-};
-
-static boolean snpSOFromFunc(char *funcTerm, char **retSoTerm, char **retSoId)
-/* Look up snpNNN.func term (or SO term) in static array snpFuncToSO and set
- * corresponding Sequence Ontology term and accession; return TRUE if found. */
+ { "locus", "feature_variant" },
+ { "locus-region", "feature_variant" },
+ { "coding", "coding_sequence_variant" },
+ { "coding-synon", "synonymous_variant" },
+ { "coding-nonsynon", "protein_altering_variant" },
+ { "untranslated", "UTR_variant" },
+ { "mrna-utr", "UTR_variant" },
+ { "intron", "intron_variant" },
+ { "splice-site", "splice_site_variant" },
+ { "cds-reference", "coding_sequence_variant" },
+ { "cds-synonymy-unknown", "coding_sequence_variant" },
+ { "near-gene-3", "downstream_gene_variant" },
+ { "near-gene-5", "upstream_gene_variant" },
+ { "ncRNA", "nc_transcript_variant" },
+ { "nonsense", "stop_gained" },
+ { "missense", "missense_variant" },
+ { "stop-loss", "stop_lost" },
+ { "frameshift", "frameshift_variant" },
+ { "cds-indel", "inframe_indel" },
+ { "untranslated-3", "3_prime_UTR_variant" },
+ { "untranslated-5", "5_prime_UTR_variant" },
+ { "splice-3", "splice_acceptor_variant" },
+ { "splice-5", "splice_donor_variant" },
+ { NULL, NULL }
+};
+
+static char *snpSOFromFunc(char *funcTerm)
+/* Look up snpNNN.func term in static array snpFuncToSO and return the corresponding SO term, or NULL if funcTerm is empty or has no entry. The result points into the static table; do not free it. */
{
if (isEmpty(funcTerm))
- return FALSE;
+ return NULL;
int i;
for (i = 0; snpFuncToSO[i].funcTerm != NULL; i++)
{
- struct snpFuncSO *info = &(snpFuncToSO[i]);
- if (sameString(funcTerm, info->funcTerm) || sameString(funcTerm, info->soTerm))
- {
- if (retSoTerm != NULL)
- *retSoTerm = info->soTerm;
- if (retSoId != NULL)
- *retSoId = info->soId;
- return TRUE;
- }
+ if (sameString(funcTerm, snpFuncToSO[i].funcTerm))
+ return snpFuncToSO[i].soTerm;
}
-return FALSE;
+return NULL;
}
#define MISO_BASE_URL "http://sequenceontology.org/browser/current_release/term/"
char *snpMisoLinkFromFunc(char *funcTerm)
/* If we can map funcTerm to a Sequence Ontology term, return a link to the MISO SO browser;
* otherwise just return the same term. funcTerm may be a comma-separated list of terms. */
{
-char *soId = NULL, *soTerm = NULL;
struct dyString *dy = dyStringNew(256);
char *terms[128];
int termCount = chopCommas(cloneString(funcTerm), terms);
int i;
for (i = 0; i < termCount; i++)
{
if (i > 0)
dyStringAppend(dy, ", ");
- boolean gotSO = snpSOFromFunc(terms[i], &soTerm, &soId);
- if (gotSO)
- dyStringPrintf(dy, "%s", soId, soTerm);
+ char *soTerm = terms[i]; // first try the term as-is: it may already be an SO term
+ int soId = soTermStringToId(soTerm);
+ if (soId < 0)
+ {
+ soTerm = snpSOFromFunc(terms[i]); // otherwise map a dbSNP func term to its SO term (NULL if unmapped)
+ soId = (soTerm != NULL) ? soTermStringToId(soTerm) : -1; // never hand NULL to the ID lookup
+ }
+ if (soId >= 0)
+ dyStringPrintf(dy, "<A HREF=\"" MISO_BASE_URL "SO:%07d\" TARGET=_BLANK>%s</A>", soId, soTerm); // accession is SO: + 7 zero-padded digits
else
dyStringAppend(dy, terms[i]);
}
return dyStringCannibalize(&dy);
}
/****** LocType related controls *******/
/* Types: unknown, range, exact, between,
rangeInsertion, rangeSubstitution, rangeDeletion */
char *snp125LocTypeLabels[] = {
"Unknown",
"Range",
"Exact",
"Between",