9a5c1dc41298119f9569b12985a34146212dfe1a
angie
  Wed Apr 10 09:36:38 2019 -0700
Added soTermStringIdToId, soTermCmp, and soTermToMisoLink, for use with new dbSNP data.  refs #23283

diff --git src/hg/lib/soTerm.c src/hg/lib/soTerm.c
index a37b578..2dcb507 100644
--- src/hg/lib/soTerm.c
+++ src/hg/lib/soTerm.c
@@ -1,23 +1,27 @@
 /* soTerm - Sequence Ontology terms that we use for compatibility with Ensembl & others. */
 
 /* Copyright (C) 2014 The Regents of the University of California
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
+#include "dystring.h"
 #include "soTerm.h"
 
+#define SO_PREFIX "SO:"
+#define MISO_URL_BASE "http://www.sequenceontology.org/browser/current_svn/term/"
+
 struct soStringToId
 // Map SO term string name to int ID
     {
     char *term;      // string name for term, e.g. "stop_lost"
     enum soTerm id;  // integer ID for term, e.g. 1578 for "SO:0001578"
     };
 
 static struct soStringToId soStringToId[] = {
     { "regulatory_region_variant", regulatory_region_variant },
     { "stop_retained_variant", stop_retained_variant },
     { "exon_loss_variant", exon_loss_variant },
     { "splice_acceptor_variant", splice_acceptor_variant },
     { "splice_donor_variant", splice_donor_variant },
     { "complex_transcript_variant", complex_transcript_variant },
     { "stop_lost", stop_lost },
@@ -67,15 +71,118 @@
 }
 
 int soTermStringToId(char *soTermStr)
 /* Translate soTermStr into its numeric ID.  Return -1 if soTermStr is not recognized. */
 {
 if (isEmpty(soTermStr))
     return -1;
 int i;
 for (i = 0;  soStringToId[i].term != NULL;  i++)
     {
     if (sameString(soTermStr, soStringToId[i].term))
         return soStringToId[i].id;
     }
 return -1;
 }
+
+enum soTerm soTermStringIdToId(char *soIdStr)
+/* Given a string like "SO:0001627", parse out the numeric ID and convert to enum soTerm.
+ * Calls errAbort if soIdStr is NULL, does not start with "SO:", or if isAllDigits rejects
+ * the text after the prefix. */
+{
+int id = 0;
+if (soIdStr == NULL)
+    errAbort("soTermStringIdToId: expected string starting with '"SO_PREFIX"' but got NULL");
+else if (startsWith(SO_PREFIX, soIdStr))
+    {
+    char *numPart = soIdStr + strlen(SO_PREFIX);
+    if (!isAllDigits(numPart))
+        errAbort("soTermStringIdToId: expected '"SO_PREFIX"' followed by a number, but got '%s'",
+                 soIdStr);
+    else
+        id = atoi(numPart);
+    }
+else
+    errAbort("soTermStringIdToId: expected string starting with '"SO_PREFIX"' but got '%s'",
+             soIdStr);
+return (enum soTerm)id;
+}
+
+char *soTermToMisoLink(enum soTerm term)
+/* Return an HTML <a> link to the MISO browser page for term
+ * (except if it's soUnknown, just return text not a link). */
+{
+char *link = NULL;
+if (term == soUnknown)
+    link = cloneString(soTermToString(term));
+else
+    {
+    struct dyString *dy = dyStringCreate("<a href='" MISO_URL_BASE SO_PREFIX "%07d' "
+                                         "target=_blank>%s</a>", term, soTermToString(term));
+    link = dyStringCannibalize(&dy);
+    }
+return link;
+}
+
+// Ranking of functional impact, highest first, adapted from recommendations in
+// http://snpeff.sourceforge.net/VCFannotationformat_v1.0.pdf :
+enum soTerm funcImpactOrder[] =
+    {
+    transcript_ablation,
+    exon_loss_variant,
+    frameshift_variant,
+    stop_gained,
+    stop_lost,
+    splice_acceptor_variant,
+    splice_donor_variant,
+    splice_site_variant,
+    missense_variant,
+    inframe_insertion,
+    inframe_deletion,
+    inframe_indel,
+    splice_region_variant,
+    initiator_codon_variant,
+    protein_altering_variant,
+    synonymous_variant,
+    incomplete_terminal_codon_variant,
+    stop_retained_variant,
+    coding_sequence_variant,
+    _5_prime_UTR_variant,
+    _3_prime_UTR_variant,
+    UTR_variant,
+    complex_transcript_variant,
+    upstream_gene_variant,
+    downstream_gene_variant,
+    TF_binding_site_variant,
+    regulatory_region_variant,
+    mature_miRNA_variant,
+    feature_variant,
+    intron_variant,
+    intergenic_variant,
+    non_coding_transcript_exon_variant,
+    nc_transcript_variant,
+    NMD_transcript_variant,
+    no_sequence_alteration,
+    };
+
+INLINE int getFuncImpactRank(enum soTerm id)
+/* Return the index of id in funcImpactOrder, a sequence ordered by descending functional
+ * impact (so index 0 is the highest impact).  Calls errAbort if id is not in the array. */
+{
+int i;
+for (i = 0;  i < ArraySize(funcImpactOrder);  i++)
+    if (id == funcImpactOrder[i])
+        return i;
+errAbort("soTermCmp: unrecognized soTerm " SO_PREFIX "%07d", id);
+return -1;
+}
+
+int soTermCmp(const void *a, const void *b)
+/* Compare two enum soTerms for sorting by descending order of functional impact.
+ * a and b point to enum soTerm values; errAborts if either is missing from funcImpactOrder. */
+{
+enum soTerm id1 = *(const enum soTerm *)a;
+enum soTerm id2 = *(const enum soTerm *)b;
+int impact1 = getFuncImpactRank(id1);
+int impact2 = getFuncImpactRank(id2);
+return impact1 - impact2;
+}