9a5c1dc41298119f9569b12985a34146212dfe1a
angie
  Wed Apr 10 09:36:38 2019 -0700
Added soTermStringIdToId, soTermCmp, and soTermToMisoLink, for use with new dbSNP data.  refs #23283

diff --git src/hg/lib/soTerm.c src/hg/lib/soTerm.c
index a37b578..2dcb507 100644
--- src/hg/lib/soTerm.c
+++ src/hg/lib/soTerm.c
@@ -1,23 +1,27 @@
 /* soTerm - Sequence Ontology terms that we use for compatibility with Ensembl & others. */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
+#include "dystring.h"
 #include "soTerm.h"
 
+#define SO_PREFIX "SO:"
+#define MISO_URL_BASE "http://www.sequenceontology.org/browser/current_svn/term/"
+
 struct soStringToId
 // Map SO term string name to int ID
     {
     char *term;      // string name for term, e.g. "stop_lost"
     enum soTerm id;  // integer ID for term, e.g. 1578 for "SO:0001578"
     };
 
 static struct soStringToId soStringToId[] = {
     { "regulatory_region_variant", regulatory_region_variant },
     { "stop_retained_variant", stop_retained_variant },
     { "exon_loss_variant", exon_loss_variant },
     { "splice_acceptor_variant", splice_acceptor_variant },
     { "splice_donor_variant", splice_donor_variant },
     { "complex_transcript_variant", complex_transcript_variant },
     { "stop_lost", stop_lost },
@@ -67,15 +71,111 @@
 }
 
 int soTermStringToId(char *soTermStr)
 /* Translate soTermStr into its numeric ID.  Return -1 if soTermStr is not recognized. */
 {
 if (isEmpty(soTermStr))
     return -1;
 int i;
 for (i = 0;  soStringToId[i].term != NULL;  i++)
     {
     if (sameString(soTermStr, soStringToId[i].term))
         return soStringToId[i].id;
     }
 return -1;
 }
+
+enum soTerm soTermStringIdToId(char *soIdStr)
+/* Given a string like "SO:0001627", parse out the numeric ID and convert to enum soTerm.
+ * Calls errAbort if soIdStr does not start with SO_PREFIX, or if the text after the prefix
+ * fails isAllDigits.  The parsed number is cast to enum soTerm without checking that it
+ * is one of the enum's defined terms. */
+{
+if (!startsWith(SO_PREFIX, soIdStr))
+    errAbort("soTermStringIdToId: expected string starting with '"SO_PREFIX"' but got '%s'",
+             soIdStr);
+// Everything after the prefix must be numeric.
+char *numPart = soIdStr + strlen(SO_PREFIX);
+if (!isAllDigits(numPart))
+    errAbort("soTermStringIdToId: expected '"SO_PREFIX"' followed by a number, but got '%s'",
+             soIdStr);
+// atoi skips the leading zeroes, e.g. "0001627" -> 1627.
+return (enum soTerm)atoi(numPart);
+}
+
+char *soTermToMisoLink(enum soTerm term)
+/* Return display text for term: an HTML <a> element whose href is the term's page under
+ * MISO_URL_BASE and whose text is soTermToString(term).  For soUnknown, return just a
+ * copy of the term's string with no link around it.
+ * NOTE(review): the result looks newly allocated in both cases (cloneString /
+ * dyStringCannibalize), so presumably the caller frees it -- confirm. */
+{
+if (term == soUnknown)
+    return cloneString(soTermToString(term));
+// The href ends in the SO accession:
+// SO_PREFIX followed by the numeric ID zero-padded to 7 digits.
+struct dyString *linkDy = dyStringCreate("<a href='" MISO_URL_BASE SO_PREFIX "%07d' "
+                                         "target=_blank>%s</a>", term, soTermToString(term));
+return dyStringCannibalize(&linkDy);
+}
+
+// Ranking of functional impact, highest first, adapted from recommendations in
+// http://snpeff.sourceforge.net/VCFannotationformat_v1.0.pdf :
+enum soTerm funcImpactOrder[] =  // a term's index here is its rank in getFuncImpactRank
+    {
+    transcript_ablation,  // index 0: ranked as highest impact
+    exon_loss_variant,
+    frameshift_variant,
+    stop_gained,
+    stop_lost,
+    splice_acceptor_variant,
+    splice_donor_variant,
+    splice_site_variant,
+    missense_variant,
+    inframe_insertion,
+    inframe_deletion,
+    inframe_indel,
+    splice_region_variant,
+    initiator_codon_variant,
+    protein_altering_variant,
+    synonymous_variant,
+    incomplete_terminal_codon_variant,
+    stop_retained_variant,
+    coding_sequence_variant,
+    _5_prime_UTR_variant,
+    _3_prime_UTR_variant,
+    UTR_variant,
+    complex_transcript_variant,
+    upstream_gene_variant,
+    downstream_gene_variant,
+    TF_binding_site_variant,
+    regulatory_region_variant,
+    mature_miRNA_variant,
+    feature_variant,
+    intron_variant,
+    intergenic_variant,
+    non_coding_transcript_exon_variant,
+    nc_transcript_variant,
+    NMD_transcript_variant,
+    no_sequence_alteration,  // last entry: ranked as lowest impact
+    };
+
+INLINE int getFuncImpactRank(enum soTerm id)
+/* Return id's index in funcImpactOrder (0 = highest impact); errAbort if id is not listed. */
+{
+int i, rank = -1;  // rank stays -1 until id is found, which also ends the scan
+for (i = 0;  rank < 0 && i < ArraySize(funcImpactOrder);  i++)
+    rank = (funcImpactOrder[i] == id) ? i : -1;
+if (rank < 0)
+    errAbort("soTermCmp: unrecognized soTerm SO:%d", id);
+return rank;
+}
+
+int soTermCmp(const void *a, const void *b)
+/* Comparator over pointers to enum soTerm, for sorting by descending functional impact:
+ * the result is negative when *a comes earlier in funcImpactOrder than *b. */
+{
+const enum soTerm *termA = a, *termB = b;
+int rankA = getFuncImpactRank(*termA);
+int rankB = getFuncImpactRank(*termB);
+return rankA - rankB;
+}