4bb621ce7eb13b685bf5c9fde408220abda8d35e angie Wed Aug 24 12:08:48 2016 -0700 Jonathan noticed in code review that I threw away some SO terms that appeared only in snp125Ui.c in e49078c1, and also suggested using a single mapping table instead of duplicating the mappings in soTerm.c. Better now. refs #17897, #17209 Also corrected a SO term: non_coding_exon_variant --> non_coding_transcript_exon_variant. diff --git src/hg/lib/soTerm.c src/hg/lib/soTerm.c index 6563aee..aed1341 100644 --- src/hg/lib/soTerm.c +++ src/hg/lib/soTerm.c @@ -1,102 +1,79 @@ /* soTerm - Sequence Ontology terms that we use for compatibility with Ensembl & others. */ /* Copyright (C) 2014 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "soTerm.h" -char *soTermToString(enum soTerm termNumber) -/* Translate termNumber to its string equivalent. Do not modify or free result. */ -{ -switch (termNumber) - { - case regulatory_region_variant : return "regulatory_region_variant"; break; - case stop_retained_variant : return "stop_retained_variant"; break; - case exon_loss : return "exon_loss"; break; - case splice_acceptor_variant : return "splice_acceptor_variant"; break; - case splice_donor_variant : return "splice_donor_variant"; break; - case complex_transcript_variant : return "complex_transcript_variant"; break; - case stop_lost : return "stop_lost"; break; - case coding_sequence_variant : return "coding_sequence_variant"; break; - case initiator_codon_variant : return "initiator_codon_variant"; break; - case missense_variant : return "missense_variant"; break; - case stop_gained : return "stop_gained"; break; - case frameshift_variant : return "frameshift_variant"; break; - case nc_transcript_variant : return "nc_transcript_variant"; break; - case mature_miRNA_variant : return "mature_miRNA_variant"; break; - case NMD_transcript_variant : return "NMD_transcript_variant"; break; - case _5_prime_UTR_variant : return "5_prime_UTR_variant"; break; - case _3_prime_UTR_variant : return "3_prime_UTR_variant"; break; - case incomplete_terminal_codon_variant : return "incomplete_terminal_codon_variant"; break; - case intron_variant : return "intron_variant"; break; - case intergenic_variant : return "intergenic_variant"; break; - case splice_region_variant : return "splice_region_variant"; break; - case upstream_gene_variant : return "upstream_gene_variant"; break; - case downstream_gene_variant : return "downstream_gene_variant"; break; - case TF_binding_site_variant : return "TF_binding_site_variant"; break; - case non_coding_exon_variant : return "non_coding_transcript_exon_variant"; break; - case protein_altering_variant : return "protein_altering_variant"; break; - case synonymous_variant : return "synonymous_variant"; break; - case inframe_deletion : return "inframe_deletion"; break; - case inframe_insertion : return "inframe_insertion"; break; - default: - errAbort("soTermToString: don't recognize term %u", termNumber); - } -return "ERROR"; // never get here -} - -// Map term strings back to integer IDs: -struct strId +struct soStringToId +// Map SO term string name to int ID { - char *str; - int id; + char *term; // string name for term, e.g. "stop_lost" + enum soTerm id; // integer ID for term, e.g. 1578 for "SO:0001578" }; -static struct strId strToId[] = - { +static struct soStringToId soStringToId[] = { { "regulatory_region_variant", regulatory_region_variant }, { "stop_retained_variant", stop_retained_variant }, { "exon_loss", exon_loss }, { "splice_acceptor_variant", splice_acceptor_variant }, { "splice_donor_variant", splice_donor_variant }, { "complex_transcript_variant", complex_transcript_variant }, { "stop_lost", stop_lost }, { "coding_sequence_variant", coding_sequence_variant }, { "initiator_codon_variant", initiator_codon_variant }, { "missense_variant", missense_variant }, { "stop_gained", stop_gained }, { "frameshift_variant", frameshift_variant }, { "nc_transcript_variant", nc_transcript_variant }, { "mature_miRNA_variant", mature_miRNA_variant }, { "NMD_transcript_variant", NMD_transcript_variant }, + { "UTR_variant", UTR_variant }, { "5_prime_UTR_variant", _5_prime_UTR_variant }, { "3_prime_UTR_variant", _3_prime_UTR_variant }, { "incomplete_terminal_codon_variant", incomplete_terminal_codon_variant }, { "intron_variant", intron_variant }, { "intergenic_variant", intergenic_variant }, + { "splice_site_variant", splice_site_variant }, { "splice_region_variant", splice_region_variant }, { "upstream_gene_variant", upstream_gene_variant }, { "downstream_gene_variant", downstream_gene_variant }, { "TF_binding_site_variant", TF_binding_site_variant }, - { "non_coding_exon_variant", non_coding_exon_variant }, + { "non_coding_transcript_exon_variant", non_coding_transcript_exon_variant }, { "protein_altering_variant", protein_altering_variant }, { "synonymous_variant", synonymous_variant }, + { "inframe_indel", inframe_indel }, { "inframe_insertion", inframe_insertion }, { "inframe_deletion", inframe_deletion }, + { "feature_variant", feature_variant }, { NULL, 0 } }; +char *soTermToString(enum soTerm termNumber) +/* Translate termNumber to its string equivalent; errAbort if not found. + * Do not modify or free result. */ +{ +int i; +for (i = 0; soStringToId[i].term != NULL; i++) + { + if (termNumber == soStringToId[i].id) + return soStringToId[i].term; + } +errAbort("soTermToString: don't recognize term %u", termNumber); +return NULL; // never get here +} + int soTermStringToId(char *soTermStr) /* Translate soTermStr into its numeric ID. Return -1 if soTermStr is not recognized. */ { if (isEmpty(soTermStr)) return -1; int i; -for (i = 0; strToId[i].str != NULL; i++) +for (i = 0; soStringToId[i].term != NULL; i++) { - if (sameString(soTermStr, strToId[i].str)) - return strToId[i].id; + if (sameString(soTermStr, soStringToId[i].term)) + return soStringToId[i].id; } return -1; }