23e3f7b1a4c78acd2198da73f4a4392290c51736 angie Fri Apr 19 16:12:24 2013 -0700 Added detection of many new SO terms to gpFx: {5,3} UTR, splice_{donor,acceptor},non_coding_exon, and very detailed coding change terms to match Ensembl e.g. stop_retained_variant, NMD_transcript_variant). Also tweaks to annoFormatVep to represent insertion positions the way they do. TODO: lots more testing! and convert gpFx to use localmem. refs #6152 diff --git src/hg/inc/soTerm.h src/hg/inc/soTerm.h index ce5b21e..befac04 100644 --- src/hg/inc/soTerm.h +++ src/hg/inc/soTerm.h @@ -1,16 +1,19 @@ -/* soTerm.h --- Sequence Ontology terms and supporting data structures */ +/* soTerm.h --- a subset of locally used Sequence Ontology terms */ + +// To explore the full tree of SO terms and their relationships, try the MISO browser: +// http://www.sequenceontology.org/browser/obob.cgi // would be nice to more closely match Ensembl's subset of SO: // http://uswest.ensembl.org/info/docs/variation/predicted_data.html#consequences #ifndef SOTERM_H #define SOTERM_H enum soTerm // the various variant effects { regulatory_region_variant=1566, stop_retained_variant=1567, splice_acceptor_variant=1574, splice_donor_variant=1575, complex_transcript_variant=1577, stop_lost=1578, @@ -26,57 +29,19 @@ _3_prime_UTR_variant=1624, incomplete_terminal_codon_variant=1626, intron_variant=1627, intergenic_variant=1628, splice_region_variant=1630, upstream_gene_variant=1631, downstream_gene_variant=1632, TF_binding_site_variant=1782, non_coding_exon_variant=1792, protein_altering_variant=1818, synonymous_variant=1819, inframe_insertion=1821, inframe_deletion=1822, }; -struct soCall // a single variant effect call - { - struct soCall *next; - char *transcript; // ID of feature affected by this call - uint soNumber; // Sequence Ontology Number of effect - union - { - struct codingChange // (non)synonymous variant, deletions in CDS - { - uint exonNumber; - - // the next three should have a length specified too - uint cDnaPosition; // offset of variant in transcript cDNA - uint cdsPosition; // offset of variant from transcript's cds start - uint pepPosition; // offset of variant in translated product - char *aaOld; // peptides, before change by variant (starting at pepPos) - char *aaNew; // peptides, changed by variant - char *codonOld; // codons, before change by variant (starting at cdsPos) - char *codonNew; // codons, changed by variant - } codingChange; - struct // intron_variant - { - uint intronNumber; - } intron; - struct // a generic variant - { - char *soOther0; // Ancillary detail 0 - char *soOther1; // Ancillary detail 1 - char *soOther2; // Ancillary detail 2 - char *soOther3; // Ancillary detail 3 - char *soOther4; // Ancillary detail 4 - char *soOther5; // Ancillary detail 5 - char *soOther6; // Ancillary detail 6 - char *soOther7; // Ancillary detail 7 - } generic; - } sub; - }; - char *soTermToString(enum soTerm termNumber); /* Translate termNumber to its string equivalent. Do not modify or free result. */ #endif /* SOTERM_H */