04c67a0e7bcb65c217598e5e44084bf4c5c0674d angie Tue Mar 5 12:16:10 2013 -0800 Feature #6152 (Variant Annotation Integrator): first cut of annoFormatVep which produces output in the format of Ensembl's Variant Effect Predictor (VEP), along with modifications to soTerm, gpFx and annoGratorGpVar to enable VEP-like output. diff --git src/hg/inc/soTerm.h src/hg/inc/soTerm.h new file mode 100644 index 0000000..ce5b21e --- /dev/null +++ src/hg/inc/soTerm.h @@ -0,0 +1,82 @@ +/* soTerm.h --- Sequence Ontology terms and supporting data structures */ + +// would be nice to more closely match Ensembl's subset of SO: +// http://uswest.ensembl.org/info/docs/variation/predicted_data.html#consequences + +#ifndef SOTERM_H +#define SOTERM_H + +enum soTerm // the various variant effects + { + regulatory_region_variant=1566, + stop_retained_variant=1567, + splice_acceptor_variant=1574, + splice_donor_variant=1575, + complex_transcript_variant=1577, + stop_lost=1578, + coding_sequence_variant=1580, + initiator_codon_variant=1582, + missense_variant=1583, + stop_gained=1587, + frameshift_variant=1589, + nc_transcript_variant=1619, + mature_miRNA_variant=1620, + NMD_transcript_variant=1621, + _5_prime_UTR_variant=1623, + _3_prime_UTR_variant=1624, + incomplete_terminal_codon_variant=1626, + intron_variant=1627, + intergenic_variant=1628, + splice_region_variant=1630, + upstream_gene_variant=1631, + downstream_gene_variant=1632, + TF_binding_site_variant=1782, + non_coding_exon_variant=1792, + protein_altering_variant=1818, + synonymous_variant=1819, + inframe_insertion=1821, + inframe_deletion=1822, + }; + +struct soCall // a single variant effect call + { + struct soCall *next; + char *transcript; // ID of feature affected by this call + uint soNumber; // Sequence Ontology Number of effect + union + { + struct codingChange // (non)synonymous variant, deletions in CDS + { + uint exonNumber; + + // the next three should have a length specified too + uint cDnaPosition; // offset of variant 
in transcript cDNA + uint cdsPosition; // offset of variant from transcript's cds start + uint pepPosition; // offset of variant in translated product + char *aaOld; // peptides, before change by variant (starting at pepPos) + char *aaNew; // peptides, changed by variant + char *codonOld; // codons, before change by variant (starting at cdsPos) + char *codonNew; // codons, changed by variant + } codingChange; + struct // intron_variant + { + uint intronNumber; + } intron; + struct // a generic variant + { + char *soOther0; // Ancillary detail 0 + char *soOther1; // Ancillary detail 1 + char *soOther2; // Ancillary detail 2 + char *soOther3; // Ancillary detail 3 + char *soOther4; // Ancillary detail 4 + char *soOther5; // Ancillary detail 5 + char *soOther6; // Ancillary detail 6 + char *soOther7; // Ancillary detail 7 + } generic; + } sub; + }; + +char *soTermToString(enum soTerm termNumber); +/* Translate termNumber to its string equivalent. Do not modify or free result. */ + +#endif /* SOTERM_H */