88d10fbd0db11b5602e39f14809ea5fee2df08db markd Sat Oct 30 12:42:41 2010 -0700 added support for GFF3 discontinuous features diff --git src/inc/gff3.h src/inc/gff3.h index b69b128..2184209 100644 --- src/inc/gff3.h +++ src/inc/gff3.h @@ -1,32 +1,36 @@ /* * Object for accessing GFF3 files * See GFF3 specification for details of file format: * http://www.sequenceontology.org/gff3.shtml */ #ifndef gff3_h #define gff3_h struct gff3Ann /* Annotation record from a GFF3 file. Attributes define in the spec (those * starting with upper case letters) are parsed into fields of this * object. User defined attributes (starting with lower-case characters) are * stored as in a list, along with a copy of the string versions of the spec * attributes. All strings stored in the object have been un-escaped. - * All storage for the object is allocated by the gff3File object. */ + * All storage for the object is allocated by the gff3File object. + * For discontinuous features, there are multiple gff3Ann objects. + * These objects are stored in a doubly-linked list, and all references + * point to the first one in ascending start order. */ { - struct gff3Ann *next; /* links all gff3Ann objects */ + struct gff3Ann *prevPart; /* Discontinuous features have linked annotations */ + struct gff3Ann *nextPart; /* the field name `next' is not used, to avoid confusion */ char *seqid; /* The ID of the landmark used to establish the coordinate * system for the current feature. IDs may contain any * characters. */ char *source; /* The source is a free text qualifier intended to describe * the algorithm or operating procedure that generated this * feature. Typically this is the name of a piece of * software, such as "Genescan" or a database name, such as * "Genbank." In effect, the source is used to extend the * feature ontology by adding a qualifier to the type * creating a new composite type that is a subclass of the * type in the type column. 
*/ char *type; /* The type of the feature (previously called the "method"). * This is constrained to be either: (a) a term from the * "lite" sequence ontology, SOFA; or (b) a SOFA accession @@ -150,33 +154,33 @@ }; struct gff3SeqRegion /* start/end of a sequence region, taken from ##sequence-region record.*/ { struct gff3SeqRegion *next; /* next region */ char *seqid; /* sequence if of region */ int start; /* bounds of region */ int end; }; struct gff3File /* Object representing a GFF file. Manages all memory for related objects. */ { char *fileName; /* path of file that was parsed */ - struct hash *byId; /* index of gff3Ann object by id */ - struct gff3Ann *anns; /* all records in the file */ - struct gff3AnnRef *roots; /* all records without parents */ + struct hash *byId; /* index of gff3Ann objects by id. Points to the first object of linked discontinuous features */ + struct gff3AnnRef *anns; /* all records in the file. Includes all parts of discontinuous features */ + struct gff3AnnRef *roots; /* all records without parents. */ struct hash *pool; /* used to allocate string values that tend to * be repeated in the files. localMem is also * to allocated memory for all other objects. */ struct gff3SeqRegion *seqRegions; /* list of gff3SeqRegion objects. */ struct hash *seqRegionMap; /* map of seqId to gff3SeqRegion objects. NULL * if none are specified */ struct slName *featureOntologies; /* feature ontology URIs */ struct slName *attributeOntologies; /* attribute ontology URIs */ struct slName *sourceOntologies; /* source ontology URIs */ struct slName *species; /* Species, usually NCBI Taxonomy * URI */ char *genomeBuildSource; /* source of genome build */ char *genomeBuildName; /* name or version of genome build */ struct dnaSeq *seqs; /* list of sequences */