88d10fbd0db11b5602e39f14809ea5fee2df08db
markd
  Sat Oct 30 12:42:41 2010 -0700
added support for GFF3 discontinuous features
diff --git src/inc/gff3.h src/inc/gff3.h
index b69b128..2184209 100644
--- src/inc/gff3.h
+++ src/inc/gff3.h
@@ -1,32 +1,36 @@
 /*
  * Object for accessing GFF3 files
  * See GFF3 specification for details of file format:
  *   http://www.sequenceontology.org/gff3.shtml
  */
 #ifndef gff3_h
 #define gff3_h
 
 struct gff3Ann
 /* Annotation record from a GFF3 file.  Attributes defined in the spec (those
  * starting with upper case letters) are parsed into fields of this
  * object. User defined attributes (starting with lower-case characters) are
  * stored in a list, along with a copy of the string versions of the spec
  * attributes. All strings stored in the object have been un-escaped.
- * All storage for the object is allocated by the gff3File object. */
+ * All storage for the object is allocated by the gff3File object.
+ * For discontinuous features, there are multiple gff3Ann objects.
+ * These objects are stored in a doubly-linked list, and all references
+ * point to the first one in ascending start order. */
 {
-    struct gff3Ann *next; /* links all gff3Ann objects */
+    struct gff3Ann *prevPart; /* Discontinuous features have linked annotation */
+    struct gff3Ann *nextPart; /* field name next not used to avoid confusion */
     char *seqid;   /* The ID of the landmark used to establish the coordinate
                     * system for the current feature. IDs may contain any
                     * characters. */
     char *source;  /* The source is a free text qualifier intended to describe
                     * the algorithm or operating procedure that generated this
                     * feature.  Typically this is the name of a piece of
                     * software, such as "Genescan" or a database name, such as
                     * "Genbank."  In effect, the source is used to extend the
                     * feature ontology by adding a qualifier to the type
                     * creating a new composite type that is a subclass of the
                     * type in the type column. */
 
     char *type; /* The type of the feature (previously called the "method").
                  * This is constrained to be either: (a) a term from the
                  * "lite" sequence ontology, SOFA; or (b) a SOFA accession
@@ -150,33 +154,33 @@
 };
 
 struct gff3SeqRegion
 /* Start/end of a sequence region, taken from a ##sequence-region record. */
 {
     struct gff3SeqRegion *next;     /* next region */
     char *seqid;    /* sequence id of region */
     int start;      /* bounds of region */
     int end;
 };
 
 struct gff3File
 /* Object representing a GFF file. Manages all memory for related objects. */
 {
     char *fileName;       /* path of file that was parsed */
-    struct hash *byId;    /* index of gff3Ann object by id */
-    struct gff3Ann *anns; /* all records in the file */
-    struct gff3AnnRef *roots;  /* all records without parents */
+    struct hash *byId;    /* index of gff3Ann object by id.  Links to first object of linked discontinuous features */
+    struct gff3AnnRef *anns;   /* all records in the file. Includes all parts of discontinuous features */
+    struct gff3AnnRef *roots;  /* all records without parents. */
     struct hash *pool;         /* used to allocate string values that tend to
                                 * be repeated in the files.  localMem is also used
                                 * to allocate memory for all other objects. */
     struct gff3SeqRegion *seqRegions;  /* list of gff3SeqRegion objects. */
     struct hash *seqRegionMap;  /* map of seqId to gff3SeqRegion objects. NULL
                                  * if none are specified */
 
     struct slName *featureOntologies;    /* feature ontology URIs */
     struct slName *attributeOntologies;  /* attribute ontology URIs */
     struct slName *sourceOntologies;     /* source ontology URIs */
     struct slName *species;              /* Species, usually NCBI Taxonomy
                                           * URI */
     char *genomeBuildSource;             /* source of genome build */
     char *genomeBuildName;               /* name or version of genome build */
     struct dnaSeq *seqs;                 /* list of sequences */