5652d415b7ad388d28eebebdca8e448da4a3e0f3 markd Sat May 26 22:54:39 2018 -0700 Have gff3ToGenePred -warnAndContinue discard more bogus cases, such as Maker files that have exons and CDS that don't have gene or mRNA records. This way Max doesn't think I am a pathetic loser ;-) diff --git src/inc/gff3.h src/inc/gff3.h index f142aba..9c15ed1 100644 --- src/inc/gff3.h +++ src/inc/gff3.h @@ -1,23 +1,24 @@ /* * Object for accessing GFF3 files * See GFF3 specification for details of file format: * http://www.sequenceontology.org/gff3.shtml */ #ifndef gff3_h #define gff3_h + struct gff3Ann /* Annotation record from a GFF3 file. Attributes define in the spec (those * starting with upper case letters) are parsed into fields of this * object. User defined attributes (starting with lower-case characters) are * stored as in a list, along with a copy of the string versions of the spec * attributes. All strings stored in the object have been un-escaped. * All storage for the object is allocated by the gff3File object. * For discontinuous features, there are multiple gff3Ann objects. * These objects are stored in a double-linked list, and all references * point to the first one in ascending start order.*/ { struct gff3Ann *prevPart; /* Discontinuous features have linked annotation */ struct gff3Ann *nextPart; /* field name next not used to avoid confusion */ char *seqid; /* The ID of the landmark used to establish the coordinate * system for the current feature. 
IDs may contain any @@ -128,30 +129,34 @@ struct gff3Attr *attrs; /* attributes, both user-define and spec-defined, * parsed into one or more values */ struct gff3AnnRef *children; /* child nodes */ struct gff3SeqRegion *seqRegion; /* start/end of sequence region, taken * from ##sequence-region records, or * NULL if not specified.*/ struct gff3File *file; /* file this record is associated with */ int lineNum; /* line number of record in file, or -1 * if not known */ }; +/* parsing flags, passed as the flags argument of gff3FileOpen() */ +#define GFF3_WARN_WHEN_POSSIBLE 0x01 // where possible, generate a warning and drop the entry rather than reporting an error + + struct gff3AnnRef /* A reference to a gff3Ann object */ { struct gff3AnnRef *next; /* next link in the chain */ struct gff3Ann *ann; /* reference to object */ }; struct gff3Attr /* an attribute and string values */ { struct gff3Attr *next; /* next attribute in the list */ char *tag; /* name of attribute */ struct slName *vals; /* values for the attribute */ }; @@ -179,30 +184,31 @@ * if none are specified */ struct slName *featureOntologies; /* feature ontology URIs */ struct slName *attributeOntologies; /* attribute ontology URIs */ struct slName *sourceOntologies; /* source ontology URIs */ struct slName *species; /* Species, usually NCBI Taxonomy * URI */ char *genomeBuildSource; /* source of genome build */ char *genomeBuildName; /* name or version of genome build */ struct dnaSeq *seqs; /* list of sequences */ struct hash *seqMap; /* map of sequence ids to sequence * string from ##FASTA section or * NULL if none specified */ struct lineFile *lf; /* only set while parsing */ FILE *errFh; /* write errors to this file */ + unsigned int flags; /* GFF3_* flags controlling parsing */ int maxErr; /* maximum number of errors before aborting */ int errCnt; /* error count */ }; /* standard attribute tags */ extern char *gff3AttrID; extern char *gff3AttrName; extern char *gff3AttrAlias; extern char *gff3AttrParent; extern char *gff3AttrTarget; extern char *gff3AttrGap; extern char *gff3AttrDerivesFrom; 
extern char *gff3AttrNote; extern char *gff3AttrDbxref; @@ -217,35 +223,35 @@ extern char *gff3FeatTRna; extern char *gff3FeatExon; extern char *gff3FeatCDS; extern char *gff3FeatThreePrimeUTR; extern char *gff3FeatFivePrimeUTR; extern char *gff3FeatStartCodon; extern char *gff3FeatStopCodon; extern char *gff3FeatTranscript; extern char *gff3FeatPrimaryTranscript; extern char *gff3FeatCGeneSegment; extern char *gff3FeatDGeneSegment; extern char *gff3FeatJGeneSegment; extern char *gff3FeatVGeneSegment; -struct gff3File *gff3FileOpen(char *fileName, int maxErr, FILE *errFh); +struct gff3File *gff3FileOpen(char *fileName, int maxErr, unsigned flags, FILE *errFh); /* Parse a GFF3 file into a gff3File object. If maxErr not zero, then * continue to parse until this number of error have been reached. A maxErr * less than zero does not stop reports all errors. Write errors to errFh, - * if NULL, use stderr. */ + * if NULL, use stderr. The flags argument takes the GFF3_* flags defined above. */ void gff3FileFree(struct gff3File **g3fPtr); /* Free a gff3File object */ struct gff3Ann *gff3FileFindAnn(struct gff3File *g3f, char *id); /* find an annotation record by id, or NULL if not found. */ struct gff3Attr *gff3AnnFindAttr(struct gff3Ann *g3a, char *tag); /* find a user attribute, or NULL */ void gff3AnnWrite(struct gff3Ann *g3a, FILE *fh); /* Write an annotation record to the specified file. * This only writes a single record, it is not recursive.*/ void gff3FileWrite(struct gff3File *g3f, char *fileName);