43bc39cf7ce2525cc9b11251ec10272b9f889910 markd Thu Aug 24 08:57:53 2023 -0700 fixed incorrect check for discontinuous features with multiple parents diff --git src/lib/gff3.c src/lib/gff3.c index 4da2b8a..0dd27a8 100644 --- src/lib/gff3.c +++ src/lib/gff3.c @@ -8,38 +8,30 @@ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */ #include "common.h" #include "gff3.h" #include <limits.h> #include "errAbort.h" #include "localmem.h" #include "hash.h" #include "linefile.h" #include "dystring.h" #include "fa.h" // FIXME: spec unclear if attributes can be specified multiple times // FIXME: spec unclear on attribute of discontinuous features. // FIXME: should spaces be striped from attributes? -/* - * Notes: - * - a separate feature object that linked discontinuous feature annotations - * was not used because it create more complexity with the linking of parents - * and the fact that the restriction on discontinguous features attributes is - * not clearly defined. - */ - static const int gffNumCols = 9; /* standard attribute names */ char *gff3AttrID = "ID"; char *gff3AttrName = "Name"; char *gff3AttrAlias = "Alias"; char *gff3AttrParent = "Parent"; char *gff3AttrTarget = "Target"; char *gff3AttrGap = "Gap"; char *gff3AttrDerivesFrom = "Derives_from"; char *gff3AttrNote = "Note"; char *gff3AttrDbxref = "Dbxref"; char *gff3AttrOntologyTerm = "Ontology_term"; char *gff3AttrIsCircular = "Is_circular"; @@ -928,54 +920,43 @@ } static int gff3AnnCount(struct gff3Ann *g3a) /* count the number of gff3Ann objects linked together in a feature */ { int cnt = 0; for (; g3a != NULL; g3a = g3a->nextPart) cnt++; return cnt; } static void discontinFeatureCheck(struct gff3Ann *g3a) /* sanity check linked gff3Ann discontinuous features */ { // Add non-spec restriction on only one parent id to make parent id checks easier - -if (slCount(g3a->parentIds) != 1) +if (slCount(g3a->parentIds) > 1) gff3AnnErr(g3a, FALSE, "Annotation records for discontinuous features with 
ID=\"%s\" must have one and only one parent", g3a->id); for (struct gff3Ann *g3a2 = g3a->nextPart; (g3a2 != NULL) && !gff3FileStopDueToErrors(g3a->file); g3a2 = g3a2->nextPart) { - if (slCount(g3a2->parentIds) != 1) + if (slCount(g3a2->parentIds) > 1) gff3AnnErr(g3a2, FALSE, "Annotation records for discontinuous features with ID=\"%s\" must have one and only one parent", g3a2->id); else if (!sameString(g3a->type, g3a2->type)) gff3AnnErr(g3a2, FALSE, "Annotation records for discontinuous features with ID=\"%s\" do not have the same type, found \"%s\" and \"%s\"", g3a->id, g3a->type, g3a2->type); - else if (!sameString(g3a2->parentIds->name, g3a->parentIds->name)) + else if ((slCount(g3a2->parentIds) > 0) && + !sameString(g3a2->parentIds->name, g3a->parentIds->name)) gff3AnnErr(g3a2, FALSE, "Annotation records for discontinuous features with ID=\"%s\" must have same parent, found: \"%s\" and \"%s\"", g3a->id, g3a2->parentIds->name, g3a->parentIds->name); } - -// The discontinuous features abomination means we can't check for duplicate -// ids in features were it makes no sense. Add non-spec restriction. -if (!(sameString(g3a->type, gff3FeatCDS) || - sameString(g3a->type, gff3FeatUTR) || - sameString(g3a->type, gff3FeatThreePrimeUTR) || - sameString(g3a->type, gff3FeatFivePrimeUTR) || - sameString(g3a->type, gff3FeatStartCodon) || - sameString(g3a->type, gff3FeatStopCodon))) - gff3AnnErr(g3a, FALSE, "Incorrect duplicated ID=\"%s\" or attempt to create discontinuous features for type \"%s\"", - g3a->id, g3a->type); } static void discontinFeatureFillArray(struct gff3Ann *g3a, int numAnns, struct gff3Ann *featAnns[]) /* convert list to array for sorting */ { int i = 0; for (; g3a != NULL; g3a = g3a->nextPart) featAnns[i++] = g3a; } static struct gff3Ann *discontinFeatureArrayLink(int numAnns, struct gff3Ann *featAnns[]) /* convert sorted array to a list */ { struct gff3Ann *g3aHead = NULL, *g3aPrev = NULL; int i;