21441c54b2c684a56954b0a2b9d7f352b6d99432 markd Wed Jul 5 21:54:37 2023 -0700 added checking for GFF3 incorrectly duplicated ids masquerading as the horrible discontinuous features GFF3 mess diff --git src/lib/gff3.c src/lib/gff3.c index 9c12e32..05aad9c 100644 --- src/lib/gff3.c +++ src/lib/gff3.c @@ -926,36 +926,53 @@ slReverse(&g3f->anns); } static int gff3AnnCount(struct gff3Ann *g3a) /* count the number of gff3Ann objects linked together in a feature */ { int cnt = 0; for (; g3a != NULL; g3a = g3a->nextPart) cnt++; return cnt; } static void discontinFeatureCheck(struct gff3Ann *g3a) /* sanity check linked gff3Ann discontinuous features */ { -struct gff3Ann *g3a2; -for (g3a2 = g3a->nextPart; (g3a2 != NULL) && !gff3FileStopDueToErrors(g3a->file); g3a2 = g3a2->nextPart) +// Add non-spec restriction on only one parent id to make parent id checks easier + +if (slCount(g3a->parentIds) != 1) + gff3AnnErr(g3a, FALSE, "Annotation records for discontinuous features with ID=\"%s\" must have one and only one parent", g3a->id); +for (struct gff3Ann *g3a2 = g3a->nextPart; (g3a2 != NULL) && !gff3FileStopDueToErrors(g3a->file); g3a2 = g3a2->nextPart) { if (!sameString(g3a->type, g3a2->type)) gff3AnnErr(g3a, FALSE, "Annotation records for discontinuous features with ID=\"%s\" do not have the same type, found \"%s\" and \"%s\"", g3a->id, g3a->type, g3a2->type); - } + if (!sameString(g3a->type, g3a2->type)) + gff3AnnErr(g3a, FALSE, "Annotation records for discontinuous features with ID=\"%s\" do not have the same type, found \"%s\" and \"%s\"", g3a->id, g3a->type, g3a2->type); + if ((slCount(g3a2->parentIds) != 1) || !sameString(g3a2->parentIds->name, g3a->parentIds->name)) + gff3AnnErr(g3a, FALSE, "Annotation records for discontinuous features with ID=\"%s\" must have same parent", g3a->id); + } + +// The discontinuous features abomination means we can't check for duplicate +// ids in features were it makes no sense. Add non-spec restriction. 
+if (!(sameString(g3a->type, gff3FeatCDS) || + sameString(g3a->type, gff3FeatThreePrimeUTR) || + sameString(g3a->type, gff3FeatFivePrimeUTR) || + sameString(g3a->type, gff3FeatStartCodon) || + sameString(g3a->type, gff3FeatStopCodon))) + gff3AnnErr(g3a, FALSE, "Incorrect duplicated ID=\"%s\" or attempt to create discontinuous features for type \"%s\"", + g3a->id, g3a->type); } static void discontinFeatureFillArray(struct gff3Ann *g3a, int numAnns, struct gff3Ann *featAnns[]) /* convert list to array for sorting */ { int i = 0; for (; g3a != NULL; g3a = g3a->nextPart) featAnns[i++] = g3a; } static struct gff3Ann *discontinFeatureArrayLink(int numAnns, struct gff3Ann *featAnns[]) /* convert sorted array to a list */ { struct gff3Ann *g3aHead = NULL, *g3aPrev = NULL; int i;