21441c54b2c684a56954b0a2b9d7f352b6d99432
markd
  Wed Jul 5 21:54:37 2023 -0700
added checking for GFF3 incorrectly duplicated ids masquerading as the horrible discontinuous features GFF3 mess

diff --git src/lib/gff3.c src/lib/gff3.c
index 9c12e32..05aad9c 100644
--- src/lib/gff3.c
+++ src/lib/gff3.c
@@ -926,36 +926,53 @@
 slReverse(&g3f->anns);
 }
 
 static int gff3AnnCount(struct gff3Ann *g3a)
 /* count the number of gff3Ann objects linked together in a feature */
 {
 int cnt = 0;
 for (; g3a != NULL; g3a = g3a->nextPart)
     cnt++;
 return cnt;
 }
 
 static void discontinFeatureCheck(struct gff3Ann *g3a)
 /* sanity check linked gff3Ann discontinuous features */
 {
-struct gff3Ann *g3a2;
-for (g3a2 = g3a->nextPart; (g3a2 != NULL) && !gff3FileStopDueToErrors(g3a->file); g3a2 = g3a2->nextPart)
+// Add non-spec restriction on only one parent id to make parent id checks easier
+
+if (slCount(g3a->parentIds) != 1)
+    gff3AnnErr(g3a, FALSE, "Annotation records for discontinuous features with ID=\"%s\" must have one and only one parent", g3a->id);
+for (struct gff3Ann *g3a2 = g3a->nextPart; (g3a2 != NULL) && !gff3FileStopDueToErrors(g3a->file); g3a2 = g3a2->nextPart)
     {
     if (!sameString(g3a->type, g3a2->type))
         gff3AnnErr(g3a, FALSE, "Annotation records for discontinuous features with ID=\"%s\" do not have the same type, found \"%s\" and \"%s\"", g3a->id, g3a->type, g3a2->type);
-    }
+    // The first record may have no parent at all (reported by the check before
+    // this loop), so test for a NULL parentIds before comparing parent names.
+    if ((slCount(g3a2->parentIds) != 1) || (g3a->parentIds == NULL) || !sameString(g3a2->parentIds->name, g3a->parentIds->name))
+        gff3AnnErr(g3a, FALSE, "Annotation records for discontinuous features with ID=\"%s\" must have same parent", g3a->id);
+    }
+
+// The discontinuous features abomination means we can't check for duplicate
+// ids in features where it makes no sense. Add non-spec restriction.
+if (!(sameString(g3a->type, gff3FeatCDS) ||
+      sameString(g3a->type, gff3FeatThreePrimeUTR) ||
+      sameString(g3a->type, gff3FeatFivePrimeUTR) ||
+      sameString(g3a->type, gff3FeatStartCodon) ||
+      sameString(g3a->type, gff3FeatStopCodon)))
+    gff3AnnErr(g3a, FALSE, "Incorrect duplicated ID=\"%s\" or attempt to create discontinuous features for type \"%s\"",
+               g3a->id, g3a->type);
 }
 
 static void discontinFeatureFillArray(struct gff3Ann *g3a, int numAnns, struct gff3Ann *featAnns[])
 /* convert list to array for sorting */
 {
 int i = 0;
 for (; g3a != NULL; g3a = g3a->nextPart)
     featAnns[i++] = g3a;
 }
 
 static struct gff3Ann *discontinFeatureArrayLink(int numAnns, struct gff3Ann *featAnns[])
 /* convert sorted array to a list */
 {
 struct gff3Ann *g3aHead = NULL, *g3aPrev = NULL;
 int i;