b1a9de0fd2ebab6d3caf4197eecc2e75eb74162d
markd
  Mon May 24 13:53:35 2021 -0700
Added better handling of GTFs without correct frames on CDS. This also simplified the handling of GTF stop codons.

diff --git src/hg/utils/gtfToGenePred/gtfToGenePred.c src/hg/utils/gtfToGenePred/gtfToGenePred.c
index 44c93f6..4897530 100644
--- src/hg/utils/gtfToGenePred/gtfToGenePred.c
+++ src/hg/utils/gtfToGenePred/gtfToGenePred.c
@@ -42,31 +42,30 @@
     {"ignoreGroupsWithoutExons", OPTION_BOOLEAN},
     {"infoOut", OPTION_STRING},
     {"sourcePrefix", OPTION_STRING|OPTION_MULTI},
     {"impliedStopAfterCds", OPTION_BOOLEAN},
     {"geneNameAsName2", OPTION_BOOLEAN},
     {"includeVersion", OPTION_BOOLEAN},
     {NULL, 0},
 };
 boolean clGenePredExt = FALSE;  /* include frame and geneName */
 boolean clAllErrors = FALSE;    /* report as many errors as possible */
 boolean clIgnoreGroupsWithoutExons = FALSE;  /* ignore groups without exons */
 struct slName *clSourcePrefixes; /* list of source prefixes to match */
 boolean clIncludeVersion = FALSE; /* add version numbers to identifiers if available */
 unsigned clGxfOptions = 0;       /* options for converting GTF/GFF */
 boolean doSimple = FALSE;      /* only check column validity */
-
 int badGroupCount = 0;  /* count of inconsistent groups found */
 
 
 /* header for info file */
 static char *infoHeader = "#transId\tgeneId\tsource\tchrom\tstart\tend\tstrand\tproteinId\tgeneName\ttranscriptName\tgeneType\ttranscriptType\n";
 
 static void saveName(char **name, char *newName)
 /* if name references NULL, and newName is not NULL, update name */
 {
 if ((*name == NULL) && (newName != NULL))
     *name = newName;
 }
 static void writeInfo(FILE *infoFh, struct gffGroup *group)
 /* write a row for a GTF group from the info file */
 {
@@ -217,22 +216,20 @@
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 clGenePredExt = optionExists("genePredExt");
 doSimple = optionExists("simple");
 clIgnoreGroupsWithoutExons = optionExists("ignoreGroupsWithoutExons");
 clAllErrors = optionExists("allErrors");
 clIncludeVersion = optionExists("includeVersion");
 clSourcePrefixes = optionMultiVal("sourcePrefix", NULL);
 if (optionExists("impliedStopAfterCds"))
     clGxfOptions |= genePredGxfImpliedStopAfterCds;
 if (optionExists("geneNameAsName2"))
     clGxfOptions |= genePredGxfGeneNameAsName2;
 if (optionExists("includeVersion"))
     clGxfOptions |= genePredGxfIncludeVersion;
-
-
 gtfToGenePred(argv[1], argv[2], optionVal("infoOut", NULL));
 if (badGroupCount > 0)
     errAbort("%d errors", badGroupCount);
 return 0;
 }