b1a9de0fd2ebab6d3caf4197eecc2e75eb74162d markd Mon May 24 13:53:35 2021 -0700 Added better handling of GTFs without correct frames on CDS. This also simplified the handling of GTF stop codons diff --git src/hg/utils/gtfToGenePred/gtfToGenePred.c src/hg/utils/gtfToGenePred/gtfToGenePred.c index 44c93f6..4897530 100644 --- src/hg/utils/gtfToGenePred/gtfToGenePred.c +++ src/hg/utils/gtfToGenePred/gtfToGenePred.c @@ -42,31 +42,30 @@ {"ignoreGroupsWithoutExons", OPTION_BOOLEAN}, {"infoOut", OPTION_STRING}, {"sourcePrefix", OPTION_STRING|OPTION_MULTI}, {"impliedStopAfterCds", OPTION_BOOLEAN}, {"geneNameAsName2", OPTION_BOOLEAN}, {"includeVersion", OPTION_BOOLEAN}, {NULL, 0}, }; boolean clGenePredExt = FALSE; /* include frame and geneName */ boolean clAllErrors = FALSE; /* report as many errors as possible */ boolean clIgnoreGroupsWithoutExons = FALSE; /* ignore groups without exons */ struct slName *clSourcePrefixes; /* list of source prefixes to match */ boolean clIncludeVersion = FALSE; /* add version numbers to identifiers if available */ unsigned clGxfOptions = 0; /* options for converting GTF/GFF */ boolean doSimple = FALSE; /* only check column validity */ - int badGroupCount = 0; /* count of inconsistent groups found */ /* header for info file */ static char *infoHeader = "#transId\tgeneId\tsource\tchrom\tstart\tend\tstrand\tproteinId\tgeneName\ttranscriptName\tgeneType\ttranscriptType\n"; static void saveName(char **name, char *newName) /* if name references NULL, and newName is not NULL, update name */ { if ((*name == NULL) && (newName != NULL)) *name = newName; } static void writeInfo(FILE *infoFh, struct gffGroup *group) /* write a row for a GTF group from the info file */ { @@ -217,22 +216,20 @@ optionInit(&argc, argv, options); if (argc != 3) usage(); clGenePredExt = optionExists("genePredExt"); doSimple = optionExists("simple"); clIgnoreGroupsWithoutExons = optionExists("ignoreGroupsWithoutExons"); clAllErrors = optionExists("allErrors"); clIncludeVersion = optionExists("includeVersion"); clSourcePrefixes = optionMultiVal("sourcePrefix", NULL); if (optionExists("impliedStopAfterCds")) clGxfOptions |= genePredGxfImpliedStopAfterCds; if (optionExists("geneNameAsName2")) clGxfOptions |= genePredGxfGeneNameAsName2; if (optionExists("includeVersion")) clGxfOptions |= genePredGxfIncludeVersion; - - gtfToGenePred(argv[1], argv[2], optionVal("infoOut", NULL)); if (badGroupCount > 0) errAbort("%d errors", badGroupCount); return 0; }