src/utils/gtfToGenePred/gtfToGenePred.c 1.7

1.7 2009/08/27 19:33:07 markd
don't dereference NULL if an annotation doesn't have an exon
Index: src/utils/gtfToGenePred/gtfToGenePred.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/utils/gtfToGenePred/gtfToGenePred.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -b -B -U 1000000 -r1.6 -r1.7
--- src/utils/gtfToGenePred/gtfToGenePred.c	14 Feb 2008 00:36:46 -0000	1.6
+++ src/utils/gtfToGenePred/gtfToGenePred.c	27 Aug 2009 19:33:07 -0000	1.7
@@ -1,169 +1,183 @@
 /* gtfToGenePred - convert a GTF file to a genePred. */
 #include "common.h"
 #include "linefile.h"
 #include "gff.h"
 #include "genePred.h"
 #include "errCatch.h"
 #include "options.h"
 
 void usage()
 /* Explain usage and exit.  The whole help text is passed to errAbort as a
  * single message; it contains no printf conversions. */
 {
 errAbort(
   "gtfToGenePred - convert a GTF file to a genePred\n"
   "usage:\n"
   "   gtfToGenePred gtf genePred\n"
   "\n"
   "options:\n"
   "     -genePredExt - create an extended genePred, including frame\n"
   "      information and gene name\n"
   "     -allErrors - skip groups with errors rather than aborting.\n"
   "      Useful for getting information about as many errors as possible.\n"
   "     -infoOut=file - write a file with information on each transcript\n"
   "     -sourcePrefix=pre - only process entries where the source name has the\n"
   "      specified prefix.  May be repeated.\n"
   "     -impliedStopAfterCds - implied stop codon is after CDS\n");
 }
 
 /* Command-line option table handed to optionInit() in main().  Each entry
  * pairs an option name with its type flags; the names correspond to the
  * options described in usage().  The final {NULL, 0} entry presumably marks
  * the end of the table -- confirm against the options library. */
 static struct optionSpec options[] = {
     {"genePredExt", OPTION_BOOLEAN},
     {"allErrors", OPTION_BOOLEAN},
     {"infoOut", OPTION_STRING},
     {"sourcePrefix", OPTION_STRING|OPTION_MULTI},  /* may be given more than once */
     {"impliedStopAfterCds", OPTION_BOOLEAN},
     {NULL, 0},
 };
 /* Settings taken from the command line; all are assigned in main(). */
 boolean clGenePredExt = FALSE;  /* include frame and geneName */
 boolean clAllErrors = FALSE;    /* report as many errors as possible */
 struct slName *clSourcePrefixes; /* list of source prefixes to match */
 unsigned clGxfOptions = 0;       /* options for converting GTF/GFF */
 
 /* Incremented by gtfGroupToGenePred() for each group that fails; main()
  * aborts with this count when it is greater than zero. */
 int badGroupCount = 0;  /* count of inconsistent groups found */
 
 
 /* Header line for the info file, written once right after the file is
  * opened.  Its ten tab-separated column names line up with the ten fields
  * that writeInfo() prints for each group. */
 static char *infoHeader = "#transId\tgeneId\tsource\tchrom\tstart\tend\tstrand\tproteinId\tgeneName\ttranscriptName\n";
 
 static void saveName(char **name, char *newName)
 /* Remember newName in *name, first value wins: nothing happens when *name
  * is already set or when newName is NULL.  Only the pointer is stored, the
  * string is not copied. */
 {
 if (*name != NULL || newName == NULL)
     return;
 *name = newName;
 }
 static void writeInfo(FILE *infoFh, struct gffGroup *group)
 /* Write one tab-separated row describing a GTF group to the info file.
  * The gene id, protein id, gene name and transcript name are each taken
  * from the first line in the group that supplies a non-NULL value; any
  * that are never found are passed through emptyForNull() before printing. */
 {
 char *geneId = NULL;
 char *proteinId = NULL;
 char *geneName = NULL;
 char *transcriptName = NULL;
 struct gffLine *line = group->lineList;
 
 /* collect the ids and names from the group's lines */
 while (line != NULL)
     {
     saveName(&geneId, line->geneId);
     saveName(&proteinId, line->proteinId);
     saveName(&geneName, line->geneName);
     saveName(&transcriptName, line->transcriptName);
     line = line->next;
     }
 
 /* field order here must stay in step with the columns in infoHeader */
 fprintf(infoFh, "%s\t%s\t%s\t%s\t%d\t%d\t%c\t%s\t%s\t%s\n",
         group->name,
         emptyForNull(geneId),
         group->source,
         group->seq,
         group->start,
         group->end,
         group->strand,
         emptyForNull(proteinId),
         emptyForNull(geneName),
         emptyForNull(transcriptName));
 }
 
 static void gtfGroupToGenePred(struct gffFile *gtf, struct gffGroup *group, FILE *gpFh,
                                FILE *infoFh)
 /* Convert one GTF group to a genePred and write it to gpFh.  The conversion
  * runs inside an errCatch block.  When an error message was caught, or the
  * conversion yields NULL (reported as "no exons defined"), the problem is
  * printed to stderr and counted in badGroupCount if -allErrors is set;
  * otherwise errAbort is called with the message.  When no error was caught
  * and infoFh is not NULL, a row for the group is written to infoFh.
  * NOTE(review): with -allErrors, a group for which genePredFromGroupedGtf
  * returns NULL is counted as bad without raising an error, so it looks like
  * an info row is still written for it -- confirm whether that is intended. */
 {
 unsigned optFields = (clGenePredExt ? genePredAllFlds : 0);
-struct genePred *gp;
 struct errCatch *errCatch = errCatchNew();
 
 if (errCatchStart(errCatch))
     {
-    gp = genePredFromGroupedGtf(gtf, group, group->name, optFields, clGxfOptions);
+    struct genePred *gp = genePredFromGroupedGtf(gtf, group, group->name, optFields, clGxfOptions);
+    if (gp == NULL)
+        {
+        if (clAllErrors)
+            fprintf(stderr,"no exons defined for %s\n", group->name);
+        else
+            errAbort("no exons defined for %s", group->name);
+        badGroupCount++;
+        }
+    else
+        {
     genePredTabOut(gp, gpFh);
     genePredFree(&gp);
     }
+    }
 errCatchEnd(errCatch);
 if (errCatch->gotError)
     {
+    // drop trailing newline in caught message
+    int l = strlen(errCatch->message->string);
+    if ((l > 0) && (errCatch->message->string[l-1] == '\n'))
+        errCatch->message->string[l-1] = '\0';
     if (clAllErrors)
-        warn("%s", errCatch->message->string);
+        fprintf(stderr, "%s\n", errCatch->message->string);
     else
         errAbort("%s", errCatch->message->string);
     badGroupCount++;
     }
 else
     {
     if (infoFh != NULL)
         writeInfo(infoFh, group);
     }
 errCatchFree(&errCatch); 
 }
 
 static bool sourceMatches(struct gffGroup *group)
 /* Return TRUE when the group's source starts with at least one of the
  * prefixes in clSourcePrefixes, FALSE when none match (or the list is
  * empty). */
 {
 struct slName *prefix = clSourcePrefixes;
 while (prefix != NULL)
     {
     if (startsWith(prefix->name, group->source))
         return TRUE;
     prefix = prefix->next;
     }
 return FALSE;
 }
         
 
 static bool inclGroup(struct gffGroup *group)
 /* Decide whether a group belongs in the output.  Every group is included
  * unless source prefixes were given, in which case the group's source must
  * match one of them. */
 {
 /* no prefix filter requested: accept everything */
 if (clSourcePrefixes == NULL)
     return TRUE;
 return sourceMatches(group) ? TRUE : FALSE;
 }
 
 static void gtfToGenePred(char *gtfFile, char *gpFile, char *infoFile)
 /* gtfToGenePred - convert a GTF file to a genePred.
  *   gtfFile  - input file; aborts if gffRead() does not flag it as GTF
  *   gpFile   - output genePred file, opened for writing
  *   infoFile - optional info output file, or NULL for none; when given,
  *              infoHeader is written first, followed by the rows that
  *              gtfGroupToGenePred() emits
  * Groups rejected by inclGroup() are skipped. */
 {
 struct gffFile *gtf = gffRead(gtfFile);
 FILE *gpFh, *infoFh = NULL;
 struct gffGroup *group;
 
 if (!gtf->isGtf)
     errAbort("%s doesn't appear to be a GTF file (GFF not supported by this program)", gtfFile);
 gffGroupLines(gtf);
 gpFh = mustOpen(gpFile, "w");
 if (infoFile != NULL)
     {
     infoFh = mustOpen(infoFile, "w");
     fputs(infoHeader, infoFh);
     }
 
 for (group = gtf->groupList; group != NULL; group = group->next)
     if (inclGroup(group))
         gtfGroupToGenePred(gtf, group, gpFh, infoFh);
 
 carefulClose(&gpFh);
 /* close the info file as well; infoFh is NULL when none was requested */
 if (infoFh != NULL)
     carefulClose(&infoFh);
 gffFileFree(&gtf);
 }
 
 int main(int argc, char *argv[])
 /* Parse the command line into the cl* globals, run the conversion on the
  * two positional arguments, and abort with an error count if any group
  * was bad.  Anything other than two positional arguments prints usage. */
 {
 char *infoFile = NULL;
 
 optionInit(&argc, argv, options);
 if (argc != 3)
     usage();
 
 clGenePredExt = optionExists("genePredExt");
 clAllErrors = optionExists("allErrors");
 clSourcePrefixes = optionMultiVal("sourcePrefix", NULL);
 if (optionExists("impliedStopAfterCds"))
     clGxfOptions |= genePredGxfImpliedStopAfterCds;
 infoFile = optionVal("infoOut", NULL);
 
 gtfToGenePred(argv[1], argv[2], infoFile);
 
 /* clean run: nothing more to report */
 if (badGroupCount <= 0)
     return 0;
 errAbort("%d errors", badGroupCount);
 return 0;
 }