src/hg/utils/gff3ToGenePred/gff3ToGenePred.c 1.3

1.3 2010/03/19 06:04:18 markd
fixed crash on nodes with no ID or Parent attrs
Index: src/hg/utils/gff3ToGenePred/gff3ToGenePred.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/utils/gff3ToGenePred/gff3ToGenePred.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/utils/gff3ToGenePred/gff3ToGenePred.c	19 Mar 2010 02:24:35 -0000	1.2
+++ src/hg/utils/gff3ToGenePred/gff3ToGenePred.c	19 Mar 2010 06:04:18 -0000	1.3
@@ -49,8 +49,28 @@
 fputc('\n', stderr);
 convertErrCnt++;
 }
 
+static char *mkAnnAddrKey(struct gff3Ann *ann)
+/* create a hash key for a gff3Ann from its address.  WARNING: static return, overwritten by the next call */
+{
+static char buf[64];
+safef(buf, sizeof(buf), "%p", (void *)ann);  // %p avoids a pointer-to-integer cast whose width may not match the pointer
+return buf;
+}
+
+static boolean isProcessed(struct hash *processed, struct gff3Ann *ann)
+/* has an ann record been processed?  Looks up the ann's address key in the processed hash. */
+{
+return hashLookup(processed, mkAnnAddrKey(ann)) != NULL;
+}
+
+static void recProcessed(struct hash *processed, struct gff3Ann *ann)
+/* add an ann record to the processed hash, keyed by its address with the ann itself as the value */
+{
+hashAdd(processed, mkAnnAddrKey(ann), ann);
+}
+
 static struct gff3File *loadGff3(char *inGff3File)
 /* load GFF3 into memory */
 {
 struct gff3File *gff3File = gff3FileOpen(inGff3File, maxParseErrs, NULL);
@@ -173,9 +193,10 @@
 
 static void processMRna(FILE *gpFh, struct gff3Ann *gene, struct gff3Ann *mrna, struct hash *processed)
 /* process a mRNA node in the tree; gene can be NULL. Error count increment on error and genePred discarded */
 {
-hashStore(processed, mrna->id);
+recProcessed(processed, mrna);
+
 // allow for only having CDS children
 struct gff3AnnRef *exons = getChildFeatures(mrna, gff3FeatExon);
 struct gff3AnnRef *cdsBlks = getChildFeatures(mrna, gff3FeatCDS);
 struct gff3AnnRef *useExons = (exons != NULL) ? exons : cdsBlks;
@@ -189,9 +210,9 @@
     return; // error
 
 // output before checking so it can be examined
 genePredTabOut(gp, gpFh);
-if (genePredCheck("GFF3 converted to genePred", stderr, -1, gp) != 0)
+if (genePredCheck("GFF3 convert to genePred", stderr, -1, gp) != 0)
     {
     cnvError("conversion failed");
     genePredFree(&gp);
     return; // error
@@ -204,14 +225,14 @@
 
 static void processGene(FILE *gpFh, struct gff3Ann *gene, struct hash *processed)
 /* process a gene node in the tree.  Stop process if maximum errors reached */
 {
-hashStore(processed, gene->id);
+recProcessed(processed, gene);
 
 struct gff3AnnRef *child;
 for (child = gene->children; child != NULL; child = child->next)
     {
-    if (sameString(child->ann->type, gff3FeatMRna) && (hashLookup(processed, child->ann->id) == NULL))
+    if (sameString(child->ann->type, gff3FeatMRna) && !isProcessed(processed, child->ann))
         {
         processMRna(gpFh, gene, child->ann, processed);
         if (convertErrCnt > maxConvertErrs)
             break;
@@ -221,9 +242,9 @@
 
 static void processRoot(FILE *gpFh, struct gff3Ann *node, struct hash *processed)
 /* process a root node in the tree */
 {
-hashStore(processed, node->id);
+recProcessed(processed, node);
 
 if (sameString(node->type, gff3FeatGene))
     processGene(gpFh, node, processed);
 else if (sameString(node->type, gff3FeatMRna))
@@ -232,16 +253,16 @@
 
 static void gff3ToGenePred(char *inGff3File, char *outGpFile)
 /* gff3ToGenePred - convert a GFF3 file to a genePred file. */
 {
-// hash of nodes record ids, prevents dup processing due to dup parents
+// hash of nodes ptrs, prevents dup processing due to dup parents
 struct hash *processed = hashNew(12);
 struct gff3File *gff3File = loadGff3(inGff3File);
 FILE *gpFh = mustOpen(outGpFile, "w");
 struct gff3AnnRef *root;
 for (root = gff3File->roots; root != NULL; root = root->next)
     {
-    if (hashLookup(processed, root->ann->id) == NULL)
+    if (!isProcessed(processed, root->ann))
         {
         processRoot(gpFh, root->ann, processed);
         if (convertErrCnt > maxConvertErrs)
             break;