src/hg/utils/gff3ToGenePred/gff3ToGenePred.c 1.3
1.3 2010/03/19 06:04:18 markd
fixed crash on nodes with no ID or Parent attrs
Index: src/hg/utils/gff3ToGenePred/gff3ToGenePred.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/hg/utils/gff3ToGenePred/gff3ToGenePred.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -b -B -U 4 -r1.2 -r1.3
--- src/hg/utils/gff3ToGenePred/gff3ToGenePred.c 19 Mar 2010 02:24:35 -0000 1.2
+++ src/hg/utils/gff3ToGenePred/gff3ToGenePred.c 19 Mar 2010 06:04:18 -0000 1.3
@@ -49,8 +49,28 @@
fputc('\n', stderr);
convertErrCnt++;
}
+static char *mkAnnAddrKey(struct gff3Ann *ann)
+/* create a hash key string for a gff3Ann from its address (pointer value formatted with "%lld"). WARNING: static return; the buffer is reused, so the result is only valid until the next call */
+{
+static char buf[64];
+safef(buf, sizeof(buf), "%lld", (long long)ann);
+return buf;
+}
+
+static boolean isProcessed(struct hash *processed, struct gff3Ann *ann)
+/* has an ann record been processed? true if the record's address key is found in the processed hash */
+{
+return hashLookup(processed, mkAnnAddrKey(ann)) != NULL;
+}
+
+static void recProcessed(struct hash *processed, struct gff3Ann *ann)
+/* add an ann record to the processed hash, keyed by the record's address (via mkAnnAddrKey) with the record itself as the value */
+{
+hashAdd(processed, mkAnnAddrKey(ann), ann);
+}
+
static struct gff3File *loadGff3(char *inGff3File)
/* load GFF3 into memory */
{
struct gff3File *gff3File = gff3FileOpen(inGff3File, maxParseErrs, NULL);
@@ -173,9 +193,10 @@
static void processMRna(FILE *gpFh, struct gff3Ann *gene, struct gff3Ann *mrna, struct hash *processed)
/* process a mRNA node in the tree; gene can be NULL. Error count increment on error and genePred discarded */
{
-hashStore(processed, mrna->id);
+recProcessed(processed, mrna);
+
// allow for only having CDS children
struct gff3AnnRef *exons = getChildFeatures(mrna, gff3FeatExon);
struct gff3AnnRef *cdsBlks = getChildFeatures(mrna, gff3FeatCDS);
struct gff3AnnRef *useExons = (exons != NULL) ? exons : cdsBlks;
@@ -189,9 +210,9 @@
return; // error
// output before checking so it can be examined
genePredTabOut(gp, gpFh);
-if (genePredCheck("GFF3 converted to genePred", stderr, -1, gp) != 0)
+if (genePredCheck("GFF3 convert to genePred", stderr, -1, gp) != 0)
{
cnvError("conversion failed");
genePredFree(&gp);
return; // error
@@ -204,14 +225,14 @@
static void processGene(FILE *gpFh, struct gff3Ann *gene, struct hash *processed)
/* process a gene node in the tree. Stop process if maximum errors reached */
{
-hashStore(processed, gene->id);
+recProcessed(processed, gene);
struct gff3AnnRef *child;
for (child = gene->children; child != NULL; child = child->next)
{
- if (sameString(child->ann->type, gff3FeatMRna) && (hashLookup(processed, child->ann->id) == NULL))
+ if (sameString(child->ann->type, gff3FeatMRna) && !isProcessed(processed, child->ann))
{
processMRna(gpFh, gene, child->ann, processed);
if (convertErrCnt > maxConvertErrs)
break;
@@ -221,9 +242,9 @@
static void processRoot(FILE *gpFh, struct gff3Ann *node, struct hash *processed)
/* process a root node in the tree */
{
-hashStore(processed, node->id);
+recProcessed(processed, node);
if (sameString(node->type, gff3FeatGene))
processGene(gpFh, node, processed);
else if (sameString(node->type, gff3FeatMRna))
@@ -232,16 +253,16 @@
static void gff3ToGenePred(char *inGff3File, char *outGpFile)
/* gff3ToGenePred - convert a GFF3 file to a genePred file. */
{
-// hash of nodes record ids, prevents dup processing due to dup parents
+// hash of node ptrs, prevents dup processing due to dup parents
struct hash *processed = hashNew(12);
struct gff3File *gff3File = loadGff3(inGff3File);
FILE *gpFh = mustOpen(outGpFile, "w");
struct gff3AnnRef *root;
for (root = gff3File->roots; root != NULL; root = root->next)
{
- if (hashLookup(processed, root->ann->id) == NULL)
+ if (!isProcessed(processed, root->ann))
{
processRoot(gpFh, root->ann, processed);
if (convertErrCnt > maxConvertErrs)
break;