0362494184981193a406895a488aee5e92ece803 markd Tue Feb 7 14:24:25 2023 -0800 support for converting HPRC GFF3 files from Ensembl. Not perfect, however, all genes get converted, so this can be fixed in post-processing diff --git src/hg/utils/gff3ToGenePred/gff3ToGenePred.c src/hg/utils/gff3ToGenePred/gff3ToGenePred.c index 2d0a6cd..ee91335 100644 --- src/hg/utils/gff3ToGenePred/gff3ToGenePred.c +++ src/hg/utils/gff3ToGenePred/gff3ToGenePred.c @@ -112,38 +112,45 @@ static char **cdsExonFeatures[] = { &gff3FeatExon, &gff3FeatCDS, NULL }; static char **cdjvFeatures[] = { &gff3FeatCGeneSegment, &gff3FeatDGeneSegment, &gff3FeatJGeneSegment, &gff3FeatVGeneSegment, NULL }; static char** geneFeatures[] = { &gff3FeatGene, &gff3FeatPseudogene, + &gff3FeatNCRnaGene, NULL }; static char** transFeatures[] = { &gff3FeatMRna, &gff3FeatNCRna, &gff3FeatCDS, &gff3FeatRRna, &gff3FeatTRna, + &gff3FeatLncRna, + &gff3FeatPseudogenicTranscript, + &gff3FeatScRna, + &gff3FeatSnRna, + &gff3FeatSnoRna, + &gff3FeatUnconfirmedTranscript, &gff3FeatCGeneSegment, &gff3FeatDGeneSegment, &gff3FeatJGeneSegment, &gff3FeatVGeneSegment, &gff3FeatTranscript, &gff3FeatPrimaryTranscript, NULL }; static void cnvError(char *format, ...) /* print a GFF3 to gene conversion error. This will return. Code must check * for error count to be exceeded and unwind to the top level to print a usefull * error message and abort. 
*/ { if (warnAndContinue) @@ -740,31 +747,31 @@ recProcessed(processed, gene); if (isNcbiLikeSegmentGene(gene)) fixNcbiLikeSegmentGene(gene); if (shouldProcessGeneAsTranscript(gene)) processTranscript(gpFh, NULL, gene, processed); else if (shouldProcessGeneAsStandard(gene)) processGeneTranscripts(gpFh, gene, processed); else if (allowMinimalGenes) processGeneStandalone(gpFh, gene, processed); } static void processRoot(FILE *gpFh, struct gff3Ann *node, struct hash *processed) /* process a root node in the tree */ { -if (sameString(node->type, gff3FeatGene) || sameString(node->type, gff3FeatPseudogene)) +if (featTypeMatch(node->type, geneFeatures)) processGene(gpFh, node, processed); else if (shouldProcessAsTranscript(node)) processTranscript(gpFh, NULL, node, processed); } static void processRoots(FILE *gpFh, struct gff3AnnRef *roots, struct hash *processed) /* process all root node in the tree */ { struct gff3AnnRef *root; for (root = roots; root != NULL; root = root->next) { if (!isProcessed(processed, root->ann)) { processRoot(gpFh, root->ann, processed); if (convertErrCnt >= maxConvertErrors)