0362494184981193a406895a488aee5e92ece803 markd Tue Feb 7 14:24:25 2023 -0800 support for converting HPRC GFF3 files from Ensembl. Not perfect; however, all genes get converted, so this can be fixed in post-processing diff --git src/lib/gff3.c src/lib/gff3.c index b5388dd..9c12e32 100644 --- src/lib/gff3.c +++ src/lib/gff3.c @@ -34,34 +34,41 @@ char *gff3AttrID = "ID"; char *gff3AttrName = "Name"; char *gff3AttrAlias = "Alias"; char *gff3AttrParent = "Parent"; char *gff3AttrTarget = "Target"; char *gff3AttrGap = "Gap"; char *gff3AttrDerivesFrom = "Derives_from"; char *gff3AttrNote = "Note"; char *gff3AttrDbxref = "Dbxref"; char *gff3AttrOntologyTerm = "Ontology_term"; char *gff3AttrIsCircular = "Is_circular"; /* commonly used features names */ char *gff3FeatGene = "gene"; char *gff3FeatPseudogene = "pseudogene"; +char *gff3FeatNCRnaGene ="ncRNA_gene"; char *gff3FeatNCRna ="ncRNA"; char *gff3FeatRRna = "rRNA"; char *gff3FeatTRna = "tRNA"; char *gff3FeatMRna = "mRNA"; +char *gff3FeatLncRna = "lnc_RNA"; +char *gff3FeatPseudogenicTranscript = "pseudogenic_transcript"; +char *gff3FeatScRna = "scRNA"; +char *gff3FeatSnRna = "snRNA"; +char *gff3FeatSnoRna = "snoRNA"; +char *gff3FeatUnconfirmedTranscript = "unconfirmed_transcript"; char *gff3FeatExon = "exon"; char *gff3FeatCDS = "CDS"; char *gff3FeatThreePrimeUTR = "three_prime_UTR"; char *gff3FeatFivePrimeUTR = "five_prime_UTR"; char *gff3FeatStartCodon = "start_codon"; char *gff3FeatStopCodon = "stop_codon"; char *gff3FeatTranscript = "transcript"; char *gff3FeatPrimaryTranscript = "primary_transcript"; char *gff3FeatCGeneSegment = "C_gene_segment"; char *gff3FeatDGeneSegment = "D_gene_segment"; char *gff3FeatJGeneSegment = "J_gene_segment"; char *gff3FeatVGeneSegment = "V_gene_segment"; static bool gff3FileStopDueToErrors(struct gff3File *g3f) /* determine if we should stop due to the number of errors */