dbf303c20543fe86f5378e20f80c012eb1304daf markd Mon Nov 30 08:44:12 2015 -0800 added missing transcript records in genePredToGtf conversion diff --git src/hg/genePredToGtf/genePredToGtf.c src/hg/genePredToGtf/genePredToGtf.c index 98cb8e6..b0bf513 100644 --- src/hg/genePredToGtf/genePredToGtf.c +++ src/hg/genePredToGtf/genePredToGtf.c @@ -91,32 +91,35 @@ assert(start <= end); /* convert frame to phase */ char phase = (frame < 0) ? '.' : (frame == 0) ? '0' : (frame == 1) ? '2' : '1'; fprintf(f, "%s\t", chrom); fprintf(f, "%s\t", source); fprintf(f, "%s\t", type); fprintf(f, "%d\t", start+1); fprintf(f, "%d\t", end); fprintf(f, ".\t"); /* Score. */ fprintf(f, "%c\t", strand); fprintf(f, "%c\t", phase); fprintf(f, "gene_id \"%s\"; ", (isNotEmpty(geneName)) ? geneName : name); fprintf(f, "transcript_id \"%s\"; ", name); +if (exonIx >= 0) + { fprintf(f, "exon_number \"%d\"; ", exonIx+1); fprintf(f, "exon_id \"%s.%d\";", name, exonIx+1); + } if (isNotEmpty(geneName)) fprintf(f, " gene_name \"%s\";", geneName); fprintf(f, "\n"); } static boolean inExon(struct genePred *gp, int iExon, int pos) /* determine if pos is in the specified exon */ { return ((gp->exonStarts[iExon] <= pos) && (pos <= gp->exonEnds[iExon])); } static int movePos(struct genePred *gp, int pos, int dist) /* Move a position in an exon by dist, which is positive to move forward, and * negative to move backwards. Introns are skipped. Error if can't move * distance and stay in exon. @@ -364,30 +367,32 @@ // figure out bounds of CDS and UTR regions, moving stop codon to outside of // CDS. 
int firstUtrEnd = gp->cdsStart, lastUtrStart = gp->cdsEnd; int cdsStart = gp->cdsStart, cdsEnd = gp->cdsEnd; if ((strand == '+') && codonComplete(&lastCodon)) cdsEnd = movePos(gp, lastUtrStart, -3); if ((strand == '-') && codonComplete(&firstCodon)) cdsStart = movePos(gp, cdsStart, 3); if (addComments) fprintf(f, "###\n# %s %s:%d-%d (%s) CDS: %d-%d\n#\n", gp->name, gp->chrom, gp->txStart, gp->txEnd, gp->strand, gp->cdsStart, gp->cdsEnd); +writeGtfLine(f, source, name, geneName, chrom, strand, "transcript", + gp->txStart, gp->txEnd, -1, -1); for (i=0; i<gp->exonCount; ++i) { writeGtfLine(f, source, name, geneName, chrom, strand, "exon", gp->exonStarts[i], gp->exonEnds[i], i, -1); if (cdsStart < cdsEnd) writeFeatures(gp, i, source, name, chrom, strand, geneName, firstUtrEnd, cdsStart, cdsEnd, lastUtrStart, frames[i], f); } if (gp->strand[0] == '+') { if (codonComplete(&firstCodon)) writeCodon(f, source, name, geneName, chrom, strand, "start_codon", &firstCodon); if (codonComplete(&lastCodon))