f0bc7fe725829c9839e56455cac2aabbe1d27515 max Tue May 27 04:07:30 2014 -0700 fixing CDS problem in genePredToBed diff --git src/hg/genePredToBed/genePredToBed.c src/hg/genePredToBed/genePredToBed.c index 7cda352..0b0c27d 100644 --- src/hg/genePredToBed/genePredToBed.c +++ src/hg/genePredToBed/genePredToBed.c @@ -27,30 +27,36 @@ void convertGenePredToBed(char *inFile, char *outFile) /* genePredToBed - Convert from genePred to bed format.. */ { struct genePred *gp, *gpList= genePredLoadAll(inFile); FILE *f = mustOpen(outFile, "w"); for (gp = gpList; gp != NULL; gp = gp->next) { /* Print scalar bed fields. */ fprintf(f, "%s\t", gp->chrom); fprintf(f, "%u\t", gp->txStart); fprintf(f, "%u\t", gp->txEnd); fprintf(f, "%s\t", gp->name); fprintf(f, "%u\t", 0); fprintf(f, "%s\t", gp->strand); + + // genePred sets cdsStart==cdsEnd==txEnd if there is no CDS + // bed sets thickStart==thickEnd==txStart if there is no CDS (see https://www.biostars.org/p/73452/) + if (gp->cdsStart==gp->cdsEnd && gp->cdsEnd==gp->txEnd) + gp->cdsStart = gp->cdsEnd = gp->txStart; + fprintf(f, "%u\t", gp->cdsStart); fprintf(f, "%u\t", gp->cdsEnd); fprintf(f, "%u\t", 0); fprintf(f, "%u\t", gp->exonCount); /* Print exon-by-exon fields. */ int i; /* Print exon sizes */ for (i=0; i<gp->exonCount; ++i) fprintf(f, "%u,", gp->exonEnds[i] - gp->exonStarts[i]); fprintf(f, "\t"); /* Print exons starts */ for (i=0; i<gp->exonCount; ++i)