4e0e5a68eca6f220fc5553949ccfa8b8742bce7c markd Mon Apr 18 16:05:27 2016 -0700 added note about a case that is not correctly handled diff --git src/hg/genePredToFakePsl/genePredToFakePsl.c src/hg/genePredToFakePsl/genePredToFakePsl.c index 1732514..346f50e 100644 --- src/hg/genePredToFakePsl/genePredToFakePsl.c +++ src/hg/genePredToFakePsl/genePredToFakePsl.c @@ -35,30 +35,36 @@ "pslOut specifies the fake-mRNA output psl filename.\n" "\n" "cdsOut specifies the output cds tab-separated file which contains\n" "genbank-style CDS records showing cdsStart..cdsEnd\n" "e.g. NM_123456 34..305\n" "options:\n" " -chromSize=sizefile\tRead chrom sizes from file instead of database\n" " sizefile contains two white space separated fields per line:\n" " chrom name and size\n" "\n"); } static void cnvGenePredCds(struct genePred *gp, int qSize, FILE *cdsFh) /* determine CDS and output */ { +/* + * Warning: Genbank CDS doesn't have the ability to represent + * partial codons. If we have genePreds created from GFF/GTF, they can have + * partial codons, which is indicated in frame. This code does not correctly handle + * this case, or frame-shifting indels. + */ int e, off = 0; int qCdsStart = -1, qCdsEnd = -1; int eCdsStart, eCdsEnd; for (e = 0; e < gp->exonCount; ++e) { if (genePredCdsExon(gp, e, &eCdsStart, &eCdsEnd)) { if (qCdsStart < 0) qCdsStart = off + (eCdsStart - gp->exonStarts[e]); qCdsEnd = off + (eCdsEnd - gp->exonStarts[e]); } off += gp->exonEnds[e] - gp->exonStarts[e]; } if (gp->strand[0] == '-')