a6208b51039416d5be37b79b89d405aeea93be30 markd Mon Apr 18 13:03:32 2016 -0700 Deal with the case where CDS specifications that were created by other software that didn't correctly set the completeness flags create frames incorrectly on end-truncated sequences. This code can't possibly do the right thing all the time if completeness isn't correctly flagged; however, this makes it more likely to do the right thing when software starts from the beginning of the first coding defining CDS without actually setting completeness flags. diff --git src/hg/lib/genePred.c src/hg/lib/genePred.c index 7c0b999..6e8bbc1 100644 --- src/hg/lib/genePred.c +++ src/hg/lib/genePred.c @@ -1102,32 +1102,35 @@ { /* use the 3' end is used if it's complete, as it is more often accurate when * genes are defined from mRNAs sequenced with reverse-transcriptase. */ int frame = -1; /* map to mRNA coords in CDS since frame for an exon is in direction of * transcription. */ if (psl->strand[0] == '-') reverseIntRange(&start, &end, psl->qSize); if (start < cds->start) start = cds->start; if (end > cds->end) end = cds->end; if (start < end) { - /* compute from end if it is complete in mRNA */ - if (cds->endComplete) + /* Compute from end if it is complete in mRNA and start is not complete. + * This is doesn't as the end is more likely completely. However, so + * code doesn't correctly create CDS to indicate completeness, so don't + * use CDS end unless we know start is incomplete. */ + if (cds->endComplete && !cds->startComplete) { int fr = (cds->end-start) % 3; frame = (fr == 2) ? 1 : ((fr == 1) ? 2 : 0); } else frame = (start-cds->start) % 3; } return frame; } static boolean shouldMergeBlocks(struct genePred *gene, unsigned tStart, unsigned prevTEnd, unsigned qStart, unsigned prevQEnd, unsigned options, int cdsMergeSize, int utrMergeSize)