cf293df150f4b256d195e90b5a20817d8479ece0 galt Sat Jul 2 13:17:59 2016 -0700 fixes #17624. -maxIntron= was broken in blat whenever a non-default value was used causing it to run cutAtBigIntrons() which did not support protein. The fix was using trans3GenoPos() to get correct coordinates handling 3 frames, and also calling ffScoreProtein() instead of ffScore() when it is a protein query. diff --git src/jkOwnLib/supStitch.c src/jkOwnLib/supStitch.c index 90317dc..c8525cf 100644 --- src/jkOwnLib/supStitch.c +++ src/jkOwnLib/supStitch.c @@ -65,55 +65,58 @@ } *pList = NULL; } void dumpNearCrossover(char *what, DNA *n, DNA *h, int size) /* Print sequence near crossover */ { printf("%s: ", what); mustWrite(stdout, n, size); printf("\n"); printf("%s: ", what); mustWrite(stdout, h, size); printf("\n"); } -void dumpFf(struct ffAli *left, DNA *needle, DNA *hay) +void dumpFf(struct ffAli *left, bioSeq *qSeq, bioSeq *tSeq, struct trans3 *t3List) /* Print info on ffAli. */ { struct ffAli *ff; for (ff = left; ff != NULL; ff = ff->right) { - printf("(%ld - %ld)[%ld-%ld] ", (long)(ff->hStart-hay), (long)(ff->hEnd-hay), - (long)(ff->nStart - needle), (long)(ff->nEnd - needle)); + int hStart = trans3GenoPos(ff->hStart, tSeq, t3List, FALSE); + int hEnd = trans3GenoPos(ff->hEnd , tSeq, t3List, TRUE); + + printf("(%d - %d)[%ld-%ld] ", hStart, hEnd, + (long)(ff->nStart - qSeq->dna), (long)(ff->nEnd - qSeq->dna)); } printf("\n"); } void dumpBuns(struct ssBundle *bunList) /* Print diagnostic info on bundles. */ { struct ssBundle *bun; struct ssFfItem *ffl; for (bun = bunList; bun != NULL; bun = bun->next) { struct dnaSeq *qSeq = bun->qSeq; /* Query sequence (not owned by bundle.) */ struct dnaSeq *genoSeq = bun->genoSeq; /* Genomic sequence (not owned by bundle.) */ printf("Bundle of %d between %s and %s\n", slCount(bun->ffList), qSeq->name, genoSeq->name); for (ffl = bun->ffList; ffl != NULL; ffl = ffl->next) { - dumpFf(ffl->ff, bun->qSeq->dna, bun->genoSeq->dna); + dumpFf(ffl->ff, bun->qSeq, bun->genoSeq, bun->t3List); } } } static int bioScoreMatch(boolean isProt, char *a, char *b, int size) /* Return score of match (no inserts) between two bio-polymers. */ { if (isProt) { return dnaOrAaScoreMatch(a, b, size, 2, -1, 'X'); } else { return dnaOrAaScoreMatch(a, b, size, 1, -1, 'n'); @@ -705,52 +708,58 @@ int count = ffAliCount(ffList); if (count >= 10) { ssFindBestBig(ffList, qSeq, tSeq, stringency, isProt, t3List, retBestAli, retScore, retLeftovers); } else { ssFindBestSmall(ffList, qSeq, tSeq, stringency, isProt, t3List, retBestAli, retScore, retLeftovers); } } struct ffAli *cutAtBigIntrons(struct ffAli *ffList, int maxIntron, int *pScore, enum ffStringency stringency, + boolean isProt, bioSeq *tSeq, struct trans3 *t3List, struct ffAli **returnLeftovers) /* Return ffList up to the first intron that's too big. * Put the rest of the blocks back onto the leftovers list. */ { struct ffAli *prevFf, *ff, *cutFf = NULL; prevFf = ffList; for (ff = prevFf->right; ff != NULL; ff = ff->right) { - int dt = ff->hStart - prevFf->hEnd; + int nhStart = trans3GenoPos( ff->hStart, tSeq, t3List, FALSE); + int ohEnd = trans3GenoPos(prevFf->hEnd , tSeq, t3List, TRUE); + int dt = nhStart - ohEnd; if (dt > maxIntron) { cutFf = prevFf; break; } prevFf = ff; } if (cutFf != NULL) { ff = cutFf->right; cutFf->right = NULL; ff->left = NULL; ffCat(returnLeftovers, &ff); + if (isProt) + *pScore = ffScoreProtein(ffList, stringency); + else *pScore = ffScore(ffList, stringency); } return ffList; } void ssStitch(struct ssBundle *bundle, enum ffStringency stringency, int minScore, int maxToReturn) /* Glue together mrnas in bundle as much as possible. Returns number of * alignments after stitching. Updates bundle->ffList with stitched * together version. */ { struct dnaSeq *qSeq = bundle->qSeq; struct dnaSeq *genoSeq = bundle->genoSeq; struct ffAli *ffList = NULL; struct ssFfItem *ffl; @@ -766,56 +775,59 @@ if (minScore > 20) minScore = 20; /* Create ffAlis for all in bundle and move to one big list. */ for (ffl = bundle->ffList; ffl != NULL; ffl = ffl->next) { ffCat(&ffList, &ffl->ff); } slFreeList(&bundle->ffList); ffAliSort(&ffList, ffCmpHitsNeedleFirst); ffList = ffMergeClose(ffList, qSeq->dna, genoSeq->dna); while (ffList != NULL) { + ssFindBest(ffList, qSeq, genoSeq, stringency, bundle->isProt, bundle->t3List, &bestPath, &score, &ffList); + bestPath = ffMergeNeedleAlis(bestPath, TRUE); bestPath = ffRemoveEmptyAlis(bestPath, TRUE); if (!bestPath) { ffFreeAli(&ffList); break; } bestPath = ffMergeHayOverlaps(bestPath); bestPath = ffRemoveEmptyAlis(bestPath, TRUE); bestPath = forceMonotonic(bestPath, qSeq, genoSeq, stringency, bundle->isProt, bundle->t3List); if (firstTime && stringency == ffCdna && bundle->avoidFuzzyFindKludge == FALSE) { /* Only look for middle exons the first time. Next times * this might regenerate most of the first alignment... */ bestPath = smallMiddleExons(bestPath, bundle, stringency); } bestPath = ffMergeNeedleAlis(bestPath, TRUE); if (ffIntronMax != ffIntronMaxDefault) { bestPath = cutAtBigIntrons(bestPath, ffIntronMax, &score, stringency, + bundle->isProt, genoSeq, bundle->t3List, &ffList); } if (!bundle->isProt) ffSlideIntrons(bestPath); bestPath = ffRemoveEmptyAlis(bestPath, TRUE); if (score >= minScore) { AllocVar(ffl); ffl->ff = bestPath; slAddHead(&bundle->ffList, ffl); } else { ffFreeAli(&bestPath); ffFreeAli(&ffList);