0919ab12c7fff4d83613273cad9d7a13b1af668c markd Mon Jun 12 19:29:34 2023 -0700 allow pslMapPostChain handle protein/NA alignments diff --git src/utils/pslMapPostChain/pslMapPostChain.c src/utils/pslMapPostChain/pslMapPostChain.c index a03bee7..5cfa08c 100644 --- src/utils/pslMapPostChain/pslMapPostChain.c +++ src/utils/pslMapPostChain/pslMapPostChain.c @@ -62,76 +62,30 @@ static int pslCmpTargetAndStrandSpan(const void *va, const void *vb) /* Compare to sort based on target, strand, tStart, and then span */ { const struct psl *a = *((struct psl **)va); const struct psl *b = *((struct psl **)vb); int dif; dif = strcmp(a->tName, b->tName); if (dif == 0) dif = strcmp(a->strand, b->strand); if (dif == 0) dif = -(pslTSpan(a) - pslTSpan(b)); return dif; } -static char normStrand(char strand) -/* return strand as stored in psl, converting implicit `\0' to `+' */ -{ -return (strand == '\0') ? '+' : strand; -} - -static unsigned pslQStartStrand(struct psl *psl, int blkIdx, char strand) -/* return query start for the given block, mapped to specified strand, - * which can be `\0' for `+' */ -{ -if (psl->strand[0] == normStrand(strand)) - return psl->qStarts[blkIdx]; -else - return psl->qSize - pslQEnd(psl, blkIdx); -} - -static unsigned pslQEndStrand(struct psl *psl, int blkIdx, char strand) -/* return query end for the given block, mapped to specified strand, - * which can be `\0' for `+' */ -{ -if (psl->strand[0] == normStrand(strand)) - return pslQEnd(psl, blkIdx); -else - return psl->qSize - pslQStart(psl, blkIdx); -} - -static unsigned pslTStartStrand(struct psl *psl, int blkIdx, char strand) -/* return target start for the given block, mapped to specified strand, - * which can be `\0' for `+' */ -{ -if (normStrand(psl->strand[1]) == normStrand(strand)) - return psl->tStarts[blkIdx]; -else - return psl->tSize - pslTEnd(psl, blkIdx); -} - -static unsigned pslTEndStrand(struct psl *psl, int blkIdx, char strand) -/* return target end for the given block, mapped to specified strand, - * which can be `\0' for `+' */ -{ -if (normStrand(psl->strand[1]) == normStrand(strand)) - return pslTEnd(psl, blkIdx); -else - return psl->tSize - pslTStart(psl, blkIdx); -} - static int findChainPointUpstream(struct psl* chainedPsl, struct psl* nextPsl) /* findChainPoint upstream check. */ { // check next being query upstream of chain if ((pslQEnd(nextPsl, nextPsl->blockCount-1) <= pslQStart(chainedPsl, 0)) && (pslTEnd(nextPsl, nextPsl->blockCount-1) <= pslTStart(chainedPsl, 0))) { if ((pslTStart(chainedPsl, 0) - pslTEnd(nextPsl, nextPsl->blockCount-1)) > maxAdjacentDistance) return CHAIN_CAN_NOT; // too far before else return 0; } else return CHAIN_NOT_HERE; @@ -223,44 +177,35 @@ static void addPslToChained(struct psl* chainedPsl, struct psl* nextPsl, int insertIdx) /* add blocks form a psl to the chained psl at the given point */ { assert(insertIdx <= chainedPsl->blockCount); if (pslIsProtein(chainedPsl) != pslIsProtein(nextPsl)) errAbort("can't chain protein/NA with NA/NA alignments: %s %s", chainedPsl->qName, chainedPsl->tName); // expand arrays and copy in entries pslArrayInsert(&chainedPsl->blockSizes, chainedPsl->blockCount, nextPsl->blockSizes, nextPsl->blockCount, insertIdx); pslArrayInsert(&chainedPsl->qStarts, chainedPsl->blockCount, nextPsl->qStarts, nextPsl->blockCount, insertIdx); pslArrayInsert(&chainedPsl->tStarts, chainedPsl->blockCount, nextPsl->tStarts, nextPsl->blockCount, insertIdx); chainedPsl->blockCount += nextPsl->blockCount; -// update bounds if needed -if (pslQStrand(chainedPsl) == '+') - { - chainedPsl->qStart = pslQStartStrand(chainedPsl, 0, '+'); - chainedPsl->qEnd = pslQEndStrand(chainedPsl, chainedPsl->blockCount-1, '+'); - } -else - { - chainedPsl->qStart = pslQStartStrand(chainedPsl, chainedPsl->blockCount-1, '+'); - chainedPsl->qEnd = pslQEndStrand(chainedPsl, 0, '+'); - } -assert(pslTStrand(chainedPsl) == '+'); // forced on load -chainedPsl->tStart = pslTStartStrand(chainedPsl, 0, '+'); -chainedPsl->tEnd = pslTEndStrand(chainedPsl, chainedPsl->blockCount-1, '+'); +// update bounds +chainedPsl->qStart = min(chainedPsl->qStart, nextPsl->qStart); +chainedPsl->qEnd = max(chainedPsl->qEnd, nextPsl->qEnd); +chainedPsl->tStart = min(chainedPsl->tStart, nextPsl->tStart); +chainedPsl->tEnd = max(chainedPsl->tEnd, nextPsl->tEnd); // update counts chainedPsl->match += nextPsl->match; chainedPsl->misMatch += nextPsl->misMatch; chainedPsl->repMatch += nextPsl->repMatch; chainedPsl->nCount += nextPsl->nCount; } static void chainToLongest(struct psl** queryPsls, FILE* outPslFh) /* pull off one or more psls from the list and chain them */ { struct psl* chainedPsl = slPopHead(queryPsls); struct psl* unchainedPsls = NULL; // ones not included struct psl* nextPsl;