5711ac6c2bcaeda814ae1049a0661f499775885e markd Wed Dec 13 00:36:12 2023 -0800 change pslProtToRnaCoords to use common code with pslMap to convert protein/NA to CDS-coordinate/NA alignments. This will correctly remove overlapping blocks after conversion diff --git src/lib/pslTransMap.c src/lib/pslTransMap.c index 50cce93..f53b8f9 100644 --- src/lib/pslTransMap.c +++ src/lib/pslTransMap.c @@ -46,47 +46,33 @@ static boolean pslTypeTargetIsProtein(enum pslType pslType) /* is the pslType indicate the target is a protein? */ { return (pslType == pslTypeProtProt); } static boolean pslTypeTargetIsNa(enum pslType pslType) /* is the pslType indicate the target is a NA? */ { return !pslTypeTargetIsProtein(pslType); } static void setPslBoundsCounts(struct psl* psl) /* set sequences bounds and counts from blocks on a PSL */ { -int lastBlk = psl->blockCount-1; - -/* set start/end of sequences */ -psl->qStart = psl->qStarts[0]; -psl->qEnd = psl->qStarts[lastBlk] + psl->blockSizes[lastBlk]; -if (pslQStrand(psl) == '-') - reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize); - -psl->tStart = psl->tStarts[0]; -psl->tEnd = psl->tStarts[lastBlk] + psl->blockSizes[lastBlk]; -if (pslTStrand(psl) == '-') - reverseIntRange(&psl->tStart, &psl->tEnd, psl->tSize); - -psl->match = 0; -for (int iBlk = 0; iBlk < psl->blockCount; iBlk++) - psl->match += psl->blockSizes[iBlk]; pslComputeInsertCounts(psl); +pslRecalcBounds(psl); +pslRecalcBaseCounts(psl); } static unsigned int roundUpToMultipleOf3(unsigned n) { return ((n + 2) / 3) * 3; } static unsigned blockOverlapAmt3(struct psl *psl, int iBlk) /* How much overlap is there with the next block. This is the max of query * and target, rounded up to a multiple of three, since we are dealing with * protein -> NA. */ { // care taken in subtraction because of unsigneds int tOver = (pslTStart(psl, iBlk + 1) < pslTEnd(psl, iBlk)) ? @@ -111,108 +97,108 @@ memmove(&array[iArray], &array[iArray + 1], (arrayLen - iArray - 1) * sizeof(unsigned)); } static void removeBlock(struct psl *psl, int jBlk) /* remove the specified block */ { arrayDelete(psl->blockSizes, jBlk, psl->blockCount); arrayDelete(psl->qStarts, jBlk, psl->blockCount); arrayDelete(psl->tStarts, jBlk, psl->blockCount); psl->blockCount--; } static void editBlockOverlap(struct psl *psl, int iBlk, unsigned overlapAmt3) /* remove overlap between two blocks. If multiple blocks are covered, - * then shift remove the block */ + * then shift over the blocks */ { while ((overlapAmt3 > 0) && (iBlk < ((int)psl->blockCount) - 1)) { int jBlk = iBlk + 1; if (overlapAmt3 < psl->blockSizes[jBlk]) { trimBlockOverlap(psl, jBlk, overlapAmt3); overlapAmt3 = 0; } else { overlapAmt3 -= psl->blockSizes[jBlk]; removeBlock(psl, jBlk); } } } static void removeOverlappingBlock(struct psl *psl) /* Remove overlapping blocks, which BLAT can create with protein to NA * alignments. These are exposed when convert prot-NA alignments to NA-NA * alignment. */ { for (int iBlk = 0; iBlk < ((int)psl->blockCount) - 1; iBlk++) { unsigned overlapAmt3 = blockOverlapAmt3(psl, iBlk); if (overlapAmt3 > 0) editBlockOverlap(psl, iBlk, overlapAmt3); } -setPslBoundsCounts(psl); } struct block /* Coordinates of a block, which might be aligned or a gap on one side */ { int qStart; /* Query start position. */ int qEnd; /* Query end position. */ int tStart; /* Target start position. */ int tEnd; /* Target end position. */ }; static struct block ZERO_BLOCK = {0, 0, 0, 0}; static void assertBlockAligned(struct block *blk) /* Make sure both sides are same size and not negative length. */ { assert(blk->qStart <= blk->qEnd); assert(blk->tStart <= blk->tEnd); assert((blk->qEnd - blk->qStart) == (blk->tEnd - blk->tStart)); } static int blockIsAligned(struct block *blk) /* check that both the query and target side have alignment */ { return (blk->qEnd != 0) && (blk->tEnd != 0); // can start at zero } -static void pslProtToNAConvert(struct psl *psl) +void pslProtToNAConvert(struct psl *psl) /* convert a protein/NA or protein/protein alignment to a NA/NA alignment */ { boolean isProtNa = pslIsProtein(psl); int iBlk; - psl->qStart *= 3; psl->qEnd *= 3; psl->qSize *= 3; if (!isProtNa) psl->tSize *= 3; for (iBlk = 0; iBlk < psl->blockCount; iBlk++) { psl->blockSizes[iBlk] *= 3; psl->qStarts[iBlk] *= 3; if (!isProtNa) psl->tStarts[iBlk] *= 3; + psl->match += psl->blockSizes[iBlk]; } removeOverlappingBlock(psl); +setPslBoundsCounts(psl); if (pslCheck("converted to NA", stderr, psl) > 0) { pslTabOut(psl, stderr); errAbort("BUG: converting an AA to NA alignment produced an invalid PSL"); } } static struct psl* createMappedPsl(struct psl* inPsl, struct psl *mapPsl, int mappedPslMax) /* setup a PSL for the output alignment */ { char strand[3]; assert(pslTStrand(inPsl) == pslQStrand(mapPsl)); /* strand can be taken from both alignments, since common sequence is in same