5711ac6c2bcaeda814ae1049a0661f499775885e
markd
  Wed Dec 13 00:36:12 2023 -0800
change pslProtToRnaCoords to use common code with pslMap to convert protein/NA to CDS-coordinate/NA alignments. This will correctly remove overlapping blocks after conversion

diff --git src/lib/pslTransMap.c src/lib/pslTransMap.c
index 50cce93..f53b8f9 100644
--- src/lib/pslTransMap.c
+++ src/lib/pslTransMap.c
@@ -46,47 +46,33 @@
 static boolean pslTypeTargetIsProtein(enum pslType pslType)
 /* is the pslType indicate the target is a protein? */
 {
 return (pslType == pslTypeProtProt);
 }
 
 static boolean pslTypeTargetIsNa(enum pslType pslType)
 /* is the pslType indicate the target is a NA? */
 {
 return !pslTypeTargetIsProtein(pslType);
 }
 
 static void setPslBoundsCounts(struct psl* psl)
 /* set sequences bounds and counts from blocks on a PSL */
 {
-int lastBlk = psl->blockCount-1;
-
-/* set start/end of sequences */
-psl->qStart = psl->qStarts[0];
-psl->qEnd = psl->qStarts[lastBlk] + psl->blockSizes[lastBlk];
-if (pslQStrand(psl) == '-')
-    reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
-
-psl->tStart = psl->tStarts[0];
-psl->tEnd = psl->tStarts[lastBlk] + psl->blockSizes[lastBlk];
-if (pslTStrand(psl) == '-')
-    reverseIntRange(&psl->tStart, &psl->tEnd, psl->tSize);
-
-psl->match = 0;
-for (int iBlk = 0; iBlk < psl->blockCount; iBlk++)
-    psl->match += psl->blockSizes[iBlk];
 pslComputeInsertCounts(psl);
+pslRecalcBounds(psl);
+pslRecalcBaseCounts(psl);
 }
 
 static unsigned int roundUpToMultipleOf3(unsigned n) {
   return ((n + 2) / 3) * 3;
 }
 
 static unsigned blockOverlapAmt3(struct psl *psl,
                                  int iBlk)
 /* How much overlap is there with the next block.  This is the max of query
  * and target, rounded up to a multiple of three, since we are dealing with
  * protein -> NA.
  */
 {
 // care taken in subtraction because of unsigneds
 int tOver = (pslTStart(psl, iBlk + 1) < pslTEnd(psl, iBlk)) ?
@@ -111,108 +97,108 @@
 memmove(&array[iArray], &array[iArray + 1], (arrayLen - iArray - 1) * sizeof(unsigned));
 }
 
 static void removeBlock(struct psl *psl, int jBlk)
 /* remove the specified block */
 {
 arrayDelete(psl->blockSizes, jBlk, psl->blockCount);
 arrayDelete(psl->qStarts, jBlk, psl->blockCount);
 arrayDelete(psl->tStarts, jBlk, psl->blockCount);
 psl->blockCount--;
 }
     
 static void editBlockOverlap(struct psl *psl, int iBlk,
                              unsigned overlapAmt3)
 /* remove overlap between two blocks.  If multiple blocks are covered,
- * then shift remove the block */
+ * then shift over the blocks */
 {
 while ((overlapAmt3 > 0) && (iBlk < ((int)psl->blockCount) - 1))
     {
     int jBlk = iBlk + 1;
     if (overlapAmt3 < psl->blockSizes[jBlk])
         {
         trimBlockOverlap(psl, jBlk, overlapAmt3);
         overlapAmt3 = 0;
         }
     else
         {
         overlapAmt3 -= psl->blockSizes[jBlk];
         removeBlock(psl, jBlk);
         }
     }
 }
 
 static void removeOverlappingBlock(struct psl *psl)
 /* Remove overlapping blocks, which BLAT can create with protein to NA
  * alignments.  These are exposed when convert prot-NA alignments to NA-NA
  * alignment.
  */
 {
 for (int iBlk = 0; iBlk < ((int)psl->blockCount) - 1; iBlk++)
     {
     unsigned overlapAmt3 = blockOverlapAmt3(psl, iBlk);
     if (overlapAmt3 > 0)
         editBlockOverlap(psl, iBlk, overlapAmt3);
     }
-setPslBoundsCounts(psl);
 }
 
 
 struct block
 /* Coordinates of a block, which might be aligned or a gap on one side */
 {
     int qStart;          /* Query start position. */
     int qEnd;            /* Query end position. */
     int tStart;          /* Target start position. */
     int tEnd;            /* Target end position. */
 };
 
 static struct block ZERO_BLOCK = {0, 0, 0, 0};
 
 static void assertBlockAligned(struct block *blk)
 /* Make sure both sides are same size and not negative length.  */
 {
 assert(blk->qStart <= blk->qEnd);
 assert(blk->tStart <= blk->tEnd);
 assert((blk->qEnd - blk->qStart) == (blk->tEnd - blk->tStart));
 }
 
 static int blockIsAligned(struct block *blk)
 /* check that both the query and target side have alignment */
 {
 return (blk->qEnd != 0) && (blk->tEnd != 0); // can start at zero
 }
 
-static void pslProtToNAConvert(struct psl *psl)
+void pslProtToNAConvert(struct psl *psl)
 /* convert a protein/NA or protein/protein alignment to a NA/NA alignment */
 {
 boolean isProtNa = pslIsProtein(psl);
 int iBlk;
-
 psl->qStart *= 3;
 psl->qEnd *= 3;
 psl->qSize *= 3;
 if (!isProtNa)
     psl->tSize *= 3;
 for (iBlk = 0; iBlk < psl->blockCount; iBlk++)
     {
     psl->blockSizes[iBlk] *= 3;
     psl->qStarts[iBlk] *= 3;
     if (!isProtNa)
         psl->tStarts[iBlk] *= 3;
+    psl->match += psl->blockSizes[iBlk];
     }
 removeOverlappingBlock(psl);
+setPslBoundsCounts(psl);
 if (pslCheck("converted to NA", stderr, psl) > 0)
     {
     pslTabOut(psl, stderr);
     errAbort("BUG: converting an AA to NA alignment produced an invalid PSL");
     }
 }
 
 static struct psl* createMappedPsl(struct psl* inPsl, struct psl *mapPsl,
                                    int mappedPslMax)
 /* setup a PSL for the output alignment */
 {
 char strand[3];
 assert(pslTStrand(inPsl) == pslQStrand(mapPsl));
 
 /* strand can be taken from both alignments, since common sequence is in same