ba66786a683bce0f6c38efbdcad60686984478b4
markd
  Sun Jun 11 13:12:41 2023 -0700
correctly handle getting target block end on protein/rna PSLs

diff --git src/utils/pslMapPostChain/pslMapPostChain.c src/utils/pslMapPostChain/pslMapPostChain.c
index 13e6d74..a03bee7 100644
--- src/utils/pslMapPostChain/pslMapPostChain.c
+++ src/utils/pslMapPostChain/pslMapPostChain.c
@@ -6,30 +6,31 @@
 #include "hash.h"
 #include "psl.h"
 #include <string.h>
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort("pslMapPostChain - Post genomic pslMap (TransMap) chaining.\n"
          "usage:\n"
          "    pslMapPostChain [options] inPsl outPsl\n"
          "\n"
          "Post genomic pslMap (TransMap) chaining.  This takes transcripts\n"
          "that have been mapped via genomic chains adds back in\n"
          "blocks that didn't get include in genomic chains due\n"
          "to complex rearrangements or other issues.\n"
+         "This can also handle other PSLs, including protein-RNA alignments\n"
          "\n"
          "This program has not seen much use and may not do what you want\n");
 }
 
 static struct optionSpec optionSpecs[] =
 {
     {"h", OPTION_BOOLEAN},
     {"help", OPTION_BOOLEAN},
     {NULL, 0}
 };
 /* max distance to attempt to chain over */
 static int maxAdjacentDistance = 50*1000*1000;  // 50mb
 
 /* special status used to in place of chain */
 static const int CHAIN_NOT_HERE = -1;
@@ -213,48 +214,51 @@
     (*destArray)[iBlk+numSrcBlocks] = (*destArray)[iBlk];
     }
 for (iBlk = 0; iBlk < numSrcBlocks; iBlk++)
     {
     assert(iBlk+insertIdx < numDestBlocks+numSrcBlocks);
     (*destArray)[iBlk+insertIdx] = srcArray[iBlk];
     }
 }
 
 static void addPslToChained(struct psl* chainedPsl,
                             struct psl* nextPsl,
                             int insertIdx)
 /* add blocks form a psl to the chained psl at the given point */
 {
 assert(insertIdx <= chainedPsl->blockCount);
+if (pslIsProtein(chainedPsl) != pslIsProtein(nextPsl))
+    errAbort("can't chain protein/NA with NA/NA alignments: %s %s", chainedPsl->qName, chainedPsl->tName);
+
 // expand arrays and copy in entries
 pslArrayInsert(&chainedPsl->blockSizes, chainedPsl->blockCount, nextPsl->blockSizes, nextPsl->blockCount, insertIdx);
 pslArrayInsert(&chainedPsl->qStarts, chainedPsl->blockCount, nextPsl->qStarts, nextPsl->blockCount, insertIdx);
 pslArrayInsert(&chainedPsl->tStarts, chainedPsl->blockCount, nextPsl->tStarts, nextPsl->blockCount, insertIdx);
 chainedPsl->blockCount += nextPsl->blockCount;
 
 // update bounds if needed
 if (pslQStrand(chainedPsl) == '+')
     {
     chainedPsl->qStart = pslQStartStrand(chainedPsl, 0, '+');
     chainedPsl->qEnd = pslQEndStrand(chainedPsl, chainedPsl->blockCount-1, '+');
     }
 else
     {
     chainedPsl->qStart = pslQStartStrand(chainedPsl, chainedPsl->blockCount-1, '+');
     chainedPsl->qEnd = pslQEndStrand(chainedPsl, 0, '+');
     }
-assert(pslTStrand(chainedPsl) == '+');
+assert(pslTStrand(chainedPsl) == '+');  // forced on load
 chainedPsl->tStart = pslTStartStrand(chainedPsl, 0, '+');
 chainedPsl->tEnd = pslTEndStrand(chainedPsl, chainedPsl->blockCount-1, '+');
 
 // update counts
 chainedPsl->match += nextPsl->match;
 chainedPsl->misMatch += nextPsl->misMatch;
 chainedPsl->repMatch += nextPsl->repMatch;
 chainedPsl->nCount += nextPsl->nCount;
 }
 
 static void chainToLongest(struct psl** queryPsls,
                            FILE* outPslFh)
 /* pull off one or more psls from the list and chain them */
 {
 struct psl* chainedPsl = slPopHead(queryPsls);
@@ -273,31 +277,31 @@
         slAddHead(&unchainedPsls, nextPsl);
         }
     }
 pslComputeInsertCounts(chainedPsl);
 pslTabOut(chainedPsl, outPslFh);
 pslFree(&chainedPsl);
 slReverse(&unchainedPsls); // preserve longest to shortest order
 *queryPsls = unchainedPsls;
 }
 
 static void chainQuery(struct psl** queryPsls,
                        FILE* outPslFh)
 /* make chains for a single query. Takes over ownership
  * of PSLs */
 {
-// sort by genomic location, then pull of a group to
+// sort by genomic location, then group to chain
 // to chain
 slSort(queryPsls, pslCmpTargetAndStrandSpan);
 while (*queryPsls != NULL)
     chainToLongest(queryPsls, outPslFh);
 }
 
 static void pslMapPostChain(char* inPslFile,
                             char* outPslFile)
 /* do chaining */
 {
 struct hash* pslsByQName = loadPslByQname(inPslFile);
 FILE* outPslFh = mustOpen(outPslFile, "w");
 struct hashEl *hel;
 struct hashCookie cookie = hashFirst(pslsByQName);
 while ((hel = hashNext(&cookie)) != NULL)