195a08fae15bd2ca55f2f2ed11d0c359521efcc4
markd
  Wed May 17 09:13:21 2023 -0700
added program to convert protein/protein or protein NA PSLs to NA/NA PSLs.  The multiplies coordinates by three; trimming any overlapping blocks that might be creates.  This is intended to use with pslMap

diff --git src/utils/pslProtToRnaCoords/pslProtToRnaCoords.c src/utils/pslProtToRnaCoords/pslProtToRnaCoords.c
new file mode 100644
index 0000000..5f8034c
--- /dev/null
+++ src/utils/pslProtToRnaCoords/pslProtToRnaCoords.c
@@ -0,0 +1,139 @@
+/* pslProtToRnaCoords - Convert protein alignments to RNA coordinates. */
+#include "common.h"
+#include "psl.h"
+#include "options.h"
+
+void usage()
+/* Explain usage and exit. */
+{
+errAbort(
+  "pslProtToRnaCoords - Convert protein alignments to RNA coordinates\n"
+  "usage:\n"
+  "   pslProtToRnaCoords inPsl outPsl\n"
+  "\n"
+  "Convert either a protein/protein or protein/NA PSL to NA/NA PSL.  This\n"
+  "multiplies coordinates and statistics by three.  As this can occasionally\n"
+  "results in blocks overlapping, overlap is trimmed as needed.\n"
+  );
+}
+
+/* Command line validation table. */
+static struct optionSpec options[] = {
+   {NULL, 0},
+};
+
+static void trimOverlapping(bool isProtNaPsl, struct psl *psl, int oBlk)
+/* if this block overlaps the previous block, trim accordingly.  Overlaps
+ * can be created with protein to DNA PSLs.  */
+// check for overlap; use int so we can go negative
+{
+int prevQEnd = pslQEnd(psl, oBlk - 1);
+int prevTEnd = pslTEnd(psl, oBlk - 1);
+
+// trim, possibly setting to zero-length
+int overAmt = max((prevQEnd - (int)psl->qStarts[oBlk]), (prevTEnd - (int)psl->tStarts[oBlk]));
+if (overAmt < 0)
+    overAmt = 0;
+else if (overAmt > psl->blockSizes[oBlk])
+    overAmt = psl->blockSizes[oBlk];
+
+psl->qStarts[oBlk] += overAmt;
+psl->tStarts[oBlk] += overAmt;
+psl->blockSizes[oBlk] -= overAmt;
+psl->match -= overAmt;
+}
+
+
+static int updateBlk(bool isProtNaPsl, struct psl *psl, int iBlk, int oBlk)
+/* Update sizes in a block, adjusting start of block if it now
+ * overlaps previous block due previous block size expansion.
+ * return updated oBlk */
+{
+// update to NA coordinates
+psl->blockSizes[oBlk] = 3 * psl->blockSizes[iBlk];
+psl->qStarts[oBlk] = 3 * psl->qStarts[iBlk];
+if (isProtNaPsl)
+    psl->tStarts[oBlk] = psl->tStarts[iBlk];  // already in NA coords
+else
+    psl->tStarts[oBlk] = 3 * psl->tStarts[iBlk];
+psl->match += psl->blockSizes[oBlk];
+
+if (oBlk > 0)
+    trimOverlapping(isProtNaPsl, psl, oBlk);
+
+// only advance if something left in the block
+if (psl->blockSizes[oBlk] > 0)
+    oBlk++;
+return oBlk;
+}
+
+static void pslConvertCoords(struct psl *psl)
+/* convert psl coordinates */
+{
+boolean isProtNaPsl = pslIsProtein(psl);
+// overall sizes and coordinates
+psl->qSize *= 3;
+psl->qStart *= 3;
+psl->qEnd *= 3;
+if (!isProtNaPsl)
+    {
+    psl->tSize *= 3;
+    psl->tStart *= 3;
+    psl->tEnd *= 3;
+    }
+
+/* set counts to be all match, since we don't know actual answer and
+ * it makes it easier to trim overlapping blocks  */
+psl->match = psl->misMatch = psl->repMatch = psl->nCount = 0;
+
+// blocks
+int oBlk = 0;
+for (int iBlk = 0; iBlk < psl->blockCount; iBlk += 1)
+    {
+    oBlk = updateBlk(isProtNaPsl, psl, iBlk, oBlk);
+    }
+psl->blockCount = oBlk;
+
+// need to update overall coordinates if last block was full remove due to
+// overlap
+psl->qStart = psl->qStarts[0];
+psl->qEnd = psl->qStarts[oBlk - 1] + psl->blockSizes[oBlk - 1];
+if (psl->strand[0] == '-')
+    reverseIntRange(&psl->qStart, &psl->qEnd, psl->qSize);
+psl->tStart = psl->tStarts[0];
+psl->tEnd = psl->tStarts[oBlk - 1] + psl->blockSizes[oBlk - 1];
+if (psl->strand[0] == '-')
+    reverseIntRange(&psl->tStart, &psl->tEnd, psl->tSize);
+
+// indels
+pslComputeInsertCounts(psl);
+}
+
+
+static void pslProtToRnaCoords(char *inPsl, char *outPsl)
+/* pslProtToRnaCoords - Convert protein alignments to RNA coordinates. */
+{
+struct lineFile *inFh = pslFileOpen(inPsl);
+FILE *outFh = mustOpen(outPsl, "w");
+
+struct psl *psl;
+while ((psl = pslNext(inFh)) != NULL)
+    {
+    pslConvertCoords(psl);
+    pslTabOut(psl, outFh);
+    pslFree(&psl);
+    }
+
+carefulClose(&outFh);
+lineFileClose(&inFh);
+}
+
+int main(int argc, char *argv[])
+/* Process command line. */
+{
+optionInit(&argc, argv, options);
+if (argc != 3)
+    usage();
+pslProtToRnaCoords(argv[1], argv[2]);
+return 0;
+}