02336754147822f5aa61ba13277123b2cc629001
markd
  Thu May 20 08:38:55 2021 -0700
Moved pslMap, pslMapPostChain, pslRc, pslSwap to src/utils, as they do not have hg/lib dependencies.

diff --git src/hg/utils/pslMapPostChain/pslMapPostChain.c src/hg/utils/pslMapPostChain/pslMapPostChain.c
deleted file mode 100644
index 13e6d74..0000000
--- src/hg/utils/pslMapPostChain/pslMapPostChain.c
+++ /dev/null
@@ -1,323 +0,0 @@
-/* pslMapPostChain - Post genomic pslMap (TransMap) chaining. */
-
-#include "common.h"
-#include "linefile.h"
-#include "options.h"
-#include "hash.h"
-#include "psl.h"
-#include <string.h>
-
-void usage()
-/* Explain usage and exit. */
-{
-errAbort("pslMapPostChain - Post genomic pslMap (TransMap) chaining.\n"
-         "usage:\n"
-         "    pslMapPostChain [options] inPsl outPsl\n"
-         "\n"
-         "Post genomic pslMap (TransMap) chaining.  This takes transcripts\n"
-         "that have been mapped via genomic chains adds back in\n"
-         "blocks that didn't get include in genomic chains due\n"
-         "to complex rearrangements or other issues.\n"
-         "\n"
-         "This program has not seen much use and may not do what you want\n");
-}
-
-static struct optionSpec optionSpecs[] =
-{
-    {"h", OPTION_BOOLEAN},
-    {"help", OPTION_BOOLEAN},
-    {NULL, 0}
-};
-/* max distance to attempt to chain over */
-static int maxAdjacentDistance = 50*1000*1000;  // 50mb
-
-/* special status used to in place of chain */
-static const int CHAIN_NOT_HERE = -1;
-static const int CHAIN_CAN_NOT = -2;
-
-static struct hash* loadPslByQname(char* inPslFile)
-/* load PSLs in to hash by qName.  Make sure target strand is positive
- * to make process easier later. */
-{
-struct hash* pslsByQName = hashNew(0);
-struct psl *psls = pslLoadAll(inPslFile);
-struct psl *psl;
-while ((psl = slPopHead(&psls)) != NULL)
-    {
-    if (pslTStrand(psl) != '+')
-        pslRc(psl);
-    struct hashEl *hel = hashStore(pslsByQName, psl->qName);
-    struct psl** queryPsls = (struct psl**)&hel->val;
-    slAddHead(queryPsls, psl);
-    }
-return pslsByQName;
-}
-
-static int pslTSpan(const struct psl* psl)
-/* get target span */
-{
-return psl->tEnd - psl->tStart;
-}
-
-static int pslCmpTargetAndStrandSpan(const void *va, const void *vb)
-/* Compare to sort based on target, strand,  tStart, and then span */
-{
-const struct psl *a = *((struct psl **)va);
-const struct psl *b = *((struct psl **)vb);
-int dif;
-dif = strcmp(a->tName, b->tName);
-if (dif == 0)
-    dif = strcmp(a->strand, b->strand);
-if (dif == 0)
-    dif = -(pslTSpan(a) - pslTSpan(b));
-return dif;
-}
-
-static char normStrand(char strand)
-/* return strand as stored in psl, converting implicit `\0' to `+' */
-{
-return (strand == '\0') ? '+' : strand;
-}
-
-static unsigned pslQStartStrand(struct psl *psl, int blkIdx, char strand)
-/* return query start for the given block, mapped to specified strand,
- * which can be `\0' for `+' */
-{
-if (psl->strand[0] == normStrand(strand))
-    return psl->qStarts[blkIdx];
-else
-    return psl->qSize - pslQEnd(psl, blkIdx);
-}
-
-static unsigned pslQEndStrand(struct psl *psl, int blkIdx, char strand)
-/* return query end for the given block, mapped to specified strand,
- * which can be `\0' for `+' */
-{
-if (psl->strand[0] == normStrand(strand))
-    return pslQEnd(psl, blkIdx);
-else
-    return psl->qSize - pslQStart(psl, blkIdx);
-}
-
-static unsigned pslTStartStrand(struct psl *psl, int blkIdx, char strand)
-/* return target start for the given block, mapped to specified strand,
- * which can be `\0' for `+' */
-{
-if (normStrand(psl->strand[1]) == normStrand(strand))
-    return psl->tStarts[blkIdx];
-else
-    return psl->tSize - pslTEnd(psl, blkIdx);
-}
-
-static unsigned pslTEndStrand(struct psl *psl, int blkIdx, char strand)
-/* return target end for the given block, mapped to specified strand,
- * which can be `\0' for `+' */
-{
-if (normStrand(psl->strand[1]) == normStrand(strand))
-    return pslTEnd(psl, blkIdx);
-else
-    return psl->tSize - pslTStart(psl, blkIdx);
-}
-
-static int findChainPointUpstream(struct psl* chainedPsl,
-                                  struct psl* nextPsl)
-/* findChainPoint upstream check. */
-{
-// check next being query upstream of chain
-if ((pslQEnd(nextPsl, nextPsl->blockCount-1) <= pslQStart(chainedPsl, 0))
-    && (pslTEnd(nextPsl, nextPsl->blockCount-1) <= pslTStart(chainedPsl, 0)))
-    {
-    if ((pslTStart(chainedPsl, 0) - pslTEnd(nextPsl, nextPsl->blockCount-1)) > maxAdjacentDistance)
-        return CHAIN_CAN_NOT;  // too far before
-    else
-        return 0;
-    }
-else
-    return CHAIN_NOT_HERE;
-}
-
-static int findChainPointDownstream(struct psl* chainedPsl,
-                                    struct psl* nextPsl)
-/* findChainPoint downstream check. */
-{
-if ((pslQStart(nextPsl, 0) >= pslQEnd(chainedPsl, chainedPsl->blockCount-1))
-    && (pslTStart(nextPsl, 0) >= pslTEnd(chainedPsl, chainedPsl->blockCount-1)))
-    {
-    if ((pslTStart(nextPsl, 0) - pslTEnd(chainedPsl, chainedPsl->blockCount-1)) > maxAdjacentDistance)
-        return CHAIN_CAN_NOT;  // too far after
-    else
-        return chainedPsl->blockCount;
-    }
-else
-    return CHAIN_NOT_HERE; 
-}
-
-static int isBetweenBlocks(struct psl* chainedPsl,
-                           struct psl* nextPsl,
-                           int insertIdx)
-/* does nextPsl fit between two chained PSL blocks */
-{
-return (pslQEnd(chainedPsl, insertIdx-1) <= pslQStart(nextPsl, 0))
-    && (pslTEnd(chainedPsl, insertIdx-1) <= pslTStart(nextPsl, 0))
-    && (pslQEnd(nextPsl, nextPsl->blockCount-1) <= pslQStart(chainedPsl, insertIdx))
-    && (pslTEnd(nextPsl, nextPsl->blockCount-1) <= pslTStart(chainedPsl, insertIdx));
-}
-
-static int findChainPointInternal(struct psl* chainedPsl,
-                                  struct psl* nextPsl)
-/* find internal insertion point between block of chainedPsl. */
-{
-int insertIdx;
-for (insertIdx = 1; insertIdx < chainedPsl->blockCount; insertIdx++)
-    {
-    if (isBetweenBlocks(chainedPsl, nextPsl, insertIdx))
-        return insertIdx;
-    }
-return CHAIN_CAN_NOT;
-}
-
-static int findChainPoint(struct psl* chainedPsl,
-                          struct psl* nextPsl)
-/* Return the position in the chained PSL where a PSL can be added.  This
- * returns the block-index of where to insert the PSL, blockCount if to added
- * after, or one of the negative constants if it should not be chained.
- * assumes it chainedPsl has longest target span so we only look for insert
- * in chainPsl and not the other way around. */
-{
-if (!(sameString(chainedPsl->tName, nextPsl->tName)
-      && sameString(chainedPsl->strand, nextPsl->strand)))
-        return CHAIN_CAN_NOT;  // not on same seq/strand
-int insertIdx = findChainPointUpstream(chainedPsl, nextPsl);
-if (insertIdx != CHAIN_NOT_HERE)
-    return insertIdx;
-insertIdx = findChainPointDownstream(chainedPsl, nextPsl);
-if (insertIdx != CHAIN_NOT_HERE)
-    return insertIdx;
-insertIdx = findChainPointInternal(chainedPsl, nextPsl);
-return insertIdx;
-}
-
-static void pslArrayInsert(unsigned **destArray,
-                           int numDestBlocks,
-                           unsigned *srcArray,
-                           int numSrcBlocks,
-                           int insertIdx)
-/* Grow one of the psl arrays and move contents down
- * to make room, and copy new entries */
-{
-*destArray = needMoreMem(*destArray, numDestBlocks*sizeof(unsigned), (numDestBlocks+numSrcBlocks)*sizeof(unsigned));
-int iBlk;
-for (iBlk = numDestBlocks-1; iBlk >= insertIdx; iBlk--)
-    {
-    assert(iBlk+numSrcBlocks < numDestBlocks+numSrcBlocks);
-    (*destArray)[iBlk+numSrcBlocks] = (*destArray)[iBlk];
-    }
-for (iBlk = 0; iBlk < numSrcBlocks; iBlk++)
-    {
-    assert(iBlk+insertIdx < numDestBlocks+numSrcBlocks);
-    (*destArray)[iBlk+insertIdx] = srcArray[iBlk];
-    }
-}
-
-static void addPslToChained(struct psl* chainedPsl,
-                            struct psl* nextPsl,
-                            int insertIdx)
-/* add blocks form a psl to the chained psl at the given point */
-{
-assert(insertIdx <= chainedPsl->blockCount);
-// expand arrays and copy in entries
-pslArrayInsert(&chainedPsl->blockSizes, chainedPsl->blockCount, nextPsl->blockSizes, nextPsl->blockCount, insertIdx);
-pslArrayInsert(&chainedPsl->qStarts, chainedPsl->blockCount, nextPsl->qStarts, nextPsl->blockCount, insertIdx);
-pslArrayInsert(&chainedPsl->tStarts, chainedPsl->blockCount, nextPsl->tStarts, nextPsl->blockCount, insertIdx);
-chainedPsl->blockCount += nextPsl->blockCount;
-
-// update bounds if needed
-if (pslQStrand(chainedPsl) == '+')
-    {
-    chainedPsl->qStart = pslQStartStrand(chainedPsl, 0, '+');
-    chainedPsl->qEnd = pslQEndStrand(chainedPsl, chainedPsl->blockCount-1, '+');
-    }
-else
-    {
-    chainedPsl->qStart = pslQStartStrand(chainedPsl, chainedPsl->blockCount-1, '+');
-    chainedPsl->qEnd = pslQEndStrand(chainedPsl, 0, '+');
-    }
-assert(pslTStrand(chainedPsl) == '+');
-chainedPsl->tStart = pslTStartStrand(chainedPsl, 0, '+');
-chainedPsl->tEnd = pslTEndStrand(chainedPsl, chainedPsl->blockCount-1, '+');
-
-// update counts
-chainedPsl->match += nextPsl->match;
-chainedPsl->misMatch += nextPsl->misMatch;
-chainedPsl->repMatch += nextPsl->repMatch;
-chainedPsl->nCount += nextPsl->nCount;
-}
-
-static void chainToLongest(struct psl** queryPsls,
-                           FILE* outPslFh)
-/* pull off one or more psls from the list and chain them */
-{
-struct psl* chainedPsl = slPopHead(queryPsls);
-struct psl* unchainedPsls = NULL;  // ones not included
-struct psl* nextPsl;
-while ((nextPsl = slPopHead(queryPsls)) != NULL)
-    {
-    int insertIdx = findChainPoint(chainedPsl, nextPsl);
-    if (insertIdx >= 0)
-        {
-        addPslToChained(chainedPsl, nextPsl, insertIdx);
-        pslFree(&nextPsl);
-        }
-    else
-        {
-        slAddHead(&unchainedPsls, nextPsl);
-        }
-    }
-pslComputeInsertCounts(chainedPsl);
-pslTabOut(chainedPsl, outPslFh);
-pslFree(&chainedPsl);
-slReverse(&unchainedPsls); // preserve longest to shortest order
-*queryPsls = unchainedPsls;
-}
-
-static void chainQuery(struct psl** queryPsls,
-                       FILE* outPslFh)
-/* make chains for a single query. Takes over ownership
- * of PSLs */
-{
-// sort by genomic location, then pull of a group to
-// to chain
-slSort(queryPsls, pslCmpTargetAndStrandSpan);
-while (*queryPsls != NULL)
-    chainToLongest(queryPsls, outPslFh);
-}
-
-static void pslMapPostChain(char* inPslFile,
-                            char* outPslFile)
-/* do chaining */
-{
-struct hash* pslsByQName = loadPslByQname(inPslFile);
-FILE* outPslFh = mustOpen(outPslFile, "w");
-struct hashEl *hel;
-struct hashCookie cookie = hashFirst(pslsByQName);
-while ((hel = hashNext(&cookie)) != NULL)
-    {
-    struct psl** queryPsls = (struct psl**)&hel->val;
-    chainQuery(queryPsls, outPslFh);
-    }
-carefulClose(&outPslFh);
-}
-
-int main(int argc, char **argv)
-/* entry */
-{
-optionInit(&argc, argv, optionSpecs);
-if (optionExists("h") || optionExists("help"))
-    usage();
-if (argc != 3)
-    usage();
-pslMapPostChain(argv[1], argv[2]);
-return 0;
-}
-
-