ba66786a683bce0f6c38efbdcad60686984478b4 markd Sun Jun 11 13:12:41 2023 -0700 correctly handle getting target block end on protein/rna PSLs diff --git src/utils/pslMapPostChain/pslMapPostChain.c src/utils/pslMapPostChain/pslMapPostChain.c index 13e6d74..a03bee7 100644 --- src/utils/pslMapPostChain/pslMapPostChain.c +++ src/utils/pslMapPostChain/pslMapPostChain.c @@ -6,30 +6,31 @@ #include "hash.h" #include "psl.h" #include <string.h> void usage() /* Explain usage and exit. */ { errAbort("pslMapPostChain - Post genomic pslMap (TransMap) chaining.\n" "usage:\n" " pslMapPostChain [options] inPsl outPsl\n" "\n" "Post genomic pslMap (TransMap) chaining. This takes transcripts\n" "that have been mapped via genomic chains adds back in\n" "blocks that didn't get include in genomic chains due\n" "to complex rearrangements or other issues.\n" + "This can also handle other PSLs, including protein-RNA alignments\n" "\n" "This program has not seen much use and may not do what you want\n"); } static struct optionSpec optionSpecs[] = { {"h", OPTION_BOOLEAN}, {"help", OPTION_BOOLEAN}, {NULL, 0} }; /* max distance to attempt to chain over */ static int maxAdjacentDistance = 50*1000*1000; // 50mb /* special status used to in place of chain */ static const int CHAIN_NOT_HERE = -1; @@ -213,48 +214,51 @@ (*destArray)[iBlk+numSrcBlocks] = (*destArray)[iBlk]; } for (iBlk = 0; iBlk < numSrcBlocks; iBlk++) { assert(iBlk+insertIdx < numDestBlocks+numSrcBlocks); (*destArray)[iBlk+insertIdx] = srcArray[iBlk]; } } static void addPslToChained(struct psl* chainedPsl, struct psl* nextPsl, int insertIdx) /* add blocks form a psl to the chained psl at the given point */ { assert(insertIdx <= chainedPsl->blockCount); +if (pslIsProtein(chainedPsl) != pslIsProtein(nextPsl)) + errAbort("can't chain protein/NA with NA/NA alignments: %s %s", chainedPsl->qName, chainedPsl->tName); + // expand arrays and copy in entries pslArrayInsert(&chainedPsl->blockSizes, chainedPsl->blockCount, nextPsl->blockSizes, nextPsl->blockCount, insertIdx); pslArrayInsert(&chainedPsl->qStarts, chainedPsl->blockCount, nextPsl->qStarts, nextPsl->blockCount, insertIdx); pslArrayInsert(&chainedPsl->tStarts, chainedPsl->blockCount, nextPsl->tStarts, nextPsl->blockCount, insertIdx); chainedPsl->blockCount += nextPsl->blockCount; // update bounds if needed if (pslQStrand(chainedPsl) == '+') { chainedPsl->qStart = pslQStartStrand(chainedPsl, 0, '+'); chainedPsl->qEnd = pslQEndStrand(chainedPsl, chainedPsl->blockCount-1, '+'); } else { chainedPsl->qStart = pslQStartStrand(chainedPsl, chainedPsl->blockCount-1, '+'); chainedPsl->qEnd = pslQEndStrand(chainedPsl, 0, '+'); } -assert(pslTStrand(chainedPsl) == '+'); +assert(pslTStrand(chainedPsl) == '+'); // forced on load chainedPsl->tStart = pslTStartStrand(chainedPsl, 0, '+'); chainedPsl->tEnd = pslTEndStrand(chainedPsl, chainedPsl->blockCount-1, '+'); // update counts chainedPsl->match += nextPsl->match; chainedPsl->misMatch += nextPsl->misMatch; chainedPsl->repMatch += nextPsl->repMatch; chainedPsl->nCount += nextPsl->nCount; } static void chainToLongest(struct psl** queryPsls, FILE* outPslFh) /* pull off one or more psls from the list and chain them */ { struct psl* chainedPsl = slPopHead(queryPsls); @@ -273,31 +277,31 @@ slAddHead(&unchainedPsls, nextPsl); } } pslComputeInsertCounts(chainedPsl); pslTabOut(chainedPsl, outPslFh); pslFree(&chainedPsl); slReverse(&unchainedPsls); // preserve longest to shortest order *queryPsls = unchainedPsls; } static void chainQuery(struct psl** queryPsls, FILE* outPslFh) /* make chains for a single query. Takes over ownership * of PSLs */ { -// sort by genomic location, then pull of a group to +// sort by genomic location, then group to chain // to chain slSort(queryPsls, pslCmpTargetAndStrandSpan); while (*queryPsls != NULL) chainToLongest(queryPsls, outPslFh); } static void pslMapPostChain(char* inPslFile, char* outPslFile) /* do chaining */ { struct hash* pslsByQName = loadPslByQname(inPslFile); FILE* outPslFh = mustOpen(outPslFile, "w"); struct hashEl *hel; struct hashCookie cookie = hashFirst(pslsByQName); while ((hel = hashNext(&cookie)) != NULL)