a4887527b333527019132f93bfcebab37e594b5a markd Sat Jun 10 12:17:57 2023 -0700 added tests for various mapping PSL type modes and fixed mode checking bug diff --git src/lib/pslTransMap.c src/lib/pslTransMap.c index 3661d0f..8c3ab0e 100644 --- src/lib/pslTransMap.c +++ src/lib/pslTransMap.c @@ -25,36 +25,48 @@ static char *pslTypeDesc[] = /* description of pslType */ { "unspecified", // pslTypeUnspecified "protein-protein", // pslTypeProtProt "protein-NA", // pslTypeProtNa "NA-NA" // pslTypeNaNa }; static boolean pslTypeQueryIsProtein(enum pslType pslType) /* is the pslType indicate the query is a protein? */ { return (pslType == pslTypeProtProt) || (pslType == pslTypeProtNa); } +static boolean pslTypeQueryIsNa(enum pslType pslType) +/* is the pslType indicate the query is a NA? */ +{ +return !pslTypeQueryIsProtein(pslType); +} + static boolean pslTypeTargetIsProtein(enum pslType pslType) /* is the pslType indicate the target is a protein? */ { return (pslType == pslTypeProtProt); } +static boolean pslTypeTargetIsNa(enum pslType pslType) +/* is the pslType indicate the target is a NA? */ +{ +return !pslTypeTargetIsProtein(pslType); +} + struct block /* Coordinates of a block, which might be aligned or a gap on one side */ { int qStart; /* Query start position. */ int qEnd; /* Query end position. */ int tStart; /* Target start position. */ int tEnd; /* Target end position. */ }; static struct block ZERO_BLOCK = {0, 0, 0, 0}; static void assertBlockAligned(struct block *blk) /* Make sure both sides are same size and not negative length. */ { assert(blk->qStart <= blk->qEnd); @@ -93,30 +105,32 @@ for (int iBlk = 0; iBlk < psl->blockCount; iBlk++) psl->match += psl->blockSizes[iBlk]; pslComputeInsertCounts(psl); } static void pslProtToNAConvert(struct psl *psl) /* convert a protein/NA or protein/protein alignment to a NA/NA alignment */ { boolean isProtNa = pslIsProtein(psl); int iBlk; psl->qStart *= 3; psl->qEnd *= 3; psl->qSize *= 3; +if (!isProtNa) + psl->tSize *= 3; for (iBlk = 0; iBlk < psl->blockCount; iBlk++) { psl->blockSizes[iBlk] *= 3; psl->qStarts[iBlk] *= 3; if (!isProtNa) psl->tStarts[iBlk] *= 3; } setPslBoundsCounts(psl); } static struct psl* createMappedPsl(struct psl* inPsl, struct psl *mapPsl, int mappedPslMax) /* setup a PSL for the output alignment */ { char strand[3]; @@ -332,76 +346,87 @@ // check specified type if (isProtNa && (pslType != pslTypeProtNa)) errAbort("%s PSL %s to %s is a protein to NA alignment, however %s specified", errDesc, psl->qName, psl->tName, pslTypeDesc[pslType]); if ((!isProtNa) && (pslType == pslTypeProtNa)) errAbort("%s PSL %s to %s is not a protein to NA alignment, however %s was specified", errDesc, psl->qName, psl->tName, pslTypeDesc[pslType]); return pslType; } static void checkInMapCompat(struct psl *inPsl, enum pslType inPslType, struct psl *mapPsl, enum pslType mapPslType) /* validate input and mapping alignments are compatible types */ { -if (pslTypeTargetIsProtein(inPslType) != pslTypeQueryIsProtein(mapPslType)) +/* allowed combinations: + * inPslType mapPslType outPslType + * na_na na_na na_na + * prot_prot prot_prot prot_prot + * prot_na na_na cds_na + * prot_prot na_na cds_na + * prot_prot prot_na cds_na + */ + +if (!((pslTypeTargetIsNa(inPslType) && pslTypeQueryIsNa(mapPslType)) || + pslTypeTargetIsProtein(inPslType))) errAbort("input PSL %s to %s type %s is not compatible with mapping PSL %s to %s type %s ", inPsl->qName, inPsl->tName, pslTypeDesc[inPslType], mapPsl->qName, mapPsl->tName, pslTypeDesc[mapPslType]); } struct psl* pslTransMap(unsigned opts, struct psl *inPsl, enum pslType inPslType, struct psl *mapPsl, enum pslType mapPslType) /* map a psl via a mapping psl, a single psl is returned, or NULL if it * couldn't be mapped. psl types are normally set as pslTypeUnspecified, * and assumed to be NA->NA. */ { -/* sanity check size but not name, so names to vary to allow ids to have - * unique-ifying suffixes. */ -if (inPsl->tSize != mapPsl->qSize) - errAbort("Error: inPsl %s tSize (%d) != mapPsl %s qSize (%d)", - inPsl->tName, inPsl->tSize, mapPsl->qName, mapPsl->qSize); - inPslType = determinePslType(inPsl, inPslType, "input PSL"); mapPslType = determinePslType(mapPsl, mapPslType, "mapping PSL"); checkInMapCompat(inPsl, inPslType, mapPsl, mapPslType); // check if need to swap strands boolean rcInPsl = (pslTStrand(inPsl) != pslQStrand(mapPsl)); // need to convert all sides of both alignments to a common coordinate space // all protein or all NA require no conversions. boolean cnvIn = (inPslType == pslTypeProtNa) || ((inPslType == pslTypeProtProt) && (mapPslType != pslTypeProtProt)); boolean cnvMap = (mapPslType == pslTypeProtNa) || (cnvIn && (mapPslType == pslTypeProtProt)); /* ensure common sequence is in same orientation and convert protein PSLs */ char inPslOrigStrand[3]; safef(inPslOrigStrand, sizeof(inPslOrigStrand), "%s", inPsl->strand); if (cnvIn || rcInPsl) inPsl = pslClone(inPsl); if (cnvIn) pslProtToNAConvert(inPsl); if (rcInPsl) pslRc(inPsl); if (cnvMap) { mapPsl = pslClone(mapPsl); pslProtToNAConvert(mapPsl); } +/* sanity check sizes after conversion Don't check name, so names to vary to + * allow ids to have unique-ifying suffixes. */ +if (inPsl->tSize != mapPsl->qSize) + errAbort("Error: inPsl %s tSize (%d) != mapPsl %s qSize (%d)", + inPsl->tName, inPsl->tSize, mapPsl->qName, mapPsl->qSize); + + struct psl* mappedPsl = doMapping(inPsl, mapPsl); /* finish up psl, or free if no blocks were added */ if (mappedPsl->blockCount == 0) pslFree(&mappedPsl); /* nothing made it */ else { setPslBoundsCounts(mappedPsl); adjustOrientation(opts, inPsl, inPslOrigStrand, mappedPsl); } if (cnvIn || rcInPsl) pslFree(&inPsl); if (cnvMap) pslFree(&mapPsl);