ce07fe9f6695acd811e90fbd646830ca7858b525 markd Mon Sep 28 13:41:50 2015 -0700 fixed various problems detected by making pslCheck more stringent diff --git src/lib/psl.c src/lib/psl.c index 46d0253..23f921f 100644 --- src/lib/psl.c +++ src/lib/psl.c @@ -1548,40 +1548,40 @@ "+", "-", "++", "+-", "-+", "--", NULL }; int i, errCount = 0; int tBlockSizeMult = pslIsProtein(psl) ? 3 : 1; /* check strand value */ for (i = 0; VALID_STRANDS[i] != NULL; i++) { if (strcmp(psl->strand, VALID_STRANDS[i]) == 0) break; } if (VALID_STRANDS[i] == NULL) chkError(pslDesc, out, psl, &errCount, "\tinvalid PSL strand: \"%s\"\n", psl->strand); -/* check target */ -chkRanges(pslDesc, out, psl, psl->tName, "target", 't', pslTStrand(psl), psl->tSize, psl->tStart, psl->tEnd, - psl->tStarts, tBlockSizeMult, &errCount); -chkInsertCounts(pslDesc, out, psl, psl->tName, 't', psl->tStarts, psl->tNumInsert, psl->tBaseInsert, &errCount); - /* check query */ chkRanges(pslDesc, out, psl, psl->qName, "query", 'q', pslQStrand(psl), psl->qSize, psl->qStart, psl->qEnd, psl->qStarts, 1, &errCount); chkInsertCounts(pslDesc, out, psl, psl->qName, 'q', psl->qStarts, psl->qNumInsert, psl->qBaseInsert, &errCount); +/* check target */ +chkRanges(pslDesc, out, psl, psl->tName, "target", 't', pslTStrand(psl), psl->tSize, psl->tStart, psl->tEnd, + psl->tStarts, tBlockSizeMult, &errCount); +chkInsertCounts(pslDesc, out, psl, psl->tName, 't', psl->tStarts, psl->tNumInsert, psl->tBaseInsert, &errCount); + return errCount; } struct hash *readPslToBinKeeper(char *sizeFileName, char *pslFileName) /* read a list of psls and return results in hash of binKeeper structure for fast query*/ { struct binKeeper *bk; struct psl *psl; struct lineFile *sf = lineFileOpen(sizeFileName, TRUE); struct lineFile *pf = lineFileOpen(pslFileName , TRUE); struct hash *hash = newHash(0); char *chromRow[2]; char *row[21] ; while (lineFileRow(sf, chromRow)) @@ -1827,30 +1827,54 @@ * updated to with the new amount of space. */ { int blockSpace = *blockSpacePtr; int newSpace = 2 * blockSpace; ExpandArray(psl->blockSizes, blockSpace, newSpace); ExpandArray(psl->qStarts, blockSpace, newSpace); ExpandArray(psl->tStarts, blockSpace, newSpace); if (psl->qSequence != NULL) { ExpandArray(psl->qSequence, blockSpace, newSpace); ExpandArray(psl->tSequence, blockSpace, newSpace); } *blockSpacePtr = newSpace; } +void pslComputeInsertCounts(struct psl *psl) +/* compute numInsert and baseInsert fields from the blocks */ +{ +psl->qNumInsert = psl->qBaseInsert = 0; +psl->tNumInsert = psl->tBaseInsert = 0; +int iBlk; + +for (iBlk = 1; iBlk < psl->blockCount; iBlk++) + { + unsigned qGapSize = psl->qStarts[iBlk] - (psl->qStarts[iBlk-1]+psl->blockSizes[iBlk-1]); + if (qGapSize != 0) + { + psl->qNumInsert++; + psl->qBaseInsert += qGapSize; + } + unsigned tGapSize = psl->tStarts[iBlk] - (psl->tStarts[iBlk-1]+psl->blockSizes[iBlk-1]); + if (tGapSize != 0) + { + psl->tNumInsert++; + psl->tBaseInsert += tGapSize; + } + } +} + static boolean getNextCigarOp(char *startPtr, boolean reverse, char **ptr, char *op, int *size) /* gets one cigar op out of the CIGAR string. Reverse the order if asked */ { char *str = *ptr; if (str == NULL) return FALSE; if ((!reverse && (*str == 0)) || (reverse && (str == startPtr))) return FALSE; // between each cigar op there could be nothing, or a space, or a plus if (reverse) { char *end = str - 1; @@ -1953,30 +1977,31 @@ psl->qStart = psl->qStarts[0]; psl->qEnd = pslQEnd(psl, psl->blockCount-1); if (strand[0] == '-') reverseIntRange(&psl->qStart, &psl->qEnd, qSize); psl->tStart = psl->tStarts[0]; psl->tEnd = pslTEnd(psl, psl->blockCount-1); if (strand[1] == '-') reverseIntRange(&psl->tStart, &psl->tEnd, tSize); /* sanity check */ if (qNext != qBlkEnd) errAbort("CIGAR query length does not match specified query range %s:%d-%d", qName, qStart, qEnd); if (tNext != tBlkEnd) errAbort("CIGAR target length does not match specified target range %s:%d-%d", tName, tStart, tEnd); psl->match = totalSize; +pslComputeInsertCounts(psl); return psl; } int pslRangeTreeOverlap(struct psl *psl, struct rbTree *rangeTree) /* Return amount that psl overlaps (on target side) with rangeTree. */ { int i; int overlap = 0; boolean isRc = (psl->strand[1] == '-'); for (i=0; i<psl->blockCount; ++i) { int start = psl->tStarts[i]; int end = start + psl->blockSizes[i]; if (isRc) reverseIntRange(&start, &end, psl->tSize);