bfe5bd345bdaf071673eb7e81114502a020b33b9 markd Sun Sep 27 22:55:49 2015 -0700 added checks for numBase and numInsert fields to pslCheck diff --git src/lib/psl.c src/lib/psl.c index db0c111..f85b1d9 100644 --- src/lib/psl.c +++ src/lib/psl.c @@ -1501,57 +1501,88 @@ // check that block start/end matches overall start end unsigned pStartStrand = pStart, pEndStrand = pEnd; if (pStrand != '+') reverseUnsignedRange(&pStartStrand, &pEndStrand, pSize); unsigned lastBlkEnd = pBlockStarts[blockCount-1] + (blockSizeMult * blockSizes[blockCount-1]); if ((pStartStrand != pBlockStarts[0]) || (pEndStrand != lastBlkEnd)) chkError(pslDesc, out, psl, errCount, "\t%s strand \"%c\" adjusted %cStart-%cEnd range %u-%u != block range %u-%u\n", pName, pStrand, pCLabel, pCLabel, pStartStrand, pEndStrand, pBlockStarts[0], lastBlkEnd); for (iBlk = 0; iBlk < blockCount; iBlk++) chkBlkRanges(pslDesc, out, psl, pName, pLabel, pCLabel, pStrand, pSize, pStart, pEnd, iBlk, blockSizes, pBlockStarts, errCount); } + +static void chkInsertCounts(char* pslDesc, FILE* out, struct psl* psl, + char* pName, char pCLabel, unsigned* pBlockStarts, + unsigned pNumInsert, unsigned pBaseInsert, + int* errCount) +/* check the insert counts, incrementing errorCnt */ +{ +unsigned numInsert = 0, baseInsert = 0; +int iBlk; + +for (iBlk = 1; iBlk < psl->blockCount; iBlk++) + { + unsigned gapSize = pBlockStarts[iBlk] - (pBlockStarts[iBlk-1]+psl->blockSizes[iBlk-1]); + if (gapSize > 0) + { + numInsert++; + baseInsert += gapSize; + } + } +if (numInsert != pNumInsert) + chkError(pslDesc, out, psl, errCount, + "\t%s %cNumInsert %u != expected %u\n", + pName, pCLabel, pNumInsert, numInsert); +if (baseInsert != pBaseInsert) + chkError(pslDesc, out, psl, errCount, + "\t%s %cBaseInsert %u != expected %u\n", + pName, pCLabel, pBaseInsert, baseInsert); +} + int pslCheck(char *pslDesc, FILE* out, struct psl* psl) /* Validate a PSL for consistency. pslDesc is printed the error messages * to file out (open /dev/null to discard). Return count of errors. */ { static char* VALID_STRANDS[] = { "+", "-", "++", "+-", "-+", "--", NULL }; int i, errCount = 0; int tBlockSizeMult = pslIsProtein(psl) ? 3 : 1; /* check strand value */ for (i = 0; VALID_STRANDS[i] != NULL; i++) { if (strcmp(psl->strand, VALID_STRANDS[i]) == 0) break; } if (VALID_STRANDS[i] == NULL) chkError(pslDesc, out, psl, &errCount, "\tinvalid PSL strand: \"%s\"\n", psl->strand); /* check target */ chkRanges(pslDesc, out, psl, psl->tName, "target", 't', pslTStrand(psl), psl->tSize, psl->tStart, psl->tEnd, psl->blockCount, psl->blockSizes, psl->tStarts, tBlockSizeMult, &errCount); +chkInsertCounts(pslDesc, out, psl, psl->tName, 't', psl->tStarts, psl->tNumInsert, psl->tBaseInsert, &errCount); /* check query */ chkRanges(pslDesc, out, psl, psl->qName, "query", 'q', pslQStrand(psl), psl->qSize, psl->qStart, psl->qEnd, psl->blockCount, psl->blockSizes, psl->qStarts, 1, &errCount); +chkInsertCounts(pslDesc, out, psl, psl->qName, 'q', psl->qStarts, psl->qNumInsert, psl->qBaseInsert, &errCount); return errCount; } struct hash *readPslToBinKeeper(char *sizeFileName, char *pslFileName) /* read a list of psls and return results in hash of binKeeper structure for fast query*/ { struct binKeeper *bk; struct psl *psl; struct lineFile *sf = lineFileOpen(sizeFileName, TRUE); struct lineFile *pf = lineFileOpen(pslFileName , TRUE); struct hash *hash = newHash(0); char *chromRow[2]; char *row[21] ;