87160c4c6abc63ea1732458d5b58ca8bd420528c markd Wed Aug 30 08:53:24 2017 -0700 added option to pslCheck to not check insert sizes diff --git src/lib/psl.c src/lib/psl.c index 8f295be..9990075 100644 --- src/lib/psl.c +++ src/lib/psl.c @@ -1529,63 +1529,76 @@ { numInsert++; baseInsert += gapSize; } } if (numInsert != pNumInsert) chkError(pslDesc, out, psl, errCount, "\t%s %cNumInsert %u != expected %u\n", pName, pCLabel, pNumInsert, numInsert); if (baseInsert != pBaseInsert) chkError(pslDesc, out, psl, errCount, "\t%s %cBaseInsert %u != expected %u\n", pName, pCLabel, pBaseInsert, baseInsert); } -int pslCheck(char *pslDesc, FILE* out, struct psl* psl) -/* Validate a PSL for consistency. pslDesc is printed the error messages - * to file out (open /dev/null to discard). Return count of errors. */ +int pslCheck2(unsigned opts, char *pslDesc, FILE* out, struct psl* psl) +/* Validate a PSL for consistency. pslDesc is printed with the error messages to + * file out (open /dev/null to discard). Return count of errors. Option + * PSL_CHECK_IGNORE_INSERT_CNTS skips validation of the insert count fields + * in each PSL. Useful because protein PSL doesn't seem to compute these in a + * consistent way. + */ { static char* VALID_STRANDS[] = { "+", "-", "++", "+-", "-+", "--", NULL }; int i, errCount = 0; int tBlockSizeMult = pslIsProtein(psl) ? 
3 : 1; /* check strand value */ for (i = 0; VALID_STRANDS[i] != NULL; i++) { if (strcmp(psl->strand, VALID_STRANDS[i]) == 0) break; } if (VALID_STRANDS[i] == NULL) chkError(pslDesc, out, psl, &errCount, "\tinvalid PSL strand: \"%s\"\n", psl->strand); /* check query */ chkRanges(pslDesc, out, psl, psl->qName, "query", 'q', pslQStrand(psl), psl->qSize, psl->qStart, psl->qEnd, psl->qStarts, 1, &errCount); +if ((opts & PSL_CHECK_IGNORE_INSERT_CNTS) == 0) chkInsertCounts(pslDesc, out, psl, psl->qName, 'q', psl->qStarts, psl->qNumInsert, psl->qBaseInsert, &errCount); /* check target */ chkRanges(pslDesc, out, psl, psl->tName, "target", 't', pslTStrand(psl), psl->tSize, psl->tStart, psl->tEnd, psl->tStarts, tBlockSizeMult, &errCount); +if ((opts & PSL_CHECK_IGNORE_INSERT_CNTS) == 0) chkInsertCounts(pslDesc, out, psl, psl->tName, 't', psl->tStarts, psl->tNumInsert, psl->tBaseInsert, &errCount); return errCount; } +int pslCheck(char *pslDesc, FILE* out, struct psl* psl) +/* Validate a PSL for consistency. pslDesc is printed with the error messages + * to file out (open /dev/null to discard). Return count of errors. */ +{ +return pslCheck2(0, pslDesc, out, psl); +} + struct hash *readPslToBinKeeper(char *sizeFileName, char *pslFileName) /* read a list of psls and return results in hash of binKeeper structure for fast query*/ { struct binKeeper *bk; struct psl *psl; struct lineFile *sf = lineFileOpen(sizeFileName, TRUE); struct lineFile *pf = lineFileOpen(pslFileName , TRUE); struct hash *hash = newHash(0); char *chromRow[2]; char *row[21] ; while (lineFileRow(sf, chromRow)) { char *name = chromRow[0]; int size = lineFileNeedNum(sf, chromRow, 1);