87160c4c6abc63ea1732458d5b58ca8bd420528c markd Wed Aug 30 08:53:24 2017 -0700 added option to pslCheck to not check insert sizes diff --git src/inc/psl.h src/inc/psl.h index 5504b31..eac4fb2 100644 --- src/inc/psl.h +++ src/inc/psl.h @@ -24,30 +24,33 @@ #ifndef DNASEQ_H #include "dnaseq.h" #endif /* Some forward declarations of structures used but not defined here. */ struct rbTree; #define PSL_NUM_COLS 21 /* number of columns in a PSL */ #define PSLX_NUM_COLS 23 /* number of columns in a PSLX */ #define PSL_XA_FORMAT 0x04 /* add XA format columns */ /* options for pslFromAlign */ #define PSL_IS_SOFTMASK 0x01 /* lower case are mask */ +/* options for pslCheck */ +#define PSL_CHECK_IGNORE_INSERT_CNTS 0x01 /* Don't check insert counts in psl */ + struct psl /* Summary info about a patSpace alignment */ { struct psl *next; /* Next in singly linked list. */ unsigned match; /* Number of bases that match that aren't repeats */ unsigned misMatch; /* Number of bases that don't match */ unsigned repMatch; /* Number of bases that match but are part of repeats */ unsigned nCount; /* Number of 'N' bases */ unsigned qNumInsert; /* Number of inserts in query */ int qBaseInsert; /* Number of bases inserted in query */ unsigned tNumInsert; /* Number of inserts in target */ int tBaseInsert; /* Number of bases inserted in target */ char strand[3]; /* + or - for strand */ char *qName; /* Query sequence name */ unsigned qSize; /* Query sequence size */ @@ -249,30 +252,38 @@ void pslDump(struct psl *psl, FILE *f); /* Dump most of PSL to file - for debugging. */ struct psl *pslTrimToTargetRange(struct psl *oldPsl, int tMin, int tMax); /* Return psl trimmed to fit inside tMin/tMax. Note this does not * update the match/misMatch and related fields. */ struct psl *pslTrimToQueryRange(struct psl *oldPsl, int qMin, int qMax); /* Return psl trimmed to fit inside qMin/qMax. Note this does not * update the match/misMatch and related fields. 
*/ int pslCheck(char *pslDesc, FILE* out, struct psl* psl); /* Validate a PSL for consistency. pslDesc is printed the error messages * to file out (open /dev/null to discard). Return count of errors. */ +int pslCheck2(unsigned opts, char *pslDesc, FILE* out, struct psl* psl); +/* Validate a PSL for consistency. pslDesc is printed with the error messages to + * file out (open /dev/null to discard). Return count of errors. Option + * PSL_CHECK_IGNORE_INSERT_CNTS disables validation of the insert count fields + * in each PSL. Useful because protein PSL doesn't seem to compute these in a + * consistent way. + */ + int pslCountBlocks(struct psl *target, struct psl *query, int maxBlockGap); /* count the number of blocks in the query that overlap the target */ /* merge blocks that are closer than maxBlockGap */ struct hash *readPslToBinKeeper(char *sizeFileName, char *pslFileName); /* read a list of psls and return results in hash of binKeeper structure for fast query*/ boolean pslIsProtein(const struct psl *psl); /* is psl a protein psl (are it's blockSizes and scores in protein space) */ struct psl* pslFromAlign(char *qName, int qSize, int qStart, int qEnd, char *qString, char *tName, int tSize, int tStart, int tEnd, char *tString, char* strand, unsigned options); /* Create a PSL from an alignment. Options PSL_IS_SOFTMASK if lower case * bases indicate repeat masking. Returns NULL if alignment is empty after