017f1ebbfad81dd9a4759d391debdc44b227b595 markd Fri Aug 19 23:25:17 2016 -0700 Cache twoBit sequence header information for the last sequence accessed to avoid rereading N and mask blocks when sequentially reading fragments of the genome. This resulted in a 20% speedup for pslRecalcMatch. diff --git src/inc/twoBit.h src/inc/twoBit.h index 02a33c7..1d9b246 100644 --- src/inc/twoBit.h +++ src/inc/twoBit.h @@ -31,35 +31,42 @@ struct twoBitFile /* Holds header and index info from .2bit file. */ { struct twoBitFile *next; char *fileName; /* Name of this file, for error reporting. */ void *f; /* Open file. */ boolean isSwapped; /* Is byte-swapping needed. */ bits32 version; /* Version of .2bit file */ bits32 seqCount; /* Number of sequences. */ bits32 reserved; /* Reserved, always zero for now. */ struct twoBitIndex *indexList; /* List of sequence. */ struct hash *hash; /* Hash of sequences. */ struct bptFile *bpt; /* Alternative index. */ + + struct twoBit *seqCache; /* Cache information about last sequence accessed, including + * nBlock and mask block. This doesn't include the data. + * This speeds fragment reads. */ + bits64 dataOffsetCache; /* file offset of data for seqCache sequence */ + /* the routines we use to access the twoBit. * These may be UDC routines, or stdio */ void (*ourSeek)(void *file, bits64 offset); void (*ourSeekCur)(void *file, bits64 offset); + bits64 (*ourTell)(void *file); bits32 (*ourReadBits32)(void *f, boolean isSwapped); void (*ourClose)(void *pFile); boolean (*ourFastReadString)(void *f, char buf[256]); void (*ourMustRead)(void *file, void *buf, size_t size); }; struct twoBitSpec /* parsed .2bit file and sequence specs */ { char *fileName; /* path to file */ struct twoBitSeqSpec *seqs; /* list of sequences and subsequences */ }; struct twoBitSeqSpec /* specification for a seq or subsequence in a .2bit file */