b31907d700c1fe956e4e4c20e64d91de027d7c84 markd Tue May 14 02:03:33 2024 -0700 merge blatHuge implementation diff --git src/jkOwnLib/genoFind.c src/jkOwnLib/genoFind.c index a4e0e12..4e20111 100644 --- src/jkOwnLib/genoFind.c +++ src/jkOwnLib/genoFind.c @@ -6,34 +6,73 @@ #include <sys/mman.h> #include "portable.h" #include "obscure.h" #include "dnautil.h" #include "dnaseq.h" #include "nib.h" #include "twoBit.h" #include "fa.h" #include "dystring.h" #include "errAbort.h" #include "sig.h" #include "ooc.h" #include "genoFind.h" #include "trans3.h" #include "binRange.h" +#include "portable.h" +#include <sys/mman.h> static char indexFileMagic[] = "genoFind"; static char indexFileVerison[] = "1.0"; +#undef DEBUG_CLUMP +#undef DEBUG_HITS + +#ifdef DEBUG_CLUMP +static void dumpClump(struct gfClump *clump, FILE *f) +/* Print out a clump */ +{ +struct gfSeqSource *target = clump->target; +char *tName = target->fileName; +if (tName == NULL) tName = target->seq->name; +fprintf(f, GFOFFSET_FMT "-" GFOFFSET_FMT "\t%s:" GFOFFSET_FMT "-" GFOFFSET_FMT "\n", + clump->qStart, clump->qEnd, tName, clump->tStart, clump->tEnd); +} + +static void dumpClumpList(struct gfClump *clumpList, FILE *f) +/* Dump list of clumps. */ +{ +fprintf(f, "=== Clumps\n"); +struct gfClump *clump; +for (clump = clumpList; clump != NULL; clump = clump->next) + dumpClump(clump, f); +fflush(f); +} +#endif /* DEBUG_CLUMP */ + +#ifdef DEBUG_HITS +static void dumpHits(struct gfHit *hits, FILE *f) +/* Dump list of hits. */ +{ +fprintf(f, "=== Hits\n"); +for (struct gfHit *hit = hits; hit != NULL; hit = hit->next) + fprintf(f, GFOFFSET_FMT " " GFOFFSET_FMT " " GFOFFSET_FMT "\n", + hit->qStart, hit->tStart, hit->diagonal); +fflush(f); +} +#endif /* DEBUG_HITS */ + char *gfSignature() /* Return signature that starts each command to gfServer. Helps defend * server from confused clients. */ { static char signature[] = "0ddf270562684f29"; return signature; } volatile boolean pipeBroke = FALSE; /* Flag broken pipes here. */ static void gfPipeHandler(int sigNum) /* Set broken pipe flag. */ { pipeBroke = TRUE; } @@ -79,59 +118,109 @@ if (!gf->isMapped) { freeMem(gf->listSizes); freeMem(gf->allocated); } if ((sources = gf->sources) != NULL) { for (i=0; i<gf->sourceCount; ++i) bitFree(&sources[i].maskedBits); freeMem(sources); } freez(pGenoFind); } } +/* constants for endList */ +#if GFSERVER_HUGE +#define ENDLIST_ENTRY_NUM_PARTS 3 // 48-bits endList entry parts +#else +#define ENDLIST_ENTRY_NUM_PARTS 5 // 80-bits endList entry parts +#endif + +#define ENDLIST_ENTRY_SIZE (ENDLIST_ENTRY_NUM_PARTS * sizeof(endListPart)) +/* byte size of an entry in endList array */ + +INLINE endListPart* endListGetEntry(struct genoFind *gf, int iRow, int iCol) +/* get an entry in the endlist array */ +{ +assert(iCol < gf->listSizes[iRow]); +bits8 *row = (bits8*)gf->endLists[iRow]; +return (endListPart*)(row + (iCol * ENDLIST_ENTRY_SIZE)); +} + +INLINE void endListSetEntryValues(struct genoFind *gf, int iRow, int iCol, + bits16 tileTail, gfOffset offset) +/* store values in an endList entry */ +{ +endListPart* entry = endListGetEntry(gf, iRow, iCol); +entry[0] = tileTail; +#if GFSERVER_HUGE +entry[1] = offset >> 32; +entry[2] = (offset >> 16) & 0xffff; +entry[3] = offset & 0xffff; +#else +entry[1] = offset >> 16; +entry[2] = offset & 0xffff; +#endif +} + +INLINE bits16 endListEntryTileTail(endListPart *entry) +/* get endList tileTail value */ +{ +return entry[0]; +} + +INLINE gfOffset endListEntryOffset(endListPart *entry) +/* get endList genome offset */ +{ +#if GFSERVER_HUGE +return ((bits64)entry[1] << 32) | ((bits64)entry[2] << 16) | (bits64)entry[3]; +#else +return (entry[1] << 16) | entry[2]; +#endif +} + static off_t mustSeekAligned(FILE *f) /* seek so that the current offset is 64-bit aligned */ { off_t off = ftell(f); if ((off & 0x7) != 0) { off = (off & ~0x7) + 0x8; mustSeek(f, off, SEEK_SET); } return off; } struct genoFindFileHdr /* header for genoFind section in file */ { int maxPat; int minMatch; int maxGap; int tileSize; int stepSize; int tileSpaceSize; int tileMask; int sourceCount; bool isPep; bool allowOneMismatch; bool noSimpRepMask; int segSize; - int totalSeqSize; + gfOffset totalSeqSize; off_t sourcesOff; // offset of sequences sources off_t listSizesOff; // offset of listSizes off_t listsOff; // offset of lists or endLists off_t endListsOff; // Reserved area. These are bytes of zero, so that need fields can be added // that default to zero without needed check the version in code. bits64 reserved[32]; // vesion 1.0: 32 words }; static void genoFindInitHdr(struct genoFind *gf, struct genoFindFileHdr *hdr) /* fill in the file header struct from in-memory struct */ { @@ -173,40 +262,40 @@ static void genoFindWriteSource(struct gfSeqSource *ss, FILE *f) /* write a gfSeqSource object */ { if ((ss->seq != NULL) || (ss->maskedBits != NULL)) errAbort("can't write index contained sequences, must used external sequence file"); char *fileName = ss->fileName; if (fileName != NULL) { // don't include directories char *s = strrchr(fileName, '/'); if (s != NULL) fileName = s + 1; } writeStringSafe(f, fileName); -mustWrite(f, &ss->start, sizeof(bits32)); -mustWrite(f, &ss->end, sizeof(bits32)); +mustWrite(f, &ss->start, sizeof(gfOffset)); +mustWrite(f, &ss->end, sizeof(gfOffset)); } static void genoFindReadSource(FILE *f, struct gfSeqSource *ss) /* read a gfSeqSource from file */ { ss->fileName = readString(f); -mustRead(f, &ss->start, sizeof(bits32)); -mustRead(f, &ss->end, sizeof(bits32)); +mustRead(f, &ss->start, sizeof(gfOffset)); +mustRead(f, &ss->end, sizeof(gfOffset)); } static off_t genoFindWriteSources(struct genoFind *gf, FILE *f) /* write the sources to the file */ { off_t off = mustSeekAligned(f); int i; for (i = 0; i < gf->sourceCount; i++) genoFindWriteSource(gf->sources + i, f); return off; } static void genoFindReadSources(FILE *f, off_t off, struct genoFind *gf) /* read the sources from the file */ { @@ -229,70 +318,70 @@ static void genoFindMapListSize(void *memMapped, off_t off, struct genoFind *gf) /* map the list sizes into memory */ { gf->listSizes = memMapped + off; } static off_t genoFindWriteLists(struct genoFind *gf, FILE *f) /* write the lists */ { off_t off = mustSeekAligned(f); size_t count = 0; int i; for (i = 0; i < gf->tileSpaceSize; i++) if (gf->listSizes[i] < gf->maxPat) count += gf->listSizes[i]; -mustWrite(f, gf->allocated, count*sizeof(bits32)); +mustWrite(f, gf->allocated, count*sizeof(gfOffset)); return off; } static void genoFindMapLists(void *memMapped, off_t off, struct genoFind *gf) /* maps the lists into memory */ { gf->allocated = memMapped + off; gf->lists = needHugeZeroedMem(gf->tileSpaceSize * sizeof(gf->lists[0])); -bits32 *cur = gf->allocated; +gfOffset *cur = gf->allocated; size_t count = 0; int i; for (i = 0; i < gf->tileSpaceSize; i++) { if (gf->listSizes[i] < gf->maxPat) { gf->lists[i] = cur; cur += gf->listSizes[i]; count += gf->listSizes[i]; } } } static off_t genoFindWriteEndLists(struct genoFind *gf, FILE *f) /* write the endList */ { off_t off = mustSeekAligned(f); size_t count = 0; int i; for (i = 0; i < gf->tileSpaceSize; i++) count += gf->listSizes[i]; -mustWrite(f, gf->allocated, 3*count*sizeof(bits16)); +mustWrite(f, gf->allocated, count * ENDLIST_ENTRY_SIZE); return off; } static void genoFindMapEndLists(void *memMapped, off_t off, struct genoFind *gf) /* maps the endLists into memory */ { gf->endLists = needHugeZeroedMem(gf->tileSpaceSize * sizeof(gf->endLists[0])); -bits16 *cur = gf->allocated; +endListPart* cur = gf->allocated; size_t count = 0; int i; for (i = 0; i < gf->tileSpaceSize; i++) { gf->endLists[i] = cur; cur += 3 * gf->listSizes[i]; count += gf->listSizes[i]; } } static off_t genoFindWrite(struct genoFind *gf, FILE *f) /* write one genoFind structure, return offset */ { off_t hdrOff = mustSeekAligned(f); @@ -381,45 +470,54 @@ off_t untransOff; off_t transOff[2][3]; // Reserved area. These are bytes of zero, so that need fields can be added // that default to zero without needed check the version in code. bits64 reserved[32]; // vesion 1.0: 32 words }; static void genoFindIndexInitHeader(struct genoFindIndex *gfIdx, struct genoFindIndexFileHdr* hdr) /* fill in the file header struct from in-memory struct */ { zeroBytes(hdr, sizeof(struct genoFindIndexFileHdr)); safecpy(hdr->magic, sizeof(hdr->magic), indexFileMagic); safecpy(hdr->version, sizeof(hdr->version), indexFileVerison); +#ifdef GFSERVER_HUGE +hdr->indexAddressSize = 64; +#else hdr->indexAddressSize = 32; +#endif hdr->isTrans = gfIdx->isTrans; } static void genoFindIndexReadHeader(void* memMapped, struct genoFindIndexFileHdr* hdr, struct genoFindIndex* gfIdx) /* fill in the file header from file and valodate */ { *hdr = *((struct genoFindIndexFileHdr*)memMapped); if (!sameString(hdr->magic, indexFileMagic)) errAbort("wrong magic string for index file"); if (!sameString(hdr->version, indexFileVerison)) errAbort("unsupported version for index file: %s", hdr->version); +#ifdef GFSERVER_HUGE +if (hdr->indexAddressSize != 64) + errAbort("compiled for 64-bit index, but not a 64-bit index: %d", hdr->indexAddressSize); +#else if (hdr->indexAddressSize != 32) - errAbort("not a 32-bit index: %d", hdr->indexAddressSize); + errAbort("compiled for 32-bit index, but not a 32-bit index: %d", hdr->indexAddressSize); +#endif if (hdr->isTrans != gfIdx->isTrans) errAbort("index file has isTrans=%d, isTrans=%d requested", hdr->isTrans, gfIdx->isTrans); } static void genoFindIndexWriteTrans(struct genoFindIndex *gfIdx, struct genoFindIndexFileHdr* hdr, FILE *f) /* write translated indexes */ { int i, j; for (i = 0; i < 2; i++) for (j = 0; j < 3; j++) hdr->transOff[i][j] = genoFindWrite(gfIdx->transGf[i][j], f); } @@ -495,31 +593,31 @@ else { for (int i = 0; i < 2; i++) for (int j = 0; j < 3; j++) genoFindFree(&gfIdx->transGf[i][j]); } if (gfIdx->memMapped != NULL) { if (munmap(gfIdx->memMapped, gfIdx->memLength)) errnoAbort("munmap error"); } freez(pGfIdx); } } -int gfPowerOf20(int n) +static int gfPowerOf20(int n) /* Return a 20 to the n */ { int res = 20; while (--n > 0) res *= 20; return res; } void gfCheckTileSize(int tileSize, boolean isPep) /* Check that tile size is legal. Abort if not. */ { if (isPep) { if (tileSize < 3 || tileSize > 8) errAbort("protein tileSize must be between 3 and 8"); @@ -600,46 +698,46 @@ { static boolean initted = FALSE; if (!initted) { int i; for (i=0; i<ArraySize(ntLookup); ++i) ntLookup[i] = -1; ntLookup['a'] = A_BASE_VAL; ntLookup['c'] = C_BASE_VAL; ntLookup['t'] = T_BASE_VAL; ntLookup['g'] = G_BASE_VAL; initted = TRUE; } } -int gfDnaTile(DNA *dna, int n) +static int gfDnaTile(DNA *dna, int n) /* Make a packed DNA n-mer. */ { int tile = 0; int i, c; for (i=0; i<n; ++i) { tile <<= 2; if ((c = ntLookup[(int)dna[i]]) < 0) return -1; tile += c; } return tile; } -int gfPepTile(AA *pep, int n) +static int gfPepTile(AA *pep, int n) /* Make up packed representation of translated protein. */ { int tile = 0; int aa; while (--n >= 0) { tile *= 20; aa = aaVal[(int)(*pep++)]; if (aa < 0 || aa > 19) return -1; tile += aa; } return tile; } @@ -658,233 +756,225 @@ initNtLookup(); for (i=0; i<=lastTile; i += stepSize) { if ((tile = makeTile(poly, tileHeadSize)) >= 0) { if (listSizes[tile] < maxPat) { listSizes[tile] += 1; } } poly += stepSize; } } -static int gfCountTilesInNib(struct genoFind *gf, int stepSize, char *fileName) +static long long gfCountTilesInNib(struct genoFind *gf, int stepSize, char *fileName) /* Count all tiles in nib file. Returns nib size. */ { FILE *f; int tileSize = gf->tileSize; int bufSize = tileSize * 16*1024; int nibSize, i; int endBuf, basesInBuf; struct dnaSeq *seq; printf("Counting tiles in %s\n", fileName); nibOpenVerify(fileName, &f, &nibSize); for (i=0; i < nibSize; i = endBuf) { endBuf = i + bufSize; if (endBuf >= nibSize) endBuf = nibSize; basesInBuf = endBuf - i; seq = nibLdPart(fileName, f, nibSize, i, basesInBuf); gfCountSeq(gf, seq); freeDnaSeq(&seq); } fclose(f); return nibSize; } -long long maxTotalBases() +static bits64 maxTotalBases() /* Return maximum bases we can index. */ { -long long maxBases = 1024*1024; -maxBases *= 4*1024; -return maxBases; +#ifdef GFSERVER_HUGE +return 18446744073709551615ULL; // 64-bit index +#else +return 4294967295ULL; // 32-bit index +#endif } -static long long twoBitCheckTotalSize(struct twoBitFile *tbf) +static bits64 twoBitCheckTotalSize(struct twoBitFile *tbf) /* Return total size of sequence in two bit file. Squawk and * die if it's more than 4 gig. */ { -long long totalSize = twoBitTotalSize(tbf); +bits64 totalSize = twoBitTotalSize(tbf); if (totalSize > maxTotalBases()) errAbort("Sorry, can only index up to %lld bases, %s has %lld", maxTotalBases(), tbf->fileName, totalSize); return totalSize; } static void gfCountTilesInTwoBit(struct genoFind *gf, int stepSize, - char *fileName, int *retSeqCount, long long *retBaseCount) + char *fileName, int *retSeqCount, bits64 *retBaseCount) /* Count all tiles in 2bit file. Returns number of sequences and * total size of sequences in file. */ { struct dnaSeq *seq; struct twoBitFile *tbf = twoBitOpen(fileName); struct twoBitIndex *index; int seqCount = 0; -long long baseCount = twoBitCheckTotalSize(tbf); +bits64 baseCount = twoBitCheckTotalSize(tbf); printf("Counting tiles in %s\n", fileName); for (index = tbf->indexList; index != NULL; index = index->next) { seq = twoBitReadSeqFragLower(tbf, index->name, 0, 0); gfCountSeq(gf, seq); ++seqCount; freeDnaSeq(&seq); } twoBitClose(&tbf); *retSeqCount = seqCount; *retBaseCount = baseCount; } static int gfAllocLists(struct genoFind *gf) /* Allocate index lists and set up list pointers. * Returns size of all lists. */ { -int oneCount; -int count = 0; +bits64 count = 0; int i; bits32 *listSizes = gf->listSizes; -bits32 **lists = gf->lists; -bits32 *allocated = NULL; -bits32 maxPat = gf->maxPat; -int size; +gfOffset **lists = gf->lists; +gfOffset *allocated = NULL; +gfOffset maxPat = gf->maxPat; int usedCount = 0, overusedCount = 0; int tileSpaceSize = gf->tileSpaceSize; for (i=0; i<tileSpaceSize; ++i) { /* If pattern is too much used it's no good to us, ignore. */ - if ((oneCount = listSizes[i]) < maxPat) + if (listSizes[i] < maxPat) { - count += oneCount; + count += listSizes[i]; usedCount += 1; } else { overusedCount += 1; } } if (count > 0) - gf->allocated = allocated = needHugeMem(count*sizeof(allocated[0])); + gf->allocated = allocated = needHugeMem(count*sizeof(gfOffset)); for (i=0; i<tileSpaceSize; ++i) { - if ((size = listSizes[i]) < maxPat) + if (listSizes[i] < maxPat) { lists[i] = allocated; - allocated += size; + allocated += listSizes[i]; } } return count; } static int gfAllocLargeLists(struct genoFind *gf) /* Allocate large index lists and set up list pointers. * Returns size of all lists. */ { int count = 0; int i; bits32 *listSizes = gf->listSizes; -bits16 **endLists = gf->endLists; -bits16 *allocated = NULL; -int size; +endListPart **endLists = gf->endLists; +endListPart *allocated = NULL; int tileSpaceSize = gf->tileSpaceSize; for (i=0; i<tileSpaceSize; ++i) count += listSizes[i]; if (count > 0) - gf->allocated = allocated = needHugeMem(3*count*sizeof(allocated[0])); + gf->allocated = allocated = needHugeMem(count * ENDLIST_ENTRY_SIZE); for (i=0; i<tileSpaceSize; ++i) { - size = listSizes[i]; endLists[i] = allocated; - allocated += 3*size; + allocated += ENDLIST_ENTRY_SIZE * listSizes[i]; } return count; } -static void gfAddSeq(struct genoFind *gf, bioSeq *seq, bits32 offset) +static void gfAddSeq(struct genoFind *gf, bioSeq *seq, gfOffset offset) /* Add all N-mers in seq. Done after gfCountSeq. */ { char *poly = seq->dna; int tileSize = gf->tileSize; int stepSize = gf->stepSize; int i, lastTile = seq->size - tileSize; int (*makeTile)(char *poly, int n) = (gf->isPep ? gfPepTile : gfDnaTile); int maxPat = gf->maxPat; int tile; bits32 *listSizes = gf->listSizes; -bits32 **lists = gf->lists; +gfOffset **lists = gf->lists; initNtLookup(); for (i=0; i<=lastTile; i += stepSize) { tile = makeTile(poly, tileSize); if (tile >= 0) { if (listSizes[tile] < maxPat) { lists[tile][listSizes[tile]++] = offset; } } offset += stepSize; poly += stepSize; } } -static void gfAddLargeSeq(struct genoFind *gf, bioSeq *seq, bits32 offset) +static void gfAddLargeSeq(struct genoFind *gf, bioSeq *seq, gfOffset offset) /* Add all N-mers to segmented index. Done after gfCountSeq. */ { char *poly = seq->dna; int tileSize = gf->tileSize; int stepSize = gf->stepSize; int tileTailSize = gf->segSize; int tileHeadSize = tileSize - tileTailSize; int i, lastTile = seq->size - tileSize; int (*makeTile)(char *poly, int n) = (gf->isPep ? gfPepTile : gfDnaTile); int tileHead; int tileTail; bits32 *listSizes = gf->listSizes; -bits16 **endLists = gf->endLists; -bits16 *endList; int headCount; initNtLookup(); for (i=0; i<=lastTile; i += stepSize) { tileHead = makeTile(poly, tileHeadSize); tileTail = makeTile(poly + tileHeadSize, tileTailSize); if (tileHead >= 0 && tileTail >= 0) { - endList = endLists[tileHead]; headCount = listSizes[tileHead]++; - endList += headCount * 3; /* Because have three slots per. */ - endList[0] = tileTail; - endList[1] = (offset >> 16); - endList[2] = (offset&0xffff); + endListSetEntryValues(gf, i, headCount, tileTail, offset); } offset += stepSize; poly += stepSize; } } -static int gfAddTilesInNib(struct genoFind *gf, char *fileName, bits32 offset, +static int gfAddTilesInNib(struct genoFind *gf, char *fileName, gfOffset offset, int stepSize) /* Add all tiles in nib file. Returns size of nib file. */ { FILE *f; int tileSize = gf->tileSize; int bufSize = tileSize * 16*1024; int nibSize, i; int endBuf, basesInBuf; struct dnaSeq *seq; printf("Adding tiles in %s\n", fileName); nibOpenVerify(fileName, &f, &nibSize); for (i=0; i < nibSize; i = endBuf) { endBuf = i + bufSize; @@ -939,61 +1029,63 @@ int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean allowOneMismatch, int stepSize, boolean noSimpRepMask) /* Make index for all nibs and .2bits in list. * minMatch - minimum number of matching tiles to trigger alignments * maxGap - maximum deviation from diagonal of tiles * tileSize - size of tile in nucleotides * maxPat - maximum use of tile to not be considered a repeat * oocFile - .ooc format file that lists repeat tiles. May be NULL. * allowOneMismatch - allow one mismatch in a tile. * stepSize - space between tiles. Zero means default (which is tileSize). * noSimpRepMask - skip simple repeat masking. */ { struct genoFind *gf = gfNewEmpty(minMatch, maxGap, tileSize, stepSize, maxPat, oocFile, FALSE, allowOneMismatch, noSimpRepMask); int i; -bits32 offset = 0, nibSize; +gfOffset offset = 0, nibSize; char *fileName; struct gfSeqSource *ss; -long long totalBases = 0, warnAt = maxTotalBases(); +bits64 totalBases = 0, warnAt = maxTotalBases(); int totalSeq = 0; if (allowOneMismatch) errAbort("Don't currently support allowOneMismatch in gfIndexNibsAndTwoBits"); if (stepSize == 0) stepSize = gf->tileSize; for (i=0; i<fileCount; ++i) { fileName = fileNames[i]; + if (!fileExists(fileName)) + errAbort("Unrecognized genome sequence does not exist %s", fileName); if (twoBitIsFile(fileName)) { int seqCount; - long long baseCount; + bits64 baseCount; gfCountTilesInTwoBit(gf, stepSize, fileName, &seqCount, &baseCount); totalBases += baseCount; totalSeq += seqCount; } else if (nibIsFile(fileName)) { totalBases += gfCountTilesInNib(gf, stepSize, fileName); totalSeq += 1; } else - errAbort("Unrecognized file type %s", fileName); - /* Warn if they exceed 4 gig. */ + errAbort("Unrecognized genome sequence file type %s", fileName); + /* Abort if they exceed size, suggest large sequence blat. */ if (totalBases >= warnAt) - errAbort("Exceeding 4 billion bases, sorry gfServer can't handle that."); + errAbort("Exceeding 4 billion bases, try large genome gfServer."); } gfAllocLists(gf); gfZeroNonOverused(gf); AllocArray(gf->sources, totalSeq); gf->sourceCount = totalSeq; ss = gf->sources; for (i=0; i<fileCount; ++i) { fileName = fileNames[i]; if (nibIsFile(fileName)) { nibSize = gfAddTilesInNib(gf, fileName, offset, stepSize); ss->fileName = cloneString(findTail(fileName, '/')); ss->start = offset; offset += nibSize; @@ -1104,31 +1196,31 @@ for (isRc=0; isRc <= 1; ++isRc) { if (isRc) reverseComplement(seq->dna, seq->size); t3 = trans3New(seq); for (frame = 0; frame < 3; ++frame) { struct genoFind *gf = transGf[isRc][frame]; gfCountSeq(gf, t3->trans[frame]); } trans3Free(&t3); } } static void transIndexBothStrands(struct dnaSeq *seq, - struct genoFind *transGf[2][3], bits32 offset[2][3], + struct genoFind *transGf[2][3], gfOffset offset[2][3], int sourceIx, char *fileName) /* Add unmasked tiles on both strands of sequence to * index. As a side effect this will reverse-complement seq. */ { int isRc, frame; struct trans3 *t3; struct gfSeqSource *ss; for (isRc=0; isRc <= 1; ++isRc) { if (isRc) { reverseComplement(seq->dna, seq->size); } t3 = trans3New(seq); for (frame = 0; frame < 3; ++frame) @@ -1141,35 +1233,35 @@ offset[isRc][frame] += t3->trans[frame]->size; ss->end = offset[isRc][frame]; } trans3Free(&t3); } } void gfIndexTransNibsAndTwoBits(struct genoFind *transGf[2][3], int fileCount, char *fileNames[], int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean allowOneMismatch, boolean doMask, int stepSize, boolean noSimpRepMask) /* Make translated (6 frame) index for all .nib and .2bit files. */ { struct genoFind *gf; int i,isRc, frame; -bits32 offset[2][3]; +gfOffset offset[2][3]; char *fileName; struct dnaSeq *seq; int sourceCount = 0; -long long totalBases = 0, warnAt = maxTotalBases(); +bits64 totalBases = 0, warnAt = maxTotalBases(); if (allowOneMismatch) errAbort("Don't currently support allowOneMismatch in gfIndexTransNibsAndTwoBits"); /* Allocate indices for all reading frames. */ for (isRc=0; isRc <= 1; ++isRc) { for (frame = 0; frame < 3; ++frame) { transGf[isRc][frame] = gf = gfNewEmpty(minMatch, maxGap, tileSize, stepSize, maxPat, oocFile, TRUE, allowOneMismatch, noSimpRepMask); } } /* Mask simple AA repeats (of period 1 and 2). */ for (isRc = 0; isRc <= 1; ++isRc) @@ -1261,31 +1353,31 @@ gf = transGf[isRc][frame]; gf->totalSeqSize = offset[isRc][frame]; gfZeroOverused(gf); } } } static struct genoFind *gfSmallIndexSeq(struct genoFind *gf, bioSeq *seqList, int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean isPep, boolean maskUpper) /* Make index for all seqs in list. */ { int seqCount = slCount(seqList); bioSeq *seq; int i; -bits32 offset = 0; +gfOffset offset = 0; struct gfSeqSource *ss; if (isPep) maskSimplePepRepeat(gf); for (seq = seqList; seq != NULL; seq = seq->next) gfCountSeq(gf, seq); gfAllocLists(gf); gfZeroNonOverused(gf); if (seqCount > 0) AllocArray(gf->sources, seqCount); gf->sourceCount = seqCount; for (i=0, seq = seqList; i<seqCount; ++i, seq = seq->next) { gfAddSeq(gf, seq, offset); ss = gf->sources+i; @@ -1297,31 +1389,31 @@ ss->maskedBits = maskFromUpperCaseSeq(seq); } gf->totalSeqSize = offset; gfZeroOverused(gf); return gf; } static struct genoFind *gfLargeIndexSeq(struct genoFind *gf, bioSeq *seqList, int minMatch, int maxGap, int tileSize, int maxPat, char *oocFile, boolean isPep, boolean maskUpper) /* Make index for all seqs in list. */ { int seqCount = slCount(seqList); bioSeq *seq; int i; -bits32 offset = 0; +gfOffset offset = 0; struct gfSeqSource *ss; for (seq = seqList; seq != NULL; seq = seq->next) gfCountSeq(gf, seq); gfAllocLargeLists(gf); gfZeroNonOverused(gf); AllocArray(gf->sources, seqCount); gf->sourceCount = seqCount; for (i=0, seq = seqList; i<seqCount; ++i, seq = seq->next) { gfAddLargeSeq(gf, seq, offset); ss = gf->sources+i; ss->seq = seq; ss->start = offset; offset += seq->size; @@ -1363,46 +1455,46 @@ stepSize = tileSize; if (gf->segSize > 0) { gfLargeIndexSeq(gf, seqList, minMatch, maxGap, tileSize, maxPat, oocFile, isPep, maskUpper); } else { gfSmallIndexSeq(gf, seqList, minMatch, maxGap, tileSize, maxPat, oocFile, isPep, maskUpper); } return gf; } static int bCmpSeqSource(const void *vTarget, const void *vRange) /* Compare function for binary search of gfSeqSource. */ { -const bits32 *pTarget = vTarget; -bits32 target = *pTarget; +const gfOffset *pTarget = vTarget; +gfOffset target = *pTarget; const struct gfSeqSource *ss = vRange; if (target < ss->start) return -1; if (target >= ss->end) return 1; return 0; } -static struct gfSeqSource *findSource(struct genoFind *gf, bits32 targetPos) +static struct gfSeqSource *findSource(struct genoFind *gf, gfOffset targetPos) /* Find source given target position. */ { struct gfSeqSource *ss = bsearch(&targetPos, gf->sources, gf->sourceCount, sizeof(gf->sources[0]), bCmpSeqSource); if (ss == NULL) - errAbort("Couldn't find source for %d", targetPos); + errAbort("Couldn't find source for " GFOFFSET_FMT, targetPos); return ss; } void gfClumpFree(struct gfClump **pClump) /* Free a single clump. */ { struct gfClump *clump; if ((clump = *pClump) == NULL) return; freez(pClump); } void gfClumpFreeList(struct gfClump **pList) /* Free a list of dynamically allocated gfClump's */ { @@ -1411,31 +1503,31 @@ for (el = *pList; el != NULL; el = next) { next = el->next; gfClumpFree(&el); } *pList = NULL; } void gfClumpDump(struct genoFind *gf, struct gfClump *clump, FILE *f) /* Print out info on clump */ { struct gfSeqSource *ss = clump->target; char *name = ss->fileName; if (name == NULL) name = ss->seq->name; -fprintf(f, "%u-%u %s %u-%u, hits %d\n", +fprintf(f, GFOFFSET_FMT "-" GFOFFSET_FMT " %s " GFOFFSET_FMT "-" GFOFFSET_FMT ", hits %d\n", clump->qStart, clump->qEnd, name, clump->tStart - ss->start, clump->tEnd - ss->start, clump->hitCount); #ifdef SOMETIMES for (hit = clump->hitList; hit != NULL; hit = hit->next) fprintf(f, " q %d, t %d, diag %d\n", hit->qStart, hit->tStart, hit->diagonal); #endif } /* Fast sorting routines for sorting gfHits on diagonal. * More or less equivalent to system qsort, but with * comparison function inline. Worth a little tweaking * since this is the bottleneck for the whole procedure. */ static void gfHitSort2(struct gfHit **ptArray, int n); @@ -1626,31 +1718,31 @@ static int gfClumpCmpQueryCoverage(const void *va, const void *vb) /* Compare to sort based on query coverage. */ { const struct gfClump *a = *((struct gfClump **)va); const struct gfClump *b = *((struct gfClump **)vb); return (b->queryCoverage - a->queryCoverage); } static void findClumpBounds(struct gfClump *clump, int tileSize) /* Figure out qStart/qEnd tStart/tEnd from hitList */ { struct gfHit *hit; -int x; +gfOffset x; hit = clump->hitList; if (hit == NULL) return; clump->qStart = clump->qEnd = hit->qStart; clump->tStart = clump->tEnd = hit->tStart; for (hit = hit->next; hit != NULL; hit = hit->next) { x = hit->qStart; if (x < clump->qStart) clump->qStart = x; if (x > clump->qEnd) clump->qEnd = x; x = hit->tStart; if (x < clump->tStart) clump->tStart = x; if (x > clump->tEnd) clump->tEnd = x; } clump->tEnd += tileSize; @@ -1716,31 +1808,31 @@ } clump->hitList = NULL; /* We ate up the hit list. */ gfClumpFree(&clump); } } static int gfNearEnough = 300; static struct gfClump *clumpNear(struct genoFind *gf, struct gfClump *oldClumps, int minMatch) /* Go through clump list and make sure hits are also near each other. * If necessary divide clumps. */ { struct gfClump *newClumps = NULL, *clump, *nextClump; struct gfHit *hit, *nextHit; int tileSize = gf->tileSize; -bits32 lastT; +gfOffset lastT; int nearEnough = (gf->isPep ? gfNearEnough/3 : gfNearEnough); for (clump = oldClumps; clump != NULL; clump = nextClump) { struct gfHit *newHits = NULL, *oldHits = clump->hitList; int clumpSize = 0; clump->hitCount = 0; clump->hitList = NULL; /* Clump no longer owns list. */ nextClump = clump->next; slSort(&oldHits, gfHitCmpTarget); lastT = oldHits->tStart; for (hit = oldHits; hit != NULL; hit = nextHit) { nextHit = hit->next; if (hit->tStart > nearEnough + lastT) @@ -1782,49 +1874,52 @@ return newClumps; } static struct gfClump *clumpHits(struct genoFind *gf, struct gfHit *hitList, int minMatch) /* Clump together hits according to parameters in gf. */ { struct gfClump *clumpList = NULL, *clump = NULL; int maxGap = gf->maxGap; struct gfHit *hit, *nextHit, *lastHit = NULL; int totalHits = 0, usedHits = 0, clumpCount = 0; int tileSize = gf->tileSize; int bucketShift = 16; /* 64k buckets. */ bits32 bucketSize = (1<<bucketShift); int bucketCount = (gf->totalSeqSize >> bucketShift) + 1; int nearEnough = (gf->isPep ? gfNearEnough/3 : gfNearEnough); -bits32 boundary = bucketSize - nearEnough; +gfOffset boundary = bucketSize - nearEnough; int i; struct gfHit **buckets = NULL, **pb; +#ifdef DEBUG_HITS +dumpHits(hitList, stdout); +#endif /* Sort hit list into buckets. */ AllocArray(buckets, bucketCount); for (hit = hitList; hit != NULL; hit = nextHit) { nextHit = hit->next; assert((hit->tStart >> bucketShift) < bucketCount); pb = buckets + (hit->tStart >> bucketShift); slAddHead(pb, hit); } /* Sort each bucket on diagonal and clump. */ for (i=0; i<bucketCount; ++i) { int clumpSize; - bits32 maxT; + gfOffset maxT; struct gfHit *clumpHits; pb = buckets + i; gfHitSortDiagonal(pb); for (hit = *pb; hit != NULL; ) { /* Each time through this loop will get info on a clump. Will only * actually create clump if it is big enough though. */ clumpSize = 0; clumpHits = lastHit = NULL; maxT = 0; for (; hit != NULL; hit = nextHit) { nextHit = hit->next; if (lastHit != NULL && hit->diagonal - lastHit->diagonal > maxGap) break; @@ -1846,134 +1941,130 @@ AllocVar(clump); slAddHead(&clumpList, clump); clump->hitCount = clumpSize; clump->hitList = clumpHits; usedHits += clumpSize; ++clumpCount; } } *pb = NULL; boundary += bucketSize; } clumpList = clumpNear(gf, clumpList, minMatch); gfClumpComputeQueryCoverage(clumpList, tileSize); /* Thanks AG */ slSort(&clumpList, gfClumpCmpQueryCoverage); -#ifdef DEBUG -uglyf("Dumping clumps B\n"); -for (clump = clumpList; clump != NULL; clump = clump->next) /* uglyf */ - { - uglyf(" %d %d %s %d %d (%d hits)\n", clump->qStart, clump->qEnd, clump->target->seq->name, clump->tStart, clump->tEnd, clump->hitCount); - } +#ifdef DEBUG_CLUMP +dumpClumpList(clumpList, stdout); #endif /* DEBUG */ freez(&buckets); return clumpList; } static struct gfHit *gfFastFindDnaHits(struct genoFind *gf, struct dnaSeq *seq, - Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount, - struct gfSeqSource *target, int tMin, int tMax) + Bits *qMaskBits, gfOffset qMaskOffset, struct lm *lm, int *retHitCount, + struct gfSeqSource *target, gfOffset tMin, gfOffset tMax) /* Find hits associated with one sequence. This is is special fast * case for DNA that is in an unsegmented index. */ { struct gfHit *hitList = NULL, *hit; -int size = seq->size; +gfOffset size = seq->size; int tileSizeMinusOne = gf->tileSize - 1; int mask = gf->tileMask; DNA *dna = seq->dna; int i, j; -bits32 bits = 0; -bits32 bVal; +gfOffset bits = 0; +gfOffset bVal; int listSize; -bits32 qStart, *tList; +gfOffset qStart, *tList; int hitCount = 0; for (i=0; i<tileSizeMinusOne; ++i) { bVal = ntValNoN[(int)dna[i]]; bits <<= 2; bits += bVal; } for (i=tileSizeMinusOne; i<size; ++i) { bVal = ntValNoN[(int)dna[i]]; bits <<= 2; bits += bVal; bits &= mask; listSize = gf->listSizes[bits]; if (listSize != 0) { qStart = i-tileSizeMinusOne; if (qMaskBits == NULL || bitCountRange(qMaskBits, qStart+qMaskOffset, gf->tileSize) == 0) { tList = gf->lists[bits]; for (j=0; j<listSize; ++j) { - int tStart = tList[j]; + gfOffset tStart = tList[j]; if (target == NULL || (target == findSource(gf, tStart) && tStart >= tMin && tStart < tMax) ) { lmAllocVar(lm, hit); hit->qStart = qStart; hit->tStart = tStart; hit->diagonal = tStart + size - qStart; slAddHead(&hitList, hit); ++hitCount; } } } } } *retHitCount = hitCount; return hitList; } static struct gfHit *gfStraightFindHits(struct genoFind *gf, aaSeq *seq, - Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount, - struct gfSeqSource *target, int tMin, int tMax) + Bits *qMaskBits, gfOffset qMaskOffset, struct lm *lm, int *retHitCount, + struct gfSeqSource *target, gfOffset tMin, gfOffset tMax) /* Find hits associated with one sequence in a non-segmented * index where hits match exactly. */ { struct gfHit *hitList = NULL, *hit; int size = seq->size; int tileSize = gf->tileSize; int lastStart = size - tileSize; char *poly = seq->dna; int i, j; int tile; int listSize; -bits32 qStart, *tList; +gfOffset qStart, *tList; int hitCount = 0; int (*makeTile)(char *poly, int n) = (gf->isPep ? gfPepTile : gfDnaTile); initNtLookup(); for (i=0; i<=lastStart; ++i) { tile = makeTile(poly+i, tileSize); if (tile < 0) continue; listSize = gf->listSizes[tile]; if (listSize > 0) { qStart = i; if (qMaskBits == NULL || bitCountRange(qMaskBits, qStart+qMaskOffset, tileSize) == 0) { tList = gf->lists[tile]; for (j=0; j<listSize; ++j) { - int tStart = tList[j]; + gfOffset tStart = tList[j]; if (target == NULL || (target == findSource(gf, tStart) && tStart >= tMin && tStart < tMax) ) { lmAllocVar(lm,hit); hit->qStart = qStart; hit->tStart = tStart; hit->diagonal = tStart + size - qStart; slAddHead(&hitList, hit); ++hitCount; } } } } } *retHitCount = hitCount; @@ -1982,31 +2073,31 @@ static struct gfHit *gfStraightFindNearHits(struct genoFind *gf, aaSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount, struct gfSeqSource *target, int tMin, int tMax) /* Find hits associated with one sequence in a non-segmented * index where hits can mismatch in one letter. */ { struct gfHit *hitList = NULL, *hit; int size = seq->size; int tileSize = gf->tileSize; int lastStart = size - tileSize; char *poly = seq->dna; int i, j; int tile; int listSize; -bits32 qStart, *tList; +gfOffset qStart, *tList; int hitCount = 0; int varPos, varVal; /* Variable position. */ int (*makeTile)(char *poly, int n); int alphabetSize; char oldChar, zeroChar; int *seqValLookup; int posMul, avoid; initNtLookup(); if (gf->isPep) { makeTile = gfPepTile; alphabetSize = 20; zeroChar = 'A'; seqValLookup = aaVal; @@ -2082,55 +2173,56 @@ Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount, struct gfSeqSource *target, int tMin, int tMax) /* Find hits associated with one sequence in general case in a segmented * index. */ { struct gfHit *hitList = NULL, *hit; int size = seq->size; int tileSize = gf->tileSize; int tileTailSize = gf->segSize; int tileHeadSize = gf->tileSize - tileTailSize; int lastStart = size - tileSize; char *poly = seq->dna; int i, j; int tileHead, tileTail; int listSize; -bits32 qStart; -bits16 *endList; +gfOffset qStart; +endListPart *endList; int hitCount = 0; int (*makeTile)(char *poly, int n) = (gf->isPep ? gfPepTile : gfDnaTile); initNtLookup(); for (i=0; i<=lastStart; ++i) { if (qMaskBits == NULL || bitCountRange(qMaskBits, i+qMaskOffset, tileSize) == 0) { tileHead = makeTile(poly+i, tileHeadSize); if (tileHead < 0) continue; tileTail = makeTile(poly+i+tileHeadSize, tileTailSize); if (tileTail < 0) continue; listSize = gf->listSizes[tileHead]; qStart = i; endList = gf->endLists[tileHead]; for (j=0; j<listSize; ++j) { - if (endList[0] == tileTail) + endListPart *entry = endListGetEntry(gf, tileHead, j); + if (endListEntryTileTail(entry) == tileTail) { - int tStart = (endList[1]<<16) + endList[2]; + int tStart = endListEntryOffset(entry); if (target == NULL || (target == findSource(gf, tStart) && tStart >= tMin && tStart < tMax) ) { lmAllocVar(lm,hit); hit->qStart = qStart; hit->tStart = tStart; hit->diagonal = tStart + size - qStart; slAddHead(&hitList, hit); ++hitCount; } } endList += 3; } } @@ -2143,32 +2235,32 @@ aaSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount, struct gfSeqSource *target, int tMin, int tMax) /* Find hits associated with one sequence in a segmented * index where one mismatch is allowed. */ { struct gfHit *hitList = NULL, *hit; int size = seq->size; int tileSize = gf->tileSize; int tileTailSize = gf->segSize; int tileHeadSize = gf->tileSize - tileTailSize; int lastStart = size - tileSize; char *poly = seq->dna; int i, j; int tileHead, tileTail; int listSize; -bits32 qStart; -bits16 *endList; +gfOffset qStart; +endListPart *endList; int hitCount = 0; int varPos, varVal; /* Variable position. */ int (*makeTile)(char *poly, int n); int alphabetSize; char oldChar, zeroChar; int headPosMul, tailPosMul, avoid; boolean modTail; int *seqValLookup; initNtLookup(); if (gf->isPep) { makeTile = gfPepTile; alphabetSize = 20; @@ -2200,36 +2292,36 @@ /* Avoid checking the unmodified tile multiple times. */ if (varPos == 0) avoid = -1; else avoid = seqValLookup[(int)oldChar]; if (tileHead >= 0 && tileTail >= 0) { for (varVal=0; varVal<alphabetSize; ++varVal) { if (varVal != avoid) { listSize = gf->listSizes[tileHead]; qStart = i; - endList = gf->endLists[tileHead]; for (j=0; j<listSize; ++j) { - if (endList[0] == tileTail) + endListPart *entry = endListGetEntry(gf, tileHead, j); + if (endListEntryTileTail(entry) == tileTail) { - int tStart = (endList[1]<<16) + endList[2]; + int tStart = endListEntryOffset(entry); if (target == NULL || (target == findSource(gf, tStart) && tStart >= tMin && tStart < tMax) ) { lmAllocVar(lm,hit); hit->qStart = qStart; hit->tStart = tStart; hit->diagonal = tStart + size - qStart; slAddHead(&hitList, hit); ++hitCount; } } endList += 3; } } @@ -2283,49 +2375,30 @@ if (gf->allowOneMismatch) { hitList = gfSegmentedFindNearHits(gf, seq, qMaskBits, qMaskOffset, lm, retHitCount, target, tMin, tMax); } else { hitList = gfSegmentedFindHits(gf, seq, qMaskBits, qMaskOffset, lm, retHitCount, target, tMin, tMax); } } } return hitList; } -#ifdef DEBUG -static void dumpClump(struct gfClump *clump, FILE *f) -/* Print out a clump */ -{ -struct gfSeqSource *target = clump->target; -char *tName = target->fileName; -if (tName == NULL) tName = target->seq->name; -fprintf(f, "%d-%d\t%s:%d-%d\n", clump->qStart, clump->qEnd, tName, clump->tStart, clump->tEnd); -} - -static void dumpClumpList(struct gfClump *clumpList, FILE *f) -/* Dump list of clumps. */ -{ -struct gfClump *clump; -for (clump = clumpList; clump != NULL; clump = clump->next) - dumpClump(clump, f); -} -#endif /* DEBUG */ - struct gfClump *gfFindClumpsWithQmask(struct genoFind *gf, bioSeq *seq, Bits *qMaskBits, int qMaskOffset, struct lm *lm, int *retHitCount) /* Find clumps associated with one sequence soft-masking seq according to qMaskBits */ { struct gfClump *clumpList = NULL; struct gfHit *hitList; int minMatch = gf->minMatch; #ifdef OLD /* stepSize makes this obsolete. */ if (seq->size < gf->tileSize * (gf->minMatch+1)) minMatch = 1; #endif /* OLD */ hitList = gfFindHitsWithQmask(gf, seq, qMaskBits, qMaskOffset, lm, @@ -2495,56 +2568,57 @@ { while (--count >= 0) { if (sameString(source->seq->name, name)) return source; source += 1; } } return NULL; } static void mergeAdd(struct binKeeper *bk, int start, int end, struct gfSeqSource *src) /* Add interval to bin-keeper, merging with any existing overlapping * intervals. */ { +assert((start >= 0) && (end > start)); struct binElement *iEl, *iList = binKeeperFind(bk, start, end); for (iEl = iList; iEl != NULL; iEl = iEl->next) { if (iEl->start < start) start = iEl->start; if (iEl->end > end) end = iEl->end; binKeeperRemove(bk, iEl->start, iEl->end, src); } slFreeList(&iList); binKeeperAdd(bk, start, end, src); } static struct gfClump *pcrClumps(struct genoFind *gf, char *fPrimer, int fPrimerSize, char *rPrimer, int rPrimerSize, int minDistance, int maxDistance) /* Find possible PCR hits. The fPrimer and rPrimer are on the same strand. */ { struct gfClump *clumpList = NULL; int tileSize = gf->tileSize; int fTile; int fTileCount = fPrimerSize - tileSize; int *rTiles, rTile; int rTileCount = rPrimerSize - tileSize; int fTileIx,rTileIx,fPosIx,rPosIx; -bits32 *fPosList, fPos, *rPosList, rPos; -int fPosListSize, rPosListSize; +gfOffset *fPosList, fPos, *rPosList, rPos; +gfOffset fPosListSize, rPosListSize; struct hash *targetHash = newHash(0); /* Build up array of all tiles in reverse primer. */ initNtLookup(); AllocArray(rTiles, rTileCount); for (rTileIx = 0; rTileIx<rTileCount; ++rTileIx) { rTiles[rTileIx] = gfDnaTile(rPrimer + rTileIx, tileSize); if (rTiles[rTileIx] == -1) errAbort("Bad char in reverse primer sequence: %s", rPrimer); } /* Loop through all tiles in forward primer. */ for (fTileIx=0; fTileIx<fTileCount; ++fTileIx) { @@ -2562,31 +2636,31 @@ rTile = rTiles[rTileIx]; rPosListSize = gf->listSizes[rTile]; rPosList = gf->lists[rTile]; for (rPosIx=0; rPosIx < rPosListSize; ++rPosIx) { rPos = rPosList[rPosIx]; if (rPos > fPos) { int distance = rPos - fPos; if (distance >= minDistance && distance <= maxDistance) { struct gfSeqSource *target = findSource(gf, fPos); if (rPos < target->end) { struct binKeeper *bk; - int tStart = target->start; + gfOffset tStart = target->start; char *tName = target->fileName; if (tName == NULL) tName = target->seq->name; if ((bk = hashFindVal(targetHash, tName)) == NULL) { bk = binKeeperNew(0, target->end - tStart); hashAdd(targetHash, tName, bk); } mergeAdd(bk, fPos - tStart, rPos - tStart, target); } } } } } } @@ -2681,15 +2755,16 @@ else if (tileSize == 9) repMatch = 64*256; else if (tileSize == 8) repMatch = 256*256; else if (tileSize == 7) repMatch = 1024*256; else if (tileSize == 6) repMatch = 4*1024*256; else internalErr(); } repMatch *= tileSize; repMatch /= stepSize; return repMatch; } +