c3c65fde6dd5aa6f20860c7113eb9ee22cf35b96 markd Wed Jan 15 08:37:28 2020 -0800 Initial pass at 64bit blat index diff --git src/gfServer/gfServer.c src/gfServer/gfServer.c index b12a7fd..7593018 100644 --- src/gfServer/gfServer.c +++ src/gfServer/gfServer.c @@ -259,31 +259,31 @@ /* Handle a query for DNA/DNA match. */ { struct gfClump *clumpList = NULL, *clump; int limit = 1000; int clumpCount = 0, hitCount = -1; struct lm *lm = lmInit(0); if (seq->size > gf->tileSize + gf->stepSize + gf->stepSize) limit = maxDnaHits; clumpList = gfFindClumps(gf, seq, lm, &hitCount); if (clumpList == NULL) ++missCount; for (clump = clumpList; clump != NULL; clump = clump->next) { struct gfSeqSource *ss = clump->target; - sprintf(buf, "%d\t%d\t%s\t%d\t%d\t%d", + sprintf(buf, "%lld\t%lld\t%s\t%lld\t%lld\t%d", clump->qStart, clump->qEnd, ss->fileName, clump->tStart-ss->start, clump->tEnd-ss->start, clump->hitCount); netSendString(connectionHandle, buf); ++clumpCount; int perSeqCount = -1; if (perSeqMaxHash && ((perSeqCount = hashIntValDefault(perSeqMaxHash, ss->fileName, -1)) >= 0)) { if (perSeqCount >= (maxDnaHits / 2)) break; hashIncInt(perSeqMaxHash, ss->fileName); } else if (--limit < 0) break; } @@ -307,38 +307,38 @@ sprintf(buf, "tileSize %d", tileSize); netSendString(connectionHandle, buf); for (frame = 0; frame < 3; ++frame) clumps[frame] = NULL; for (isRc = 0; isRc <= 1; ++isRc) { strand = (isRc ? '-' : '+'); gfTransFindClumps(transGf[isRc], seq, clumps, lm, &oneHit); hitCount += oneHit; for (frame = 0; frame < 3; ++frame) { int limit = maxTransHits; for (clump = clumps[frame]; clump != NULL; clump = clump->next) { struct gfSeqSource *ss = clump->target; - sprintf(buf, "%d\t%d\t%s\t%d\t%d\t%d\t%c\t%d", + sprintf(buf, "%lld\t%lld\t%s\t%lld\t%lld\t%d\t%c\t%d", clump->qStart, clump->qEnd, ss->fileName, clump->tStart-ss->start, clump->tEnd-ss->start, clump->hitCount, strand, frame); netSendString(connectionHandle, buf); dyStringClear(dy); for (hit = clump->hitList; hit != NULL; hit = hit->next) - dyStringPrintf(dy, " %d %d", hit->qStart, hit->tStart - ss->start); + dyStringPrintf(dy, " %lld %lld", hit->qStart, hit->tStart - ss->start); netSendLongString(connectionHandle, dy->string); ++clumpCount; if (--limit < 0) break; } gfClumpFreeList(&clumps[frame]); } } if (clumpCount == 0) ++missCount; freeDyString(&dy); lmCleanup(&lm); logDebug("%lu %d clumps, %d hits", clock1000(), clumpCount, hitCount); } @@ -361,39 +361,39 @@ clumps[qFrame][tFrame] = NULL; for (isRc = 0; isRc <= 1; ++isRc) { struct lm *lm = lmInit(0); strand = (isRc ? '-' : '+'); gfTransTransFindClumps(transGf[isRc], t3->trans, clumps, lm, &oneCount); hitCount += oneCount; for (qFrame = 0; qFrame<3; ++qFrame) { for (tFrame=0; tFrame<3; ++tFrame) { int limit = maxTransHits; for (clump = clumps[qFrame][tFrame]; clump != NULL; clump = clump->next) { struct gfSeqSource *ss = clump->target; - sprintf(buf, "%d\t%d\t%s\t%d\t%d\t%d\t%c\t%d\t%d", + sprintf(buf, "%lld\t%lld\t%s\t%lld\t%lld\t%d\t%c\t%d\t%d", clump->qStart, clump->qEnd, ss->fileName, clump->tStart-ss->start, clump->tEnd-ss->start, clump->hitCount, strand, qFrame, tFrame); netSendString(connectionHandle, buf); dyStringClear(dy); for (hit = clump->hitList; hit != NULL; hit = hit->next) { - dyStringPrintf(dy, " %d %d", hit->qStart, hit->tStart - ss->start); + dyStringPrintf(dy, " %lld %lld", hit->qStart, hit->tStart - ss->start); } netSendLongString(connectionHandle, dy->string); ++clumpCount; if (--limit < 0) break; } gfClumpFreeList(&clumps[qFrame][tFrame]); } } lmCleanup(&lm); } trans3Free(&t3); if (clumpCount == 0) ++missCount; logDebug("%lu %d clumps, %d hits", clock1000(), clumpCount, hitCount); @@ -401,43 +401,43 @@ static void pcrQuery(struct genoFind *gf, char *fPrimer, char *rPrimer, int maxDistance, int connectionHandle) /* Do PCR query and report results down socket. */ { int fPrimerSize = strlen(fPrimer); int rPrimerSize = strlen(rPrimer); struct gfClump *clumpList, *clump; int clumpCount = 0; char buf[256]; clumpList = gfPcrClumps(gf, fPrimer, fPrimerSize, rPrimer, rPrimerSize, 0, maxDistance); for (clump = clumpList; clump != NULL; clump = clump->next) { struct gfSeqSource *ss = clump->target; - safef(buf, sizeof(buf), "%s\t%d\t%d\t+", ss->fileName, + safef(buf, sizeof(buf), "%s\t%lld\t%lld\t+", ss->fileName, clump->tStart, clump->tEnd); netSendString(connectionHandle, buf); ++clumpCount; } gfClumpFreeList(&clumpList); clumpList = gfPcrClumps(gf, rPrimer, rPrimerSize, fPrimer, fPrimerSize, 0, maxDistance); for (clump = clumpList; clump != NULL; clump = clump->next) { struct gfSeqSource *ss = clump->target; - safef(buf, sizeof(buf), "%s\t%d\t%d\t-", ss->fileName, + safef(buf, sizeof(buf), "%s\t%lld\t%lld\t-", ss->fileName, clump->tStart, clump->tEnd); netSendString(connectionHandle, buf); ++clumpCount; } gfClumpFreeList(&clumpList); netSendString(connectionHandle, "end"); logDebug("%lu PCR %s %s %d clumps\n", clock1000(), fPrimer, rPrimer, clumpCount); } static jmp_buf gfRecover; static char *ripCord = NULL; /* A little memory to give back to system * during error recovery. */ static void gfAbort() @@ -691,56 +691,56 @@ } mustRead(f, &ss->start, sizeof(bits32)); mustRead(f, &ss->end, sizeof(bits32)); // no seq information written/read // no masking information written/read } // listSizes: length = (gf->tileSpaceSize) gf->listSizes = memMapped + ftell(f); mustSeek(f, (gf->tileSpaceSize * sizeof(gf->listSizes[0])), SEEK_CUR); gf->allocated = memMapped + ftell(f); if (gf->segSize == 0) { // use lists gf->lists = needHugeZeroedMem(gf->tileSpaceSize * sizeof(gf->lists[0])); - bits32 *cur = gf->allocated; + void *cur = gf->allocated; size_t count = 0; for (i = 0; i < gf->tileSpaceSize; i++) { if (gf->listSizes[i] < gf->maxPat) { gf->lists[i] = cur; cur += gf->listSizes[i]; count += gf->listSizes[i]; } } - mustSeek(f, count*sizeof(bits32), SEEK_CUR); + mustSeek(f, count*sizeof(gfOffset), SEEK_CUR); } else { // use endLists - gf->endLists = needHugeZeroedMem(gf->tileSpaceSize * sizeof(gf->endLists[0])); - bits16 *cur = gf->allocated; + gf->endLists = needHugeZeroedMem(gf->tileSpaceSize * sizeof(struct endList)); + void *cur = gf->allocated; size_t count = 0; for (i = 0; i < gf->tileSpaceSize; i++) { gf->endLists[i] = cur; - cur += 3 * gf->listSizes[i]; + cur += gf->listSizes[i]; count += gf->listSizes[i]; } - mustSeek(f, 3*count*sizeof(bits16), SEEK_CUR); + mustSeek(f, count*sizeof(struct endList), SEEK_CUR); } return gf; } void loadGenoFindIndex(char *fileName, struct genoFind **gfRet, struct genoFind *transGf[2][3]) /* load indexes from file. Only one of gfRet or transGf is set. */ { FILE *f = mustOpen(fileName, "r"); char fileMagic[sizeof(indexMagic) + 1]; mustRead(f, fileMagic, sizeof(indexMagic)); fileMagic[sizeof(indexMagic)] = '\0'; if (strcmp(fileMagic, indexMagic)) errAbort("wrong magic string for index file"); boolean isTrans; mustRead(f, &isTrans, sizeof(isTrans));