5a4c9d70e2416f53241781d17549445b2bc80d42 galt Sat Aug 6 00:09:25 2011 -0700
adding 4k readAhead buffer for sparseData reduces thousands of reads down to a dozen
diff --git src/lib/udc.c src/lib/udc.c
index 15420fe..e94877e 100644
--- src/lib/udc.c
+++ src/lib/udc.c
@@ -73,30 +73,32 @@

 struct udcFile
 /* A file handle for our caching system. */
     {
     struct udcFile *next;       /* Next in list. */
     char *url;                  /* Name of file - includes protocol */
     char *protocol;             /* The URL up to the first colon. http: etc. */
     struct udcProtocol *prot;   /* Protocol specific data and methods. */
     time_t updateTime;          /* Last modified timestamp. */
     bits64 size;                /* Size of file. */
     bits64 offset;              /* Current offset in file. */
     char *cacheDir;             /* Directory for cached file parts. */
     char *bitmapFileName;       /* Name of bitmap file. */
     char *sparseFileName;       /* Name of sparse data file. */
     int fdSparse;               /* File descriptor for sparse data file. */
+    char *sparseReadAhead;      /* Read-ahead buffer of READAHEADBUFSIZE bytes, NULL if none */
+    bits64 sparseRAOffset;      /* File offset of first byte held in read-ahead buffer */
     struct udcBitmap *bits;     /* udcBitMap */
     bits64 startData;           /* Start of area in file we know to have data. */
     bits64 endData;             /* End of area in file we know to have data. */
     bits32 bitmapVersion;       /* Version of associated bitmap we were opened with. */
     struct connInfo connInfo;   /* Connection info for open net connection. */
     };

 struct udcBitmap
 /* The control structure including the bitmap of blocks that are cached. */
     {
     struct udcBitmap *next;     /* Next in list. */
     bits32 blockSize;           /* Number of bytes per block of file. */
     bits64 remoteUpdate;        /* Remote last update time. */
     bits64 fileSize;            /* File size */
     bits32 version;             /* Version - increments each time cache is stale. */
@@ -910,30 +912,31 @@
 /* Close down cached file. */
 {
 struct udcFile *file = *pFile;
 if (file != NULL)
     {
     if (file->connInfo.socket != 0)
         mustCloseFd(&(file->connInfo.socket));
     if (file->connInfo.ctrlSocket != 0)
         mustCloseFd(&(file->connInfo.ctrlSocket));
     freeMem(file->url);
     freeMem(file->protocol);
     udcProtocolFree(&file->prot);
     freeMem(file->cacheDir);
     freeMem(file->bitmapFileName);
     freeMem(file->sparseFileName);
+    freeMem(file->sparseReadAhead);
     mustCloseFd(&(file->fdSparse));
     udcBitmapClose(&file->bits);
     }
 freez(pFile);
 }

 static void qDecode(const char *input, char *buf, size_t size)
 /* Reverse the qEncode performed on afterProcotol above into buf or abort. */
 {
 safecpy(buf, size, input);
 char c, *r = buf, *w = buf;
 while ((c = *r++) != '\0')
     {
     if (c == 'Q')
         {
@@ -1197,64 +1200,126 @@
         {
         udcFetchMissing(file, bits, s, e);
         }
     else
         {
         ok = FALSE;
         verbose(2, "udcCachePreload version check failed %d vs %d",
                 bits->version, file->bitmapVersion);
         }
     if (!ok)
         break;
     }
 return ok;
 }

+#define READAHEADBUFSIZE 4096   /* Bytes in read-ahead buffer; reads smaller than this go through it. */
 int udcRead(struct udcFile *file, void *buf, int size)
 /* Read a block from file. Return amount actually read. */
 {
+
 /* Figure out region of file we're going to read, and clip it against file size. */
 bits64 start = file->offset;
 if (start > file->size)
     return 0;
 bits64 end = start + size;
 if (end > file->size)
     end = file->size;
 size = end - start;
+/* Serve the request from the read-ahead buffer where possible; refill it for small reads, read large ones direct. */
+int bytesRead = 0;      /* Bytes delivered into buf so far. */
+
+bits64 raStart;
+bits64 raEnd;
+while(TRUE)
+    {
+    raStart = file->sparseRAOffset;
+    raEnd = raStart+READAHEADBUFSIZE;
+    if (file->sparseReadAhead)
+        {
+        if (start >= raStart && start < raEnd)
+            {
+            // copy bytes out of rabuf
+            bits64 endInBuf = min(raEnd, end);      /* 64-bit: file offsets do not fit in an int */
+            int sizeInBuf = endInBuf - start;       /* at most READAHEADBUFSIZE */
+            memcpy(buf, file->sparseReadAhead + (start-raStart), sizeInBuf);
+            buf = (char *)buf + sizeInBuf;          /* arithmetic on void * is a GNU extension */
+            bytesRead += sizeInBuf;
+            start = raEnd;                          /* whatever is still wanted lies past the buffer */
+            size -= sizeInBuf;
+            file->offset += sizeInBuf;
+            mustLseek(file->fdSparse, start, SEEK_SET);
+            if (size == 0)
+                break;
+            }
+        /* Buffer is unrelated to this read or used up: drop it so nothing below reads into or allocates over it. */
+        freez(&file->sparseReadAhead);
+        }
+
+    bits64 saveEnd = end;       /* caller's end; end and size are widened below for a buffer fill */
+    if (size < READAHEADBUFSIZE)
+        {
+        file->sparseReadAhead = needMem(READAHEADBUFSIZE);
+        file->sparseRAOffset = start;
+        size = READAHEADBUFSIZE;
+        end = start + size;
+        if (end > file->size)
+            {
+            end = file->size;
+            size = end - start;
+            }
+        }
+
 /* If we're outside of the window of file we already know is good, then have to
  * consult cache on disk, and maybe even fetch data remotely! */
 if (start < file->startData || end > file->endData)
     {
     if (!udcCachePreload(file, start, size))
         {
         verbose(2, "udcCachePreload failed");
-        return 0;
+        /* A buffer allocated above was never filled; drop it so it is not served as data later. */
+        freez(&file->sparseReadAhead);
+        bytesRead = 0;          /* report 0 as the old code did, even if some bytes were already copied */
+        break;
         }
     /* Currently only need fseek here. Would be safer, but possibly
      * slower to move fseek so it is always executed in front of read, in
      * case other code is moving around file pointer. */
     mustLseek(file->fdSparse, start, SEEK_SET);
     }
+    if (file->sparseReadAhead)
+        {
+        mustReadFd(file->fdSparse, file->sparseReadAhead, size);    /* fill buffer; next pass copies out of it */
+        end = saveEnd;
+        size = end - start;
+        }
+    else
+        {
 mustReadFd(file->fdSparse, buf, size);
 file->offset += size;
-return size;
+        bytesRead += size;
+        break;
+        }
+    }
+
+return bytesRead;
 }

 void udcMustRead(struct udcFile *file, void *buf, int size)
 /* Read a block from file. Abort if any problem, including EOF before size is read. */
 {
 int sizeRead = udcRead(file, buf, size);
 if (sizeRead < size)
     errAbort("udc couldn't read %d bytes from %s, did read %d", size, file->url, sizeRead);
 }

 int udcGetChar(struct udcFile *file)
 /* Get next character from file or die trying. */
 {
 UBYTE b;
 udcMustRead(file, &b, 1);