0df94f90604087383b6ee79051e4f2bd48c8fc71 galt Fri Jul 22 15:31:07 2011 -0700 speeding up udc by not re-opening bitmap and sparseData files unnecessarily; also combined udcCacheContains with fetchMissing so that the bitmap would not be getting read twice upon cache misses; tested with udcTest both interleave and fork. diff --git src/lib/udc.c src/lib/udc.c index 0e7b23e..15420fe 100644 --- src/lib/udc.c +++ src/lib/udc.c @@ -73,30 +73,31 @@ struct udcFile /* A file handle for our caching system. */ { struct udcFile *next; /* Next in list. */ char *url; /* Name of file - includes protocol */ char *protocol; /* The URL up to the first colon. http: etc. */ struct udcProtocol *prot; /* Protocol specific data and methods. */ time_t updateTime; /* Last modified timestamp. */ bits64 size; /* Size of file. */ bits64 offset; /* Current offset in file. */ char *cacheDir; /* Directory for cached file parts. */ char *bitmapFileName; /* Name of bitmap file. */ char *sparseFileName; /* Name of sparse data file. */ int fdSparse; /* File descriptor for sparse data file. */ + struct udcBitmap *bits; /* udcBitMap */ bits64 startData; /* Start of area in file we know to have data. */ bits64 endData; /* End of area in file we know to have data. */ bits32 bitmapVersion; /* Version of associated bitmap we were opened with. */ struct connInfo connInfo; /* Connection info for open net connection. */ }; struct udcBitmap /* The control structure including the bitmap of blocks that are cached. */ { struct udcBitmap *next; /* Next in list. */ bits32 blockSize; /* Number of bytes per block of file. */ bits64 remoteUpdate; /* Remote last update time. */ bits64 fileSize; /* File size */ bits32 version; /* Version - increments each time cache is stale. */ bits64 localUpdate; /* Time we last fetched new data into cache. 
*/ @@ -589,42 +590,47 @@ prot->fetchInfo = udcInfoViaTransparent; } else { errAbort("Unrecognized protocol %s in udcProtNew", upToColon); } return prot; } static void udcProtocolFree(struct udcProtocol **pProt) /* Free up protocol resources. */ { freez(pProt); } -static void setInitialCachedDataBounds(struct udcFile *file) +static void setInitialCachedDataBounds(struct udcFile *file, boolean useCacheInfo) /* Open up bitmap file and read a little bit of it to see if cache is stale, * and if not to see if the initial part is cached. Sets the data members * startData, and endData. If the case is stale it makes fresh empty * cacheDir/sparseData and cacheDir/bitmap files. */ { bits32 version = 0; /* Get existing bitmap, and if it's stale clean up. */ struct udcBitmap *bits = udcBitmapOpen(file->bitmapFileName); if (bits != NULL) { + if (useCacheInfo) + { + file->size = bits->fileSize; + file->updateTime = bits->remoteUpdate; + } version = bits->version; if (bits->remoteUpdate != file->updateTime || bits->fileSize != file->size || !fileExists(file->sparseFileName)) { verbose(2, "removing stale version (%lld! = %lld or %lld! = %lld or %s doesn't exist), " "new version %d\n", bits->remoteUpdate, (long long)file->updateTime, bits->fileSize, file->size, file->sparseFileName, version); udcBitmapClose(&bits); remove(file->bitmapFileName); remove(file->sparseFileName); ++version; } } else @@ -633,39 +639,42 @@ /* If no bitmap, then create one, and also an empty sparse data file. */ if (bits == NULL) { udcNewCreateBitmapAndSparse(file, file->updateTime, file->size, version); bits = udcBitmapOpen(file->bitmapFileName); if (bits == NULL) errAbort("Unable to open bitmap file %s", file->bitmapFileName); } file->bitmapVersion = bits->version; /* Read in a little bit from bitmap while we have it open to see if we have anything cached. 
*/ if (file->size > 0) { Bits b; + off_t wasAt = lseek(bits->fd, 0, SEEK_CUR); mustReadOneFd(bits->fd, b); int endBlock = (file->size + udcBlockSize - 1)/udcBlockSize; if (endBlock > 8) endBlock = 8; int initialCachedBlocks = bitFindClear(&b, 0, endBlock); file->endData = initialCachedBlocks * udcBlockSize; + mustLseek(bits->fd, wasAt, SEEK_SET); } -udcBitmapClose(&bits); +file->bits = bits; + } static boolean qEscaped(char c) /* Returns TRUE if character needs to be escaped in q-encoding. */ { if (isalnum(c)) return c == 'Q'; else return c != '_' && c != '-' && c != '/' && c != '.'; } static char *qEncode(char *input) /* Do a simple encoding to convert input string into "normal" characters. * Abnormal letters, and '!' get converted into Q followed by two hexadecimal digits. */ { @@ -827,51 +836,49 @@ file->protocol = protocol; file->prot = prot; if (isTransparent) { /* If transparent dummy up things so that the "sparse" file pointer is actually * the file itself, which appears to be completely loaded in cache. */ int fd = file->fdSparse = mustOpenFd(url, O_RDONLY); struct stat status; fstat(fd, &status); file->startData = 0; file->endData = file->size = status.st_size; } else { udcPathAndFileNames(file, cacheDir, protocol, afterProtocol); - if (useCacheInfo) - { - file->size = udcSizeAndModTimeFromBitmap(file->bitmapFileName, &(file->updateTime)); - } - else + if (!useCacheInfo) { file->updateTime = info.updateTime; file->size = info.size; memcpy(&(file->connInfo), &(info.ci), sizeof(struct connInfo)); // update cache file mod times, so if we're caching we won't do this again // until the timeout has expired again: if (udcCacheTimeout() > 0 && fileExists(file->bitmapFileName)) (void)maybeTouchFile(file->bitmapFileName); } /* Make directory. */ makeDirsOnPath(file->cacheDir); - /* Figure out a little bit about the extent of the good cached data if any. 
*/ - setInitialCachedDataBounds(file); + /* Figure out a little bit about the extent of the good cached data if any. Open bits bitmap. */ + setInitialCachedDataBounds(file, useCacheInfo); + file->fdSparse = mustOpenFd(file->sparseFileName, O_RDWR); + } freeMem(afterProtocol); return file; } struct udcFile *udcFileOpen(char *url, char *cacheDir) /* Open up a cached file. cacheDir may be null in which case udcDefaultDir() will be * used. Abort if if file doesn't exist. */ { struct udcFile *udcFile = udcFileMayOpen(url, cacheDir); if (udcFile == NULL) errAbort("Couldn't open %s", url); return udcFile; } @@ -904,30 +911,31 @@ { struct udcFile *file = *pFile; if (file != NULL) { if (file->connInfo.socket != 0) mustCloseFd(&(file->connInfo.socket)); if (file->connInfo.ctrlSocket != 0) mustCloseFd(&(file->connInfo.ctrlSocket)); freeMem(file->url); freeMem(file->protocol); udcProtocolFree(&file->prot); freeMem(file->cacheDir); freeMem(file->bitmapFileName); freeMem(file->sparseFileName); mustCloseFd(&(file->fdSparse)); + udcBitmapClose(&file->bits); } freez(pFile); } static void qDecode(const char *input, char *buf, size_t size) /* Reverse the qEncode performed on afterProcotol above into buf or abort. */ { safecpy(buf, size, input); char c, *r = buf, *w = buf; while ((c = *r++) != '\0') { if (c == 'Q') { int q; if (sscanf(r, "%02X", &q)) @@ -1015,44 +1023,37 @@ static void readBitsIntoBuf(int fd, int headerSize, int bitStart, int bitEnd, Bits **retBits, int *retPartOffset) /* Do some bit-to-byte offset conversions and read in all the bytes that * have information in the bits we're interested in. 
*/ { int byteStart = bitStart/8; int byteEnd = bitToByteSize(bitEnd); int byteSize = byteEnd - byteStart; Bits *bits = needLargeMem(byteSize); mustLseek(fd, headerSize + byteStart, SEEK_SET); mustReadFd(fd, bits, byteSize); *retBits = bits; *retPartOffset = byteStart*8; } -static boolean allBitsSetInFile(int fd, int headerSize, int bitStart, int bitEnd) +static boolean allBitsSetInFile(int bitStart, int bitEnd, int partOffset, Bits *bits) /* Return TRUE if all bits in file between start and end are set. */ { -int partOffset; -Bits *bits; - -readBitsIntoBuf(fd, headerSize, bitStart, bitEnd, &bits, &partOffset); - int partBitStart = bitStart - partOffset; int partBitEnd = bitEnd - partOffset; int nextClearBit = bitFindClear(bits, partBitStart, partBitEnd); boolean allSet = (nextClearBit >= partBitEnd); - -freeMem(bits); return allSet; } // For tests/udcTest.c debugging: not declared in udc.h, but not static either: boolean udcCheckCacheBits(struct udcFile *file, int startBlock, int endBlock) /* Warn and return TRUE if any bit in (startBlock,endBlock] is not set. */ { boolean gotUnset = FALSE; struct udcBitmap *bitmap = udcBitmapOpen(file->bitmapFileName); int partOffset; Bits *bits; readBitsIntoBuf(bitmap->fd, udcBitmapHeaderSize, startBlock, endBlock, &bits, &partOffset); int partBitStart = startBlock - partOffset; int partBitEnd = endBlock - partOffset; @@ -1070,50 +1071,56 @@ return gotUnset; } static void fetchMissingBlocks(struct udcFile *file, struct udcBitmap *bits, int startBlock, int blockCount, int blockSize) /* Fetch missing blocks from remote and put them into file. errAbort if trouble. 
*/ { bits64 startPos = (bits64)startBlock * blockSize; bits64 endPos = startPos + (bits64)blockCount * blockSize; if (endPos > file->size) endPos = file->size; if (endPos > startPos) { bits64 readSize = endPos - startPos; void *buf = needLargeMem(readSize); + int actualSize = file->prot->fetchData(file->url, startPos, readSize, buf, &(file->connInfo)); if (actualSize != readSize) errAbort("unable to fetch %lld bytes from %s @%lld (got %d bytes)", readSize, file->url, startPos, actualSize); mustLseek(file->fdSparse, startPos, SEEK_SET); mustWriteFd(file->fdSparse, buf, readSize); freez(&buf); } } -static void fetchMissingBits(struct udcFile *file, struct udcBitmap *bits, +static boolean fetchMissingBits(struct udcFile *file, struct udcBitmap *bits, bits64 start, bits64 end, bits64 *retFetchedStart, bits64 *retFetchedEnd) /* Scan through relevant parts of bitmap, fetching blocks we don't already have. */ { /* Fetch relevant part of bitmap into memory */ int partOffset; Bits *b; int startBlock = start / bits->blockSize; int endBlock = (end + bits->blockSize - 1) / bits->blockSize; readBitsIntoBuf(bits->fd, udcBitmapHeaderSize, startBlock, endBlock, &b, &partOffset); +if (allBitsSetInFile(startBlock, endBlock, partOffset, b)) + { // it is already in the cache + freeMem(b); + return TRUE; + } /* Loop around first skipping set bits, then fetching clear bits. */ boolean dirty = FALSE; int s = startBlock - partOffset; int e = endBlock - partOffset; for (;;) { int nextClearBit = bitFindClear(b, s, e); if (nextClearBit >= e) break; int nextSetBit = bitFindSet(b, nextClearBit, e); int clearSize = nextSetBit - nextClearBit; fetchMissingBlocks(file, bits, nextClearBit + partOffset, clearSize, bits->blockSize); bitSetRange(b, nextClearBit, clearSize); @@ -1125,125 +1132,126 @@ } if (dirty) { /* Update bitmap on disk.... 
*/ int byteStart = startBlock/8; int byteEnd = bitToByteSize(endBlock); int byteSize = byteEnd - byteStart; mustLseek(bits->fd, byteStart + udcBitmapHeaderSize, SEEK_SET); mustWriteFd(bits->fd, b, byteSize); } freeMem(b); *retFetchedStart = startBlock * bits->blockSize; *retFetchedEnd = endBlock * bits->blockSize; +return FALSE; } -static boolean udcCacheContains(struct udcBitmap *bits, bits64 offset, int size) -/* Return TRUE if cache already contains region. */ -{ -bits64 endOffset = offset + size; -int startBlock = offset / bits->blockSize; -int endBlock = (endOffset + bits->blockSize - 1) / bits->blockSize; -return allBitsSetInFile(bits->fd, udcBitmapHeaderSize, startBlock, endBlock); +static boolean rangeIntersectOrTouch64(bits64 start1, bits64 end1, bits64 start2, bits64 end2) +/* Return true if two 64-bit ranges intersect or touch. */ +{ // cannot use the version of this function that is in common.c since it only handles integers. +bits64 s = max(start1,start2); +bits64 e = min(end1,end2); +return e >= s; } static void udcFetchMissing(struct udcFile *file, struct udcBitmap *bits, bits64 start, bits64 end) /* Fetch missing pieces of data from file */ { /* Call lower level routine fetch remote data that is not already here. */ bits64 fetchedStart, fetchedEnd; -fetchMissingBits(file, bits, start, end, &fetchedStart, &fetchedEnd); +if (fetchMissingBits(file, bits, start, end, &fetchedStart, &fetchedEnd)) + return; /* Update file startData/endData members to include new data (and old as well if * the new data overlaps the old). 
*/ -if (rangeIntersection(file->startData, file->endData, fetchedStart, fetchedEnd) >= 0) +if (rangeIntersectOrTouch64(file->startData, file->endData, fetchedStart, fetchedEnd)) { if (fetchedStart > file->startData) fetchedStart = file->startData; if (fetchedEnd < file->endData) fetchedEnd = file->endData; } file->startData = fetchedStart; file->endData = fetchedEnd; } static boolean udcCachePreload(struct udcFile *file, bits64 offset, int size) /* Make sure that given data is in cache - fetching it remotely if need be. * Return TRUE on success. */ { boolean ok = TRUE; /* We'll break this operation into blocks of a reasonable size to allow * other processes to get cache access, since we have to lock the cache files. */ bits64 s,e, endPos=offset+size; for (s = offset; s < endPos; s = e) { /* Figure out bounds of this section. */ e = s + udcMaxBytesPerRemoteFetch; if (e > endPos) e = endPos; - struct udcBitmap *bits = udcBitmapOpen(file->bitmapFileName); + struct udcBitmap *bits = file->bits; if (bits->version == file->bitmapVersion) { - if (!udcCacheContains(bits, s, e-s)) udcFetchMissing(file, bits, s, e); } else { ok = FALSE; verbose(2, "udcCachePreload version check failed %d vs %d", bits->version, file->bitmapVersion); } - udcBitmapClose(&bits); if (!ok) break; } return ok; } int udcRead(struct udcFile *file, void *buf, int size) /* Read a block from file. Return amount actually read. */ { /* Figure out region of file we're going to read, and clip it against file size. */ bits64 start = file->offset; if (start > file->size) return 0; bits64 end = start + size; if (end > file->size) end = file->size; size = end - start; + /* If we're outside of the window of file we already know is good, then have to * consult cache on disk, and maybe even fetch data remotely! 
*/ if (start < file->startData || end > file->endData) { + if (!udcCachePreload(file, start, size)) { verbose(2, "udcCachePreload failed"); return 0; } - // Even with this, the changes we wrote to fdSparse may not be visible when we read! - mustCloseFd(&(file->fdSparse)); - file->fdSparse = mustOpenFd(file->sparseFileName, O_RDWR); + /* Currently only need fseek here. Would be safer, but possibly * slower to move fseek so it is always executed in front of read, in * case other code is moving around file pointer. */ + mustLseek(file->fdSparse, start, SEEK_SET); } + mustReadFd(file->fdSparse, buf, size); file->offset += size; return size; } void udcMustRead(struct udcFile *file, void *buf, int size) /* Read a block from file. Abort if any problem, including EOF before size is read. */ { int sizeRead = udcRead(file, buf, size); if (sizeRead < size) errAbort("udc couldn't read %d bytes from %s, did read %d", size, file->url, sizeRead); } int udcGetChar(struct udcFile *file) /* Get next character from file or die trying. */