d212664046ced8d785cd7ef2657d389d5e524bc0 braney Mon Nov 16 12:31:08 2015 -0800 instrument the UDC layer #16387 diff --git src/lib/udc.c src/lib/udc.c index b0d078f..1966d7d 100644 --- src/lib/udc.c +++ src/lib/udc.c @@ -26,171 +26,242 @@ #include <sys/file.h> #include "common.h" #include "hash.h" #include "obscure.h" #include "bits.h" #include "linefile.h" #include "portable.h" #include "sig.h" #include "net.h" #include "cheapcgi.h" #include "udc.h" #include "hex.h" #include <dirent.h> #include <openssl/sha.h> +/* The stdio stream we'll use to output statistics on file i/o. Off by default. */ FILE *udcLogStream = NULL; +void udcSetLog(FILE *fp) +/* Turn on logging of file i/o. + * For each UDC file two lines are written. One line for the open, and one line for the close. + * The Open line just has the URL being opened. + * The Close line has the the URL plus a bunch of counts of the number of seeks, reads, and writes + * for the following four files: the udc bitmap, the udc sparse data, the incoming calls + * to the UDC layer, and the network connection to the (possibly) remote file. + * There are two additional counts: the number of socket connects, and the + * number of times a socket is reused instead of closed and reopened. + */ +{ +udcLogStream = fp; +fprintf(fp, "Begin\n"); +} + +struct ioStats +/* Statistics concerning reads and seeks. */ + { + bits64 numSeeks; /* The number of seeks on this file */ + bits64 numReads; /* The number of reads from this file */ + bits64 bytesRead; /* The number of bytes read from this file */ + bits64 numWrites; /* The number of writes to this file */ + bits64 bytesWritten; /* The number of bytes written to this file */ + }; + +struct ios +/* Statistics concerning reads and seeks for sparse, bitmap, url, and to us. */ + { + struct ioStats bit; /* Statistics on file i/o to the bitmap file. */ + struct ioStats sparse; /* Statistics on file i/o to the sparse data file. */ + struct ioStats udc; /* Statistics on file i/o from the application to us. */ + struct ioStats net; /* Statistics on file i/o over the network. */ + bits64 numConnects; /* The number of socket connections made. */ + bits64 numReuse; /* The number of socket reuses. */ + }; + #define udcBlockSize (8*1024) /* All fetch requests are rounded up to block size. */ #define udcMaxBytesPerRemoteFetch (udcBlockSize * 32) /* Very large remote reads are broken down into chunks this size. */ struct connInfo /* Socket descriptor and associated info, for keeping net connections open. */ { int socket; /* Socket descriptor for data connection (or 0). */ bits64 offset; /* Current file offset of socket. */ int ctrlSocket; /* (FTP only) Control socket descriptor or 0. */ char *redirUrl; /* (HTTP(S) only) use redirected url */ }; typedef int (*UdcDataCallback)(char *url, bits64 offset, int size, void *buffer, - struct connInfo *ci); + struct udcFile *file); /* Type for callback function that fetches file data. */ struct udcRemoteFileInfo /* Information about a remote file. */ { bits64 updateTime; /* Last update in seconds since 1970 */ bits64 size; /* Remote file size */ struct connInfo ci; /* Connection info for open net connection */ }; typedef boolean (*UdcInfoCallback)(char *url, struct udcRemoteFileInfo *retInfo); /* Type for callback function that fetches file timestamp and size. */ struct udcProtocol /* Something to handle a communications protocol like http, https, ftp, local file i/o, etc. */ { struct udcProtocol *next; /* Next in list */ UdcDataCallback fetchData; /* Data fetcher */ UdcInfoCallback fetchInfo; /* Timestamp & size fetcher */ - }; - -struct ioStats -/* Statistics concerning reads and seeks. */ - { - bits64 numSeeks; - bits64 numReads; - bits64 bytesRead; + char *type; }; struct udcFile /* A file handle for our caching system. */ { struct udcFile *next; /* Next in list. */ char *url; /* Name of file - includes protocol */ char *protocol; /* The URL up to the first colon. http: etc. */ struct udcProtocol *prot; /* Protocol specific data and methods. */ time_t updateTime; /* Last modified timestamp. */ bits64 size; /* Size of file. */ bits64 offset; /* Current offset in file. */ char *cacheDir; /* Directory for cached file parts. */ char *bitmapFileName; /* Name of bitmap file. */ char *sparseFileName; /* Name of sparse data file. */ char *redirFileName; /* Name of redir file. */ int fdSparse; /* File descriptor for sparse data file. */ boolean sparseReadAhead; /* Read-ahead has something in the buffer */ char *sparseReadAheadBuf; /* Read-ahead buffer, if any */ bits64 sparseRAOffset; /* Read-ahead buffer offset */ struct udcBitmap *bits; /* udcBitMap */ bits64 startData; /* Start of area in file we know to have data. */ bits64 endData; /* End of area in file we know to have data. */ bits32 bitmapVersion; /* Version of associated bitmap we were opened with. */ struct connInfo connInfo; /* Connection info for open net connection. */ - struct ioStats ioStats; /* Statistics on file access. */ + struct ios ios; /* Statistics on file access. */ }; struct udcBitmap /* The control structure including the bitmap of blocks that are cached. */ { struct udcBitmap *next; /* Next in list. */ bits32 blockSize; /* Number of bytes per block of file. */ bits64 remoteUpdate; /* Remote last update time. */ bits64 fileSize; /* File size */ bits32 version; /* Version - increments each time cache is stale. */ bits64 localUpdate; /* Time we last fetched new data into cache. */ bits64 localAccess; /* Time we last accessed data. */ boolean isSwapped; /* If true need to swap all bytes on read. */ int fd; /* File descriptor for file with current block. */ }; static char *bitmapName = "bitmap"; static char *sparseDataName = "sparseData"; static char *redirName = "redir"; #define udcBitmapHeaderSize (64) static int cacheTimeout = 0; #define MAX_SKIP_TO_SAVE_RECONNECT (udcMaxBytesPerRemoteFetch / 2) -static void readAndIgnore(int sd, bits64 size) +static off_t ourMustLseek(struct ioStats *ioStats, int fd, off_t offset, int whence) +{ +ioStats->numSeeks++; +return mustLseek(fd, offset, whence); +} + + +static void ourMustWrite(struct ioStats *ioStats, int fd, void *buf, size_t size) +{ +ioStats->numWrites++; +ioStats->bytesWritten += size; +mustWriteFd(fd, buf, size); +} + +static size_t ourRead(struct ioStats *ioStats, int fd, void *buf, size_t size) +{ +ioStats->numReads++; +size_t bytesRead = read(fd, buf, size); +ioStats->bytesRead += bytesRead; + +return bytesRead; +} + +static void ourMustRead(struct ioStats *ioStats, int fd, void *buf, size_t size) +{ +ioStats->numReads++; +ioStats->bytesRead += size; +mustReadFd(fd, buf, size); +} + +static size_t ourFread(struct ioStats *ioStats, void *buf, size_t size, size_t nmemb, FILE *stream) +{ +ioStats->numReads++; +ioStats->bytesRead += size * nmemb; +return fread(buf, size, nmemb, stream); +} + + +static void readAndIgnore(struct ioStats *ioStats, int sd, bits64 size) /* Read size bytes from sd and return. */ { static char *buf = NULL; if (buf == NULL) buf = needMem(udcBlockSize); bits64 remaining = size, total = 0; while (remaining > 0) { bits64 chunkSize = min(remaining, udcBlockSize); - ssize_t rd = read(sd, buf, chunkSize); + ssize_t rd = ourRead(ioStats, sd, buf, chunkSize); if (rd < 0) errnoAbort("readAndIgnore: error reading socket after %lld bytes", total); remaining -= rd; total += rd; } if (total < size) errAbort("readAndIgnore: got EOF at %lld bytes (wanted %lld)", total, size); } -static int connInfoGetSocket(struct connInfo *ci, char *url, bits64 offset, int size) +static int connInfoGetSocket(struct udcFile *file, char *url, bits64 offset, int size) /* If ci has an open socket and the given offset matches ci's current offset, * reuse ci->socket. Otherwise close the socket, open a new one, and update ci, * or return -1 if there is an error opening a new one. */ { +struct connInfo *ci = &file->connInfo; if (ci != NULL && ci->socket > 0 && ci->offset != offset) { bits64 skipSize = (offset - ci->offset); if (skipSize > 0 && skipSize <= MAX_SKIP_TO_SAVE_RECONNECT) { verbose(4, "!! skipping %lld bytes @%lld to avoid reconnect\n", skipSize, ci->offset); - readAndIgnore(ci->socket, skipSize); + readAndIgnore(&file->ios.net, ci->socket, skipSize); ci->offset = offset; + file->ios.numReuse++; } else { verbose(4, "Offset mismatch (ci %lld != new %lld), reopening.\n", ci->offset, offset); mustCloseFd(&(ci->socket)); if (ci->ctrlSocket > 0) mustCloseFd(&(ci->ctrlSocket)); ZeroVar(ci); } } int sd; if (ci == NULL || ci->socket <= 0) { + file->ios.numConnects++; if (ci->redirUrl) { url = transferParamsToRedirectedUrl(url, ci->redirUrl); } char rangeUrl[2048]; if (ci == NULL) { safef(rangeUrl, sizeof(rangeUrl), "%s;byterange=%lld-%lld", url, offset, (offset + size - 1)); sd = netUrlOpen(rangeUrl); } else { safef(rangeUrl, sizeof(rangeUrl), "%s;byterange=%lld-", url, offset); sd = ci->socket = netUrlOpenSockets(rangeUrl, &(ci->ctrlSocket)); @@ -223,107 +294,107 @@ static char *assertLocalUrl(char *url) /* Make sure that url is local and return bits past the protocol. */ { if (startsWith("local:", url)) url += 6; if (url[0] != '/') errAbort("Local urls must start at /"); if (stringIn("..", url) || stringIn("~", url) || stringIn("//", url) || stringIn("/./", url) || endsWith("/.", url)) { errAbort("relative paths not allowed in local urls (%s)", url); } return url; } -static int udcDataViaLocal(char *url, bits64 offset, int size, void *buffer, struct connInfo *ci) +static int udcDataViaLocal(char *url, bits64 offset, int size, void *buffer, struct udcFile *file) /* Fetch a block of data of given size into buffer using the http: protocol. * Returns number of bytes actually read. Does an errAbort on * error. Typically will be called with size in the 8k - 64k range. */ { /* Need to check time stamp here. */ verbose(4, "reading remote data - %d bytes at %lld - on %s\n", size, offset, url); url = assertLocalUrl(url); FILE *f = mustOpen(url, "rb"); fseek(f, offset, SEEK_SET); -int sizeRead = fread(buffer, 1, size, f); +int sizeRead = ourFread(&file->ios.net, buffer, 1, size, f); if (ferror(f)) { warn("udcDataViaLocal failed to fetch %d bytes at %lld", size, offset); errnoAbort("file %s", url); } carefulClose(&f); return sizeRead; } static boolean udcInfoViaLocal(char *url, struct udcRemoteFileInfo *retInfo) /* Fill in *retTime with last modified time for file specified in url. * Return FALSE if file does not even exist. */ { verbose(4, "checking remote info on %s\n", url); url = assertLocalUrl(url); struct stat status; int ret = stat(url, &status); if (ret < 0) return FALSE; retInfo->updateTime = status.st_mtime; retInfo->size = status.st_size; return TRUE; } /********* Section for transparent file protocol **********/ static int udcDataViaTransparent(char *url, bits64 offset, int size, void *buffer, - struct connInfo *ci) + struct udcFile *file) /* Fetch a block of data of given size into buffer using the http: protocol. * Returns number of bytes actually read. Does an errAbort on * error. Typically will be called with size in the 8k - 64k range. */ { internalErr(); /* Should not get here. */ return size; } static boolean udcInfoViaTransparent(char *url, struct udcRemoteFileInfo *retInfo) /* Fill in *retInfo with last modified time for file specified in url. * Return FALSE if file does not even exist. */ { internalErr(); /* Should not get here. */ return FALSE; } /********* Section for slow local file protocol - simulates network... **********/ -static int udcDataViaSlow(char *url, bits64 offset, int size, void *buffer, struct connInfo *ci) +static int udcDataViaSlow(char *url, bits64 offset, int size, void *buffer, struct udcFile *file) /* Fetch a block of data of given size into buffer using the http: protocol. * Returns number of bytes actually read. Does an errAbort on * error. Typically will be called with size in the 8k - 64k range. */ { verbose(4, "slow reading remote data - %d bytes at %lld - on %s\n", size, offset, url); sleep1000(500); char *fileName = url + 5; /* skip over 'slow:' */ FILE *f = mustOpen(fileName, "rb"); fseek(f, offset, SEEK_SET); char *pt = buffer; int i, step=1024; int sizeRead = 0; for (i=0; i<size; i += step) { sleep1000(250); int readChunk = size - i; if (readChunk > step) readChunk = step; - int oneReadSize = fread(pt, 1, readChunk, f); + int oneReadSize = ourFread(&file->ios.net, pt, 1, readChunk, f); verbose(4, "slowly read %d bytes\n", oneReadSize); if (ferror(f)) { warn("udcDataViaSlow failed to fetch %d bytes at %lld", size, offset); errnoAbort("file %s", fileName); } pt += step; sizeRead += oneReadSize; } carefulClose(&f); return sizeRead; } static boolean udcInfoViaSlow(char *url, struct udcRemoteFileInfo *retInfo) /* Fill in *retTime with last modified time for file specified in url. @@ -357,54 +428,55 @@ defaultDir = cloneString(path); } void udcDisableCache() /* Switch off caching. Re-enable with udcSetDefaultDir */ { defaultDir = NULL; } static bool udcCacheEnabled() /* TRUE if caching is activated */ { return (defaultDir != NULL); } -int udcDataViaHttpOrFtp(char *url, bits64 offset, int size, void *buffer, struct connInfo *ci) +int udcDataViaHttpOrFtp( char *url, bits64 offset, int size, void *buffer, struct udcFile *file) /* Fetch a block of data of given size into buffer using url's protocol, * which must be http, https or ftp. Returns number of bytes actually read. * Does an errAbort on error. * Typically will be called with size in the 8k-64k range. */ { if (startsWith("http://",url) || startsWith("https://",url) || startsWith("ftp://",url)) verbose(4, "reading http/https/ftp data - %d bytes at %lld - on %s\n", size, offset, url); else errAbort("Invalid protocol in url [%s] in udcDataViaFtp, only http, https, or ftp supported", url); -int sd = connInfoGetSocket(ci, url, offset, size); +int sd = connInfoGetSocket(file, url, offset, size); if (sd < 0) errAbort("Can't get data socket for %s", url); int rd = 0, total = 0, remaining = size; char *buf = (char *)buffer; -while ((remaining > 0) && ((rd = read(sd, buf, remaining)) > 0)) +while ((remaining > 0) && ((rd = ourRead(&file->ios.net, sd, buf, remaining)) > 0)) { total += rd; buf += rd; remaining -= rd; } if (rd == -1) errnoAbort("udcDataViaHttpOrFtp: error reading socket"); +struct connInfo *ci = &file->connInfo; if (ci == NULL) mustCloseFd(&sd); else ci->offset += total; return total; } boolean udcInfoViaHttp(char *url, struct udcRemoteFileInfo *retInfo) /* Gets size and last modified time of URL * and returns status of HEAD GET. */ { verbose(4, "checking http remote info on %s\n", url); int redirectCount = 0; struct hash *hash; int status; @@ -550,39 +622,39 @@ bits32 reserved32 = 0; int blockCount = (remoteSize + udcBlockSize - 1)/udcBlockSize; int bitmapSize = bitToByteSize(blockCount); /* Write out fixed part of header. */ writeOneFd(fd, sig); writeOneFd(fd, blockSize); writeOneFd(fd, remoteUpdate); writeOneFd(fd, remoteSize); writeOneFd(fd, version); writeOneFd(fd, reserved32); writeOneFd(fd, reserved64); writeOneFd(fd, reserved64); writeOneFd(fd, reserved64); writeOneFd(fd, reserved64); -long long offset = mustLseek(fd, 0, SEEK_CUR); +long long offset = ourMustLseek(&file->ios.bit, fd, 0, SEEK_CUR); if (offset != udcBitmapHeaderSize) errAbort("offset in fd=%d, f=%s is %lld, not expected udcBitmapHeaderSize %d", fd, file->bitmapFileName, offset, udcBitmapHeaderSize); /* Write out initial all-zero bitmap, using sparse-file method: write 0 to final address. */ unsigned char zero = 0; -mustLseek(fd, bitmapSize-1, SEEK_CUR); -mustWriteFd(fd, &zero, 1); +ourMustLseek(&file->ios.bit, fd, bitmapSize-1, SEEK_CUR); +ourMustWrite(&file->ios.bit, fd, &zero, 1); /* Clean up bitmap file and name. */ mustCloseFd(&fd); /* Create an empty data file. */ fd = mustOpenFd(file->sparseFileName, O_WRONLY | O_CREAT | O_TRUNC); mustCloseFd(&fd); } static struct udcBitmap *udcBitmapOpen(char *fileName) /* Open up a bitmap file and read and verify header. Return NULL if file doesn't * exist, abort on error. */ { /* Open file, returning NULL if can't. */ int fd = open(fileName, O_RDWR); @@ -645,50 +717,55 @@ { mustCloseFd(&(bits->fd)); freez(pBits); } } static struct udcProtocol *udcProtocolNew(char *upToColon) /* Build up a new protocol around a string such as "http" or "local" */ { struct udcProtocol *prot; AllocVar(prot); if (sameString(upToColon, "local")) { prot->fetchData = udcDataViaLocal; prot->fetchInfo = udcInfoViaLocal; + prot->type = "local"; } else if (sameString(upToColon, "slow")) { prot->fetchData = udcDataViaSlow; prot->fetchInfo = udcInfoViaSlow; + prot->type = "slow"; } else if (sameString(upToColon, "http") || sameString(upToColon, "https")) { prot->fetchData = udcDataViaHttpOrFtp; prot->fetchInfo = udcInfoViaHttp; + prot->type = "http"; } else if (sameString(upToColon, "ftp")) { prot->fetchData = udcDataViaHttpOrFtp; prot->fetchInfo = udcInfoViaFtp; + prot->type = "ftp"; } else if (sameString(upToColon, "transparent")) { prot->fetchData = udcDataViaTransparent; prot->fetchInfo = udcInfoViaTransparent; + prot->type = "transparent"; } else { errAbort("Unrecognized protocol %s in udcProtNew", upToColon); } return prot; } static void udcProtocolFree(struct udcProtocol **pProt) /* Free up protocol resources. */ { freez(pProt); } static void setInitialCachedDataBounds(struct udcFile *file, boolean useCacheInfo) @@ -737,31 +814,31 @@ } file->bitmapVersion = bits->version; /* Read in a little bit from bitmap while we have it open to see if we have anything cached. */ if (file->size > 0) { Bits b; off_t wasAt = lseek(bits->fd, 0, SEEK_CUR); mustReadOneFd(bits->fd, b); int endBlock = (file->size + udcBlockSize - 1)/udcBlockSize; if (endBlock > 8) endBlock = 8; int initialCachedBlocks = bitFindClear(&b, 0, endBlock); file->endData = initialCachedBlocks * udcBlockSize; - mustLseek(bits->fd, wasAt, SEEK_SET); + ourMustLseek(&file->ios.bit, bits->fd, wasAt, SEEK_SET); } file->bits = bits; } static boolean qEscaped(char c) /* Returns TRUE if character needs to be escaped in q-encoding. */ { if (isalnum(c)) return c == 'Q'; else return c != '_' && c != '-' && c != '/' && c != '.'; } @@ -1091,32 +1168,36 @@ freeMem(file->sparseFileName); freeMem(file); freeMem(protocol); freeMem(afterProtocol); return list; } void udcFileClose(struct udcFile **pFile) /* Close down cached file. */ { struct udcFile *file = *pFile; if (file != NULL) { if (udcLogStream) { - fprintf(udcLogStream, "Close %s %lld %lld %lld\n", file->url, - file->ioStats.numSeeks, file->ioStats.numReads, file->ioStats.bytesRead); + fprintf(udcLogStream, "Close %s %s %lld %lld bit %lld %lld %lld %lld %lld sparse %lld %lld %lld %lld %lld udc %lld %lld %lld %lld %lld net %lld %lld %lld %lld %lld \n", + file->url, file->prot->type, file->ios.numConnects, file->ios.numReuse, + file->ios.bit.numSeeks, file->ios.bit.numReads, file->ios.bit.bytesRead, file->ios.bit.numWrites, file->ios.bit.bytesWritten, + file->ios.sparse.numSeeks, file->ios.sparse.numReads, file->ios.sparse.bytesRead, file->ios.sparse.numWrites, file->ios.sparse.bytesWritten, + file->ios.udc.numSeeks, file->ios.udc.numReads, file->ios.udc.bytesRead, file->ios.udc.numWrites, file->ios.udc.bytesWritten, + file->ios.net.numSeeks, file->ios.net.numReads, file->ios.net.bytesRead, file->ios.net.numWrites, file->ios.net.bytesWritten); } if (file->connInfo.socket != 0) mustCloseFd(&(file->connInfo.socket)); if (file->connInfo.ctrlSocket != 0) mustCloseFd(&(file->connInfo.ctrlSocket)); freeMem(file->url); freeMem(file->protocol); udcProtocolFree(&file->prot); freeMem(file->cacheDir); freeMem(file->bitmapFileName); freeMem(file->sparseFileName); freeMem(file->sparseReadAheadBuf); if (file->fdSparse != 0) mustCloseFd(&(file->fdSparse)); udcBitmapClose(&file->bits); @@ -1204,114 +1285,114 @@ cacheDir = udcDefaultDir(); struct slName *sl, *slList = udcFileCacheFiles(url, cacheDir); if (slList == NULL) return now; for (sl = slList; sl != NULL; sl = sl->next) if (endsWith(sl->name, bitmapName)) { if (fileExists(sl->name)) oldestTime = min(fileModTime(sl->name), oldestTime); else return now; } return (now - oldestTime); } -static void readBitsIntoBuf(int fd, int headerSize, int bitStart, int bitEnd, +static void readBitsIntoBuf(struct udcFile *file, int fd, int headerSize, int bitStart, int bitEnd, Bits **retBits, int *retPartOffset) /* Do some bit-to-byte offset conversions and read in all the bytes that * have information in the bits we're interested in. */ { int byteStart = bitStart/8; int byteEnd = bitToByteSize(bitEnd); int byteSize = byteEnd - byteStart; Bits *bits = needLargeMem(byteSize); -mustLseek(fd, headerSize + byteStart, SEEK_SET); -mustReadFd(fd, bits, byteSize); +ourMustLseek(&file->ios.bit,fd, headerSize + byteStart, SEEK_SET); +ourMustRead(&file->ios.bit, fd, bits, byteSize); *retBits = bits; *retPartOffset = byteStart*8; } static boolean allBitsSetInFile(int bitStart, int bitEnd, int partOffset, Bits *bits) /* Return TRUE if all bits in file between start and end are set. */ { int partBitStart = bitStart - partOffset; int partBitEnd = bitEnd - partOffset; int nextClearBit = bitFindClear(bits, partBitStart, partBitEnd); boolean allSet = (nextClearBit >= partBitEnd); return allSet; } // For tests/udcTest.c debugging: not declared in udc.h, but not static either: boolean udcCheckCacheBits(struct udcFile *file, int startBlock, int endBlock) /* Warn and return TRUE if any bit in (startBlock,endBlock] is not set. */ { boolean gotUnset = FALSE; struct udcBitmap *bitmap = udcBitmapOpen(file->bitmapFileName); int partOffset; Bits *bits; -readBitsIntoBuf(bitmap->fd, udcBitmapHeaderSize, startBlock, endBlock, &bits, &partOffset); +readBitsIntoBuf(file, bitmap->fd, udcBitmapHeaderSize, startBlock, endBlock, &bits, &partOffset); int partBitStart = startBlock - partOffset; int partBitEnd = endBlock - partOffset; int nextClearBit = bitFindClear(bits, partBitStart, partBitEnd); while (nextClearBit < partBitEnd) { int clearBlock = nextClearBit + partOffset; warn("... udcFile 0x%04lx: bit for block %d (%lld..%lld] is not set", (unsigned long)file, clearBlock, ((long long)clearBlock * udcBlockSize), (((long long)clearBlock+1) * udcBlockSize)); gotUnset = TRUE; int nextSetBit = bitFindSet(bits, nextClearBit, partBitEnd); nextClearBit = bitFindClear(bits, nextSetBit, partBitEnd); } return gotUnset; } static void fetchMissingBlocks(struct udcFile *file, struct udcBitmap *bits, int startBlock, int blockCount, int blockSize) /* Fetch missing blocks from remote and put them into file. errAbort if trouble. */ { bits64 startPos = (bits64)startBlock * blockSize; bits64 endPos = startPos + (bits64)blockCount * blockSize; if (endPos > file->size) endPos = file->size; if (endPos > startPos) { bits64 readSize = endPos - startPos; void *buf = needLargeMem(readSize); - int actualSize = file->prot->fetchData(file->url, startPos, readSize, buf, &(file->connInfo)); + int actualSize = file->prot->fetchData(file->url, startPos, readSize, buf, file); if (actualSize != readSize) errAbort("unable to fetch %lld bytes from %s @%lld (got %d bytes)", readSize, file->url, startPos, actualSize); - mustLseek(file->fdSparse, startPos, SEEK_SET); - mustWriteFd(file->fdSparse, buf, readSize); + ourMustLseek(&file->ios.sparse, file->fdSparse, startPos, SEEK_SET); + ourMustWrite(&file->ios.sparse, file->fdSparse, buf, readSize); freez(&buf); } } static boolean fetchMissingBits(struct udcFile *file, struct udcBitmap *bits, bits64 start, bits64 end, bits64 *retFetchedStart, bits64 *retFetchedEnd) /* Scan through relevant parts of bitmap, fetching blocks we don't already have. */ { /* Fetch relevant part of bitmap into memory */ int partOffset; Bits *b; int startBlock = start / bits->blockSize; int endBlock = (end + bits->blockSize - 1) / bits->blockSize; -readBitsIntoBuf(bits->fd, udcBitmapHeaderSize, startBlock, endBlock, &b, &partOffset); +readBitsIntoBuf(file, bits->fd, udcBitmapHeaderSize, startBlock, endBlock, &b, &partOffset); if (allBitsSetInFile(startBlock, endBlock, partOffset, b)) { // it is already in the cache freeMem(b); return TRUE; } /* Loop around first skipping set bits, then fetching clear bits. */ boolean dirty = FALSE; int s = startBlock - partOffset; int e = endBlock - partOffset; for (;;) { int nextClearBit = bitFindClear(b, s, e); if (nextClearBit >= e) break; @@ -1321,32 +1402,32 @@ fetchMissingBlocks(file, bits, nextClearBit + partOffset, clearSize, bits->blockSize); bitSetRange(b, nextClearBit, clearSize); dirty = TRUE; if (nextSetBit >= e) break; s = nextSetBit; } if (dirty) { /* Update bitmap on disk.... */ int byteStart = startBlock/8; int byteEnd = bitToByteSize(endBlock); int byteSize = byteEnd - byteStart; - mustLseek(bits->fd, byteStart + udcBitmapHeaderSize, SEEK_SET); - mustWriteFd(bits->fd, b, byteSize); + ourMustLseek(&file->ios.bit, bits->fd, byteStart + udcBitmapHeaderSize, SEEK_SET); + ourMustWrite(&file->ios.bit, bits->fd, b, byteSize); } freeMem(b); *retFetchedStart = startBlock * bits->blockSize; *retFetchedEnd = endBlock * bits->blockSize; return FALSE; } static boolean rangeIntersectOrTouch64(bits64 start1, bits64 end1, bits64 start2, bits64 end2) /* Return true if two 64-bit ranges intersect or touch. */ { // cannot use the version of this function that is in common.c since it only handles integers. bits64 s = max(start1,start2); bits64 e = min(end1,end2); return e >= s; } @@ -1400,39 +1481,40 @@ { ok = FALSE; verbose(4, "udcCachePreload version check failed %d vs %d", bits->version, file->bitmapVersion); } if (!ok) break; } return ok; } #define READAHEADBUFSIZE 4096 bits64 udcRead(struct udcFile *file, void *buf, bits64 size) /* Read a block from file. Return amount actually read. */ { -file->ioStats.numReads++; +file->ios.udc.numReads++; // if not caching, just fetch the data if (!udcCacheEnabled() && !sameString(file->protocol, "transparent")) { - int actualSize = file->prot->fetchData(file->url, file->offset, size, buf, &(file->connInfo)); + int actualSize = file->prot->fetchData(file->url, file->offset, size, buf, file); file->offset += actualSize; - file->ioStats.bytesRead += actualSize; + file->ios.udc.bytesRead += actualSize; return actualSize; } +file->ios.udc.bytesRead += size; /* Figure out region of file we're going to read, and clip it against file size. */ bits64 start = file->offset; if (start > file->size) return 0; bits64 end = start + size; if (end > file->size) end = file->size; size = end - start; char *cbuf = buf; /* use read-ahead buffer if present */ bits64 bytesRead = 0; bits64 raStart; @@ -1442,36 +1524,35 @@ if (file->sparseReadAhead) { raStart = file->sparseRAOffset; raEnd = raStart+READAHEADBUFSIZE; if (start >= raStart && start < raEnd) { // copy bytes out of rabuf bits64 endInBuf = min(raEnd, end); bits64 sizeInBuf = endInBuf - start; memcpy(cbuf, file->sparseReadAheadBuf + (start-raStart), sizeInBuf); cbuf += sizeInBuf; bytesRead += sizeInBuf; start = raEnd; size -= sizeInBuf; file->offset += sizeInBuf; - file->ioStats.bytesRead += sizeInBuf; if (size == 0) break; } file->sparseReadAhead = FALSE; - mustLseek(file->fdSparse, start, SEEK_SET); + ourMustLseek(&file->ios.sparse,file->fdSparse, start, SEEK_SET); } bits64 saveEnd = end; if (size < READAHEADBUFSIZE) { file->sparseReadAhead = TRUE; if (!file->sparseReadAheadBuf) file->sparseReadAheadBuf = needMem(READAHEADBUFSIZE); file->sparseRAOffset = start; size = READAHEADBUFSIZE; end = start + size; if (end > file->size) { end = file->size; size = end - start; @@ -1483,44 +1564,43 @@ * consult cache on disk, and maybe even fetch data remotely! */ if (start < file->startData || end > file->endData) { if (!udcCachePreload(file, start, size)) { verbose(4, "udcCachePreload failed"); bytesRead = 0; break; } /* Currently only need fseek here. Would be safer, but possibly * slower to move fseek so it is always executed in front of read, in * case other code is moving around file pointer. */ - mustLseek(file->fdSparse, start, SEEK_SET); + ourMustLseek(&file->ios.sparse,file->fdSparse, start, SEEK_SET); } if (file->sparseReadAhead) { - mustReadFd(file->fdSparse, file->sparseReadAheadBuf, size); + ourMustRead(&file->ios.sparse,file->fdSparse, file->sparseReadAheadBuf, size); end = saveEnd; size = end - start; } else { - mustReadFd(file->fdSparse, cbuf, size); + ourMustRead(&file->ios.sparse,file->fdSparse, cbuf, size); file->offset += size; - file->ioStats.bytesRead += size; bytesRead += size; break; } } return bytesRead; } void udcMustRead(struct udcFile *file, void *buf, bits64 size) /* Read a block from file. Abort if any problem, including EOF before size is read. */ { bits64 sizeRead = udcRead(file, buf, size); if (sizeRead < size) errAbort("udc couldn't read %llu bytes from %s, did read %llu", size, file->url, sizeRead); } @@ -1669,43 +1749,43 @@ } struct lineFile *udcWrapShortLineFile(char *url, char *cacheDir, size_t maxSize) /* Read in entire short (up to maxSize) url into memory and wrap a line file around it. * The cacheDir may be null in which case udcDefaultDir() will be used. If maxSize * is zero then a default value (currently 64 meg) will be used. */ { if (maxSize == 0) maxSize = 64 * 1024 * 1024; char *buf = udcFileReadAll(url, cacheDir, maxSize, NULL); return lineFileOnString(url, TRUE, buf); } void udcSeekCur(struct udcFile *file, bits64 offset) /* Seek to a particular position in file. */ { -file->ioStats.numSeeks++; +file->ios.udc.numSeeks++; file->offset += offset; if (udcCacheEnabled()) - mustLseek(file->fdSparse, offset, SEEK_CUR); + ourMustLseek(&file->ios.sparse,file->fdSparse, offset, SEEK_CUR); } void udcSeek(struct udcFile *file, bits64 offset) /* Seek to a particular position in file. */ { -file->ioStats.numSeeks++; +file->ios.udc.numSeeks++; file->offset = offset; if (udcCacheEnabled()) - mustLseek(file->fdSparse, offset, SEEK_SET); + ourMustLseek(&file->ios.sparse,file->fdSparse, offset, SEEK_SET); } bits64 udcTell(struct udcFile *file) /* Return current file position. */ { return file->offset; } static long bitRealDataSize(char *fileName) /* Return number of real bytes indicated by bitmaps */ { struct udcBitmap *bits = udcBitmapOpen(fileName); int blockSize = bits->blockSize; long byteSize = 0; int blockCount = (bits->fileSize + blockSize - 1)/blockSize; @@ -1848,20 +1928,15 @@ /* return true if file is not a http or ftp file, just a local file */ { // copied from above char *protocol = NULL, *afterProtocol = NULL, *colon; udcParseUrl(url, &protocol, &afterProtocol, &colon); freez(&protocol); freez(&afterProtocol); return colon==NULL; } boolean udcExists(char *url) /* return true if a local or remote file exists */ { return udcFileSize(url)!=-1; } - -void udcSetLog(FILE *fp) -{ -udcLogStream = fp; -}