09243c7da5f14dbfa408cf7a54e567080da81a05 markd Wed Oct 17 10:44:15 2018 -0700 Added ability to access UDC file in cache using virtual memory (mmap). The test sure are nice. diff --git src/lib/udc.c src/lib/udc.c index 34fcaef..aa4c2dd 100644 --- src/lib/udc.c +++ src/lib/udc.c @@ -12,30 +12,31 @@ * contains two files - "bitmap" and "sparseData" that contains information on which * parts of the URL are cached and the actual cached data respectively. The subdirectory name * associated with the file is constructed from the URL in a straightforward manner. * http://genome.ucsc.edu/cgi-bin/hgGateway * gets mapped to: * rootCacheDir/http/genome.ucsc.edu/cgi-bin/hgGateway/ * The URL protocol is the first directory under the root, and the remainder of the * URL, with some necessary escaping, is used to define the rest of the cache directory * structure, with each '/' after the protocol line translating into another directory * level. * * The bitmap file contains time stamp and size data as well as an array with one bit * for each block of the file that has been fetched. Currently the block size is 8K. */ #include <sys/file.h> +#include <sys/mman.h> #include "common.h" #include "hash.h" #include "obscure.h" #include "bits.h" #include "linefile.h" #include "portable.h" #include "sig.h" #include "net.h" #include "cheapcgi.h" #include "udc.h" #include "hex.h" #include <dirent.h> #include <openssl/sha.h> /* The stdio stream we'll use to output statistics on file i/o. Off by default. */ @@ -127,30 +128,31 @@ bits64 size; /* Size of file. */ bits64 offset; /* Current offset in file. */ char *cacheDir; /* Directory for cached file parts. */ char *bitmapFileName; /* Name of bitmap file. */ char *sparseFileName; /* Name of sparse data file. */ char *redirFileName; /* Name of redir file. */ int fdSparse; /* File descriptor for sparse data file. 
*/ boolean sparseReadAhead; /* Read-ahead has something in the buffer */ char *sparseReadAheadBuf; /* Read-ahead buffer, if any */ bits64 sparseRAOffset; /* Read-ahead buffer offset */ struct udcBitmap *bits; /* udcBitMap */ bits64 startData; /* Start of area in file we know to have data. */ bits64 endData; /* End of area in file we know to have data. */ bits32 bitmapVersion; /* Version of associated bitmap we were opened with. */ struct connInfo connInfo; /* Connection info for open net connection. */ + void *mmapBase; /* pointer to memory address if file has been mmapped, or NULL */ struct ios ios; /* Statistics on file access. */ }; struct udcBitmap /* The control structure including the bitmap of blocks that are cached. */ { struct udcBitmap *next; /* Next in list. */ bits32 blockSize; /* Number of bytes per block of file. */ bits64 remoteUpdate; /* Remote last update time. */ bits64 fileSize; /* File size */ bits32 version; /* Version - increments each time cache is stale. */ bits64 localUpdate; /* Time we last fetched new data into cache. */ bits64 localAccess; /* Time we last accessed data. */ boolean isSwapped; /* If true need to swap all bytes on read. */ int fd; /* File descriptor for file with current block. */ @@ -1231,30 +1233,35 @@ void udcFileClose(struct udcFile **pFile) /* Close down cached file. 
*/ { struct udcFile *file = *pFile; if (file != NULL) { if (udcLogStream) { fprintf(udcLogStream, "Close %s %s %lld %lld bit %lld %lld %lld %lld %lld sparse %lld %lld %lld %lld %lld udc %lld %lld %lld %lld %lld net %lld %lld %lld %lld %lld \n", file->url, file->prot->type, file->ios.numConnects, file->ios.numReuse, file->ios.bit.numSeeks, file->ios.bit.numReads, file->ios.bit.bytesRead, file->ios.bit.numWrites, file->ios.bit.bytesWritten, file->ios.sparse.numSeeks, file->ios.sparse.numReads, file->ios.sparse.bytesRead, file->ios.sparse.numWrites, file->ios.sparse.bytesWritten, file->ios.udc.numSeeks, file->ios.udc.numReads, file->ios.udc.bytesRead, file->ios.udc.numWrites, file->ios.udc.bytesWritten, file->ios.net.numSeeks, file->ios.net.numReads, file->ios.net.bytesRead, file->ios.net.numWrites, file->ios.net.bytesWritten); } + if (file->mmapBase != NULL) + { + if (munmap(file->mmapBase, file->size) < 0) + errnoAbort("munmap() failed on %s", file->url); + } if (file->connInfo.socket != 0) mustCloseFd(&(file->connInfo.socket)); if (file->connInfo.ctrlSocket != 0) mustCloseFd(&(file->connInfo.ctrlSocket)); freeMem(file->url); freeMem(file->protocol); udcProtocolFree(&file->prot); freeMem(file->cacheDir); freeMem(file->bitmapFileName); freeMem(file->sparseFileName); freeMem(file->sparseReadAheadBuf); if (file->fdSparse != 0) mustCloseFd(&(file->fdSparse)); udcBitmapClose(&file->bits); } @@ -2005,15 +2012,44 @@ /* return true if file is not a http or ftp file, just a local file */ { // copied from above char *protocol = NULL, *afterProtocol = NULL, *colon; udcParseUrl(url, &protocol, &afterProtocol, &colon); freez(&protocol); freez(&afterProtocol); return colon==NULL; } boolean udcExists(char *url) /* return true if a local or remote file exists */ { return udcFileSize(url)!=-1; } + +void udcMMap(struct udcFile *file) +/* Enable access to underlying file as memory using mmap. udcMMapFetch + * must be called to actually access regions of the file. 
 */ +{ +if (file->mmapBase != NULL) + errAbort("File is already mmaped: %s", file->url); +file->mmapBase = mmap(NULL, file->size, PROT_READ, MAP_SHARED, file->fdSparse, 0); +if (file->mmapBase == MAP_FAILED) + errnoAbort("mmap() failed for %s", file->url); +} + +void *udcMMapFetch(struct udcFile *file, bits64 offset, bits64 size) +/* Return pointer to a region of the file in memory, ensuring that region is + * cached. udcMMap must have been called to enable access. This must be + * called for first access to a range of the file or erroneous (zeros) data + * may be returned. May be called multiple times on a range or overlapping + * ranges. */ +{ +if (file->mmapBase == NULL) + errAbort("udcMMap() has not been called for: %s", file->url); +if ((offset + size) > file->size) + errAbort("udcMMapFetch on offset %lld for %lld bytes exceeds length of file %lld on %s", + offset, size, file->size, file->url); +if (udcCacheEnabled() && !sameString(file->protocol, "transparent")) + udcCachePreload(file, offset, size); +return ((char*)file->mmapBase) + offset; +} +