09243c7da5f14dbfa408cf7a54e567080da81a05
markd
  Wed Oct 17 10:44:15 2018 -0700
Added ability to access UDC file in cache using virtual memory (mmap).  The tests sure are nice.

diff --git src/lib/udc.c src/lib/udc.c
index 34fcaef..aa4c2dd 100644
--- src/lib/udc.c
+++ src/lib/udc.c
@@ -12,30 +12,31 @@
  * contains two files - "bitmap" and "sparseData" that contains information on which
  * parts of the URL are cached and the actual cached data respectively. The subdirectory name
  * associated with the file is constructed from the URL in a straightforward manner.
  *     http://genome.ucsc.edu/cgi-bin/hgGateway
  * gets mapped to:
  *     rootCacheDir/http/genome.ucsc.edu/cgi-bin/hgGateway/
  * The URL protocol is the first directory under the root, and the remainder of the
  * URL, with some necessary escaping, is used to define the rest of the cache directory
  * structure, with each '/' after the protocol line translating into another directory
  * level.
  *    
  * The bitmap file contains time stamp and size data as well as an array with one bit
  * for each block of the file that has been fetched.  Currently the block size is 8K. */
 
 #include <sys/file.h>
+#include <sys/mman.h>
 #include "common.h"
 #include "hash.h"
 #include "obscure.h"
 #include "bits.h"
 #include "linefile.h"
 #include "portable.h"
 #include "sig.h"
 #include "net.h"
 #include "cheapcgi.h"
 #include "udc.h"
 #include "hex.h"
 #include <dirent.h>
 #include <openssl/sha.h>
 
 /* The stdio stream we'll use to output statistics on file i/o.  Off by default. */
@@ -127,30 +128,31 @@
     bits64 size;		/* Size of file. */
     bits64 offset;		/* Current offset in file. */
     char *cacheDir;		/* Directory for cached file parts. */
     char *bitmapFileName;	/* Name of bitmap file. */
     char *sparseFileName;	/* Name of sparse data file. */
     char *redirFileName;	/* Name of redir file. */
     int fdSparse;		/* File descriptor for sparse data file. */
     boolean sparseReadAhead;    /* Read-ahead has something in the buffer */
     char *sparseReadAheadBuf;   /* Read-ahead buffer, if any */
     bits64 sparseRAOffset;      /* Read-ahead buffer offset */
     struct udcBitmap *bits;     /* udcBitMap */
     bits64 startData;		/* Start of area in file we know to have data. */
     bits64 endData;		/* End of area in file we know to have data. */
     bits32 bitmapVersion;	/* Version of associated bitmap we were opened with. */
     struct connInfo connInfo;   /* Connection info for open net connection. */
+    void *mmapBase;             /* pointer to memory address if file has been mmapped, or NULL */
     struct ios ios;             /* Statistics on file access. */
     };
 
 struct udcBitmap
 /* The control structure including the bitmap of blocks that are cached. */
     {
     struct udcBitmap *next;	/* Next in list. */
     bits32 blockSize;		/* Number of bytes per block of file. */
     bits64 remoteUpdate;	/* Remote last update time. */
     bits64 fileSize;		/* File size */
     bits32 version;		/* Version - increments each time cache is stale. */
     bits64 localUpdate;		/* Time we last fetched new data into cache. */
     bits64 localAccess;		/* Time we last accessed data. */
     boolean isSwapped;		/* If true need to swap all bytes on read. */
     int fd;			/* File descriptor for file with current block. */
@@ -1231,30 +1233,35 @@
 void udcFileClose(struct udcFile **pFile)
 /* Close down cached file. */
 {
 struct udcFile *file = *pFile;
 if (file != NULL)
     {
     if (udcLogStream)
         {
         fprintf(udcLogStream, "Close %s %s %lld %lld bit %lld %lld %lld %lld %lld sparse %lld %lld %lld %lld %lld udc  %lld %lld %lld %lld %lld net %lld %lld %lld %lld %lld \n",
            file->url, file->prot->type, file->ios.numConnects, file->ios.numReuse,
            file->ios.bit.numSeeks, file->ios.bit.numReads, file->ios.bit.bytesRead, file->ios.bit.numWrites,  file->ios.bit.bytesWritten, 
            file->ios.sparse.numSeeks, file->ios.sparse.numReads, file->ios.sparse.bytesRead, file->ios.sparse.numWrites,  file->ios.sparse.bytesWritten, 
            file->ios.udc.numSeeks, file->ios.udc.numReads, file->ios.udc.bytesRead, file->ios.udc.numWrites,  file->ios.udc.bytesWritten, 
            file->ios.net.numSeeks, file->ios.net.numReads, file->ios.net.bytesRead, file->ios.net.numWrites,  file->ios.net.bytesWritten);
         }
+    if (file->mmapBase != NULL)
+        {
+        if (munmap(file->mmapBase, file->size) < 0)
+            errnoAbort("munmap() failed on %s", file->url);
+        }
     if (file->connInfo.socket != 0)
 	mustCloseFd(&(file->connInfo.socket));
     if (file->connInfo.ctrlSocket != 0)
 	mustCloseFd(&(file->connInfo.ctrlSocket));
     freeMem(file->url);
     freeMem(file->protocol);
     udcProtocolFree(&file->prot);
     freeMem(file->cacheDir);
     freeMem(file->bitmapFileName);
     freeMem(file->sparseFileName);
     freeMem(file->sparseReadAheadBuf);
     if (file->fdSparse != 0)
         mustCloseFd(&(file->fdSparse));
     udcBitmapClose(&file->bits);
     }
@@ -2005,15 +2012,44 @@
 /* return true if file is not a http or ftp file, just a local file */
 {
 // copied from above
 char *protocol = NULL, *afterProtocol = NULL, *colon;
 udcParseUrl(url, &protocol, &afterProtocol, &colon);
 freez(&protocol);
 freez(&afterProtocol);
 return colon==NULL;
 }
 
 boolean udcExists(char *url)
 /* return true if a local or remote file exists */
 {
 return udcFileSize(url)!=-1;
 }
+
+void udcMMap(struct udcFile *file)
+/* Enable mmap access to underlying file; udcMMapFetch must be called to access regions. */
+{
+if (file->mmapBase != NULL)
+    errAbort("File is already mmaped: %s", file->url);
+void *base = mmap(NULL, file->size, PROT_READ, MAP_SHARED, file->fdSparse, 0);
+if (base == MAP_FAILED)
+    errnoAbort("mmap() failed for %s", file->url);
+file->mmapBase = base;  // set only on success; udcFileClose munmaps any non-NULL mmapBase
+}
+
+void *udcMMapFetch(struct udcFile *file, bits64 offset, bits64 size)
+/* Return pointer to a region of the file in memory, ensuring that the region
+ * is cached. udcMMap must have been called to enable access.  This must be
+ * called before the first access to a range of the file or erroneous (zero)
+ * data may be returned.  May be called multiple times on the same or
+ * overlapping ranges.  Aborts if the range extends past the end of the file. */
+{
+if (file->mmapBase == NULL)
+    errAbort("udcMMap() has not been called for: %s", file->url);
+if (offset > file->size || size > file->size - offset)  // offset + size could wrap around
+    errAbort("udcMMapFetch on offset %lld for %lld bytes exceeds length of file %lld on %s",
+             offset, size, file->size, file->url);
+if (udcCacheEnabled() && !sameString(file->protocol, "transparent"))
+    udcCachePreload(file, offset, size);
+return ((char*)file->mmapBase) + offset;
+}
+