09243c7da5f14dbfa408cf7a54e567080da81a05
markd
  Wed Oct 17 10:44:15 2018 -0700
Added ability to access UDC file in cache using virtual memory (mmap).  The tests sure are nice.

diff --git src/lib/tests/udcTest.c src/lib/tests/udcTest.c
index 3df2524..0457b97 100644
--- src/lib/tests/udcTest.c
+++ src/lib/tests/udcTest.c
@@ -7,37 +7,39 @@
 //                        2. *if* need to do own buffering, consider mmap()
 //                           (kernel handles buffering)
 
 #include <sys/wait.h>
 #include "common.h"
 #include "errAbort.h"
 #include "options.h"
 #include "portable.h"
 #include "udc.h"
 
 
 static struct optionSpec options[] = {
     {"size",     OPTION_BOOLEAN},
     {"raBuf",    OPTION_BOOLEAN},
     {"fork",     OPTION_BOOLEAN},
+    {"mmap",     OPTION_BOOLEAN},
     {"protocol", OPTION_STRING},
     {"seed",     OPTION_INT},
     {NULL, 0},
 };
 
 boolean raBuf = FALSE;   /* exercise the read-ahead buffer */
 boolean doFork = FALSE;
+boolean mmapAccess = FALSE; /* test access via mmap */
 char *protocol = "ftp";
 unsigned int seed = 0;
 int size = 0;
 
 // Local copy (reference file) and URL for testing:
 #define THOUSAND_HIVE "/hive/data/outside/1000genomes/ncbi/ftp-trace.ncbi.nih.gov/1000genomes/"
 #define THOUSAND_FTP "ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/"
 #define CHR4_SLX_BAM "NA12878/alignment/NA12878.chrom4.SLX.maq.SRP000032.2009_07.bam"
 
 // Use typical size range of bgzip-compressed data blocks:
 #define MIN_BLK_SIZE 20000
 #define MAX_BLK_SIZE 30000
 
 // Read at most this many consecutive blocks:
 #define MAX_BLOCKS 100
@@ -119,37 +121,45 @@
 if (offset > size)
     errAbort("readAndTest: Size of %s is %lld, but offset given is %lld", localCopy, size, offset);
 if (offset + len > size)
     {
     bits64 newSize = size - offset;
     warn("readAndTest: Size of %s is %lld, offset %lld + len %lld = %lld exceeds that; "
 	 "reducing len to %lld", localCopy, size, offset, len, offset+len, newSize);
     len = newSize;
     }
 verbose(2, "0x%08llx: %lldB @%lld\n", (bits64)udcf, len, offset);
 
 // Get data from the reference file:
 openSeekRead(localCopy, offset, len, bufRef);
 
 // Get data from udcFile object and compare to reference:
+if (mmapAccess)
+    {
+    char *ptr = udcMMapFetch(udcf, offset, len);
+    memcpy(bufTest, ptr, len);
+    }
+else
+    {
     udcSeek(udcf, offset);
     bits64 bytesRead = udcRead(udcf, bufTest, len);
-
     // udcRead does a mustRead, and we have checked offset+len, so this should never happen,
     // but test anyway:
     if (bytesRead < len)
         errAbort("Got %lld bytes instead of %lld from %s @%lld", bytesRead, len, url, offset);
+    }
+
 gotError |= compareBytes(bufTest, bufRef, len, url, localCopy, "url", offset);
 
 if (0) // -- Check sparseData after the dust settles.
     {
     // Get data from udcf's sparse data file and compare to reference:
     char *sparseFileName = getSparseFileName(url);
     openSeekRead(sparseFileName, offset, len, bufTest);
     gotError |= compareBytes(bufTest, bufRef, len, sparseFileName, localCopy, "sparse", offset);
     }
 return gotError;
 }
 
 INLINE double myDrand()
 /* Return something from [0.0,1.0). */
 {
@@ -242,30 +252,32 @@
 // Check bitmap bits too:
 struct udcFile *udcf = udcFileOpen(url, udcDefaultDir());
 verbose(1, "checking bitmap bits (%d..%d].\n", startBlock, endBlock);
 udcCheckCacheBits(udcf, startBlock, endBlock);
 udcFileClose(&udcf);
 return gotError;
 }
 
 boolean testReadAheadBufferMode(char *url, char *localCopy, int mode)
 /* Open a udcFile, read different random locations, and check for errors. */
 {
 boolean gotError = FALSE;
 bits64 fSize = fileSize(localCopy);
 
 struct udcFile *udcf = udcFileOpen(url, udcDefaultDir());
+if (mmapAccess)
+    udcMMap(udcf);
 bits64 offset = 0;
 if (mode == -1)
    offset = 0 + 8192 * myDrand();
 if (mode == 0)
    offset = (bits64)(fSize * myDrand());
 if (mode == 1)
    offset = fSize - 8192 * myDrand();
 
 
 int delta = 0;
 int i;
 for(i=0; i<100; ++i)
     {
 
     int size = 8192 * myDrand();
@@ -307,38 +319,42 @@
 gotError |= testReadAheadBufferMode(url, localCopy, 1);   // near end of file
 return gotError;
 }
 
 
 boolean testInterleaved(char *url, char *localCopy)
 /* Open two udcFile handles to the same file, read probably-different random locations,
  * read from probably-overlapping random locations, and check for errors. */
 {
 boolean gotError = FALSE;
 bits64 size = fileSize(localCopy);
 
 
 // First, read some bytes from udcFile udcf1.
 struct udcFile *udcf1 = udcFileOpen(url, udcDefaultDir());
+if (mmapAccess)
+    udcMMap(udcf1);
 int blksRead1 = 0;
 bits64 offset1 = randomStartOffset(size);
 
 gotError |= readAndTestBlocks(udcf1, &offset1, 2, &blksRead1, localCopy, url);
 
 // While keeping udcf1 open, create udcf2 on the same URL, and read from a 
 // (probably) different location:
 struct udcFile *udcf2 = udcFileOpen(url, udcDefaultDir());
+if (mmapAccess)
+    udcMMap(udcf2);
 int blksRead2 = 0;
 bits64 offset2 = randomStartOffset(size);
 
 gotError |= readAndTestBlocks(udcf2, &offset2, 2, &blksRead2, localCopy, url);
 // Interleave some successive-location reads:
 int i;
 for (i = 0;  i < 10;  i++)
     {
     gotError |= readAndTestBlocks(udcf1, &offset1, 1, &blksRead1, localCopy, url);
     gotError |= readAndTestBlocks(udcf2, &offset2, 1, &blksRead2, localCopy, url);
     }
 
 // Unevenly interleave reads starting from the same new random location:
 bits64 sameOffset = randomStartOffset(size);
 blksRead1 = 0;
@@ -372,69 +388,74 @@
 boolean testConcurrent(char *url, char *localCopy)
 /* Fork; then parent and child access the same locations (hopefully) concurrently. */
 {
 boolean gotErrorParent = FALSE, gotErrorChild = FALSE;
 bits64 size = fileSize(localCopy);
 bits64 sameOffset = randomStartOffset(size);
 bits64 offsetParent = sameOffset, offsetChild = sameOffset;
 
 pid_t kidPid = fork();
 if (kidPid < 0)
     errnoAbort("testConcurrent: fork failed");
 else if (kidPid == 0)
     {
     // child: access url and then exit, to pass control back to parent.
     struct udcFile *udcf = udcFileOpen(url, udcDefaultDir());
+    if (mmapAccess)
+        udcMMap(udcf);
     int blksRead = 0;
     gotErrorChild = readAndTestBlocks(udcf, &offsetParent, MAX_BLOCKS, &blksRead, localCopy, url);
     udcFileClose(&udcf);
     exit(0);
     }
 else
     {
     // parent: access url, wait for child, do post-checking.
     struct udcFile *udcf = udcFileOpen(url, udcDefaultDir());
+    if (mmapAccess)
+        udcMMap(udcf);
     int blksRead = 0;
     gotErrorParent = readAndTestBlocks(udcf, &offsetChild, MAX_BLOCKS, &blksRead, localCopy, url);
     udcFileClose(&udcf);
     // wait for child to finish:
     int childStatus;
     int retPid = waitpid(kidPid, &childStatus, 0);
     if (retPid < 0)
 	errnoAbort("testConcurrent: waitpid(%d) failed", kidPid);
     if (! WIFEXITED(childStatus))
 	warn("testConcurrent: child process did not exit() normally");
     if (WEXITSTATUS(childStatus))
 	warn("testConcurrent: child exit status = %d)", WEXITSTATUS(childStatus));
     if (gotErrorChild)
 	verbose(1, "Parent can see child got error.\n");
     gotErrorParent |= checkCacheFiles(sameOffset, max(offsetParent, offsetChild), url, localCopy);
     return (gotErrorParent || gotErrorChild);
     }
 errAbort("testConcurrent: control should never reach this point.");
 return TRUE;
 }
 
 
 int main(int argc, char *argv[])
 /* Set up test params and run tests. */
 {
 boolean gotError = FALSE;
 optionInit(&argc, argv, options);
 size = optionExists("size");
 raBuf = optionExists("raBuf");
 doFork = optionExists("fork");
+mmapAccess = optionExists("mmap");
 protocol = optionVal("protocol", protocol);
 seed = optionInt("seed", seed);
 
 char *host = getenv("HOST");
 if (host == NULL || !startsWith("hgwdev", host))
     {
     // So that we don't break "make test" on other machines, use stdout and exit 0:
     puts("Sorry, this must be run on hgwdev (with HOST=hgwdev)");
     exit(0);
     }
 errAbortDebugnPushPopErr();
 char tmp[256];
 safef(tmp, sizeof tmp, "/data/tmp/%s/udcCache", getenv("USER"));
 udcSetDefaultDir(tmp);
 if (seed == 0)