09243c7da5f14dbfa408cf7a54e567080da81a05 markd Wed Oct 17 10:44:15 2018 -0700 Added ability to access UDC file in cache using virtual memory (mmap). The tests sure are nice. diff --git src/lib/tests/udcTest.c src/lib/tests/udcTest.c index 3df2524..0457b97 100644 --- src/lib/tests/udcTest.c +++ src/lib/tests/udcTest.c @@ -7,37 +7,39 @@ // 2. *if* need to do own buffering, consider mmap() // (kernel handles buffering) #include #include "common.h" #include "errAbort.h" #include "options.h" #include "portable.h" #include "udc.h" static struct optionSpec options[] = { {"size", OPTION_BOOLEAN}, {"raBuf", OPTION_BOOLEAN}, {"fork", OPTION_BOOLEAN}, + {"mmap", OPTION_BOOLEAN}, {"protocol", OPTION_STRING}, {"seed", OPTION_INT}, {NULL, 0}, }; boolean raBuf = FALSE; /* exercise the read-ahead buffer */ boolean doFork = FALSE; +boolean mmapAccess = FALSE; /* test access via mmap */ char *protocol = "ftp"; unsigned int seed = 0; int size = 0; // Local copy (reference file) and URL for testing: #define THOUSAND_HIVE "/hive/data/outside/1000genomes/ncbi/ftp-trace.ncbi.nih.gov/1000genomes/" #define THOUSAND_FTP "ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/" #define CHR4_SLX_BAM "NA12878/alignment/NA12878.chrom4.SLX.maq.SRP000032.2009_07.bam" // Use typical size range of bgzip-compressed data blocks: #define MIN_BLK_SIZE 20000 #define MAX_BLK_SIZE 30000 // Read at most this many consecutive blocks: #define MAX_BLOCKS 100 @@ -119,37 +121,45 @@ if (offset > size) errAbort("readAndTest: Size of %s is %lld, but offset given is %lld", localCopy, size, offset); if (offset + len > size) { bits64 newSize = size - offset; warn("readAndTest: Size of %s is %lld, offset %lld + len %lld = %lld exceeds that; " "reducing len to %lld", localCopy, size, offset, len, offset+len, newSize); len = newSize; } verbose(2, "0x%08llx: %lldB @%lld\n", (bits64)udcf, len, offset); // Get data from the reference file: openSeekRead(localCopy, offset, len, bufRef); // Get data from udcFile object 
and compare to reference: +if (mmapAccess) + { + char *ptr = udcMMapFetch(udcf, offset, len); + memcpy(bufTest, ptr, len); + } +else + { udcSeek(udcf, offset); bits64 bytesRead = udcRead(udcf, bufTest, len); - // udcRead does a mustRead, and we have checked offset+len, so this should never happen, // but test anyway: if (bytesRead < len) errAbort("Got %lld bytes instead of %lld from %s @%lld", bytesRead, len, url, offset); + } + gotError |= compareBytes(bufTest, bufRef, len, url, localCopy, "url", offset); if (0) // -- Check sparseData after the dust settles. { // Get data from udcf's sparse data file and compare to reference: char *sparseFileName = getSparseFileName(url); openSeekRead(sparseFileName, offset, len, bufTest); gotError |= compareBytes(bufTest, bufRef, len, sparseFileName, localCopy, "sparse", offset); } return gotError; } INLINE double myDrand() /* Return something from [0.0,1.0). */ { @@ -242,30 +252,32 @@ // Check bitmap bits too: struct udcFile *udcf = udcFileOpen(url, udcDefaultDir()); verbose(1, "checking bitmap bits (%d..%d].\n", startBlock, endBlock); udcCheckCacheBits(udcf, startBlock, endBlock); udcFileClose(&udcf); return gotError; } boolean testReadAheadBufferMode(char *url, char *localCopy, int mode) /* Open a udcFile, read different random locations, and check for errors. 
*/ { boolean gotError = FALSE; bits64 fSize = fileSize(localCopy); struct udcFile *udcf = udcFileOpen(url, udcDefaultDir()); +if (mmapAccess) + udcMMap(udcf); bits64 offset = 0; if (mode == -1) offset = 0 + 8192 * myDrand(); if (mode == 0) offset = (bits64)(fSize * myDrand()); if (mode == 1) offset = fSize - 8192 * myDrand(); int delta = 0; int i; for(i=0; i<100; ++i) { int size = 8192 * myDrand(); @@ -307,38 +319,42 @@ gotError |= testReadAheadBufferMode(url, localCopy, 1); // near end of file return gotError; } boolean testInterleaved(char *url, char *localCopy) /* Open two udcFile handles to the same file, read probably-different random locations, * read from probably-overlapping random locations, and check for errors. */ { boolean gotError = FALSE; bits64 size = fileSize(localCopy); // First, read some bytes from udcFile udcf1. struct udcFile *udcf1 = udcFileOpen(url, udcDefaultDir()); +if (mmapAccess) + udcMMap(udcf1); int blksRead1 = 0; bits64 offset1 = randomStartOffset(size); gotError |= readAndTestBlocks(udcf1, &offset1, 2, &blksRead1, localCopy, url); // While keeping udcf1 open, create udcf2 on the same URL, and read from a // (probably) different location: struct udcFile *udcf2 = udcFileOpen(url, udcDefaultDir()); +if (mmapAccess) + udcMMap(udcf2); int blksRead2 = 0; bits64 offset2 = randomStartOffset(size); gotError |= readAndTestBlocks(udcf2, &offset2, 2, &blksRead2, localCopy, url); // Interleave some successive-location reads: int i; for (i = 0; i < 10; i++) { gotError |= readAndTestBlocks(udcf1, &offset1, 1, &blksRead1, localCopy, url); gotError |= readAndTestBlocks(udcf2, &offset2, 1, &blksRead2, localCopy, url); } // Unevenly interleave reads starting from the same new random location: bits64 sameOffset = randomStartOffset(size); blksRead1 = 0; @@ -372,69 +388,74 @@ boolean testConcurrent(char *url, char *localCopy) /* Fork; then parent and child access the same locations (hopefully) concurrently. 
*/ { boolean gotErrorParent = FALSE, gotErrorChild = FALSE; bits64 size = fileSize(localCopy); bits64 sameOffset = randomStartOffset(size); bits64 offsetParent = sameOffset, offsetChild = sameOffset; pid_t kidPid = fork(); if (kidPid < 0) errnoAbort("testConcurrent: fork failed"); else if (kidPid == 0) { // child: access url and then exit, to pass control back to parent. struct udcFile *udcf = udcFileOpen(url, udcDefaultDir()); + if (mmapAccess) + udcMMap(udcf); int blksRead = 0; gotErrorChild = readAndTestBlocks(udcf, &offsetParent, MAX_BLOCKS, &blksRead, localCopy, url); udcFileClose(&udcf); exit(0); } else { // parent: access url, wait for child, do post-checking. struct udcFile *udcf = udcFileOpen(url, udcDefaultDir()); + if (mmapAccess) + udcMMap(udcf); int blksRead = 0; gotErrorParent = readAndTestBlocks(udcf, &offsetChild, MAX_BLOCKS, &blksRead, localCopy, url); udcFileClose(&udcf); // wait for child to finish: int childStatus; int retPid = waitpid(kidPid, &childStatus, 0); if (retPid < 0) errnoAbort("testConcurrent: waitpid(%d) failed", kidPid); if (! WIFEXITED(childStatus)) warn("testConcurrent: child process did not exit() normally"); if (WEXITSTATUS(childStatus)) warn("testConcurrent: child exit status = %d)", WEXITSTATUS(childStatus)); if (gotErrorChild) verbose(1, "Parent can see child got error.\n"); gotErrorParent |= checkCacheFiles(sameOffset, max(offsetParent, offsetChild), url, localCopy); return (gotErrorParent || gotErrorChild); } errAbort("testConcurrent: control should never reach this point."); return TRUE; } int main(int argc, char *argv[]) /* Set up test params and run tests. 
*/ { boolean gotError = FALSE; optionInit(&argc, argv, options); size = optionExists("size"); raBuf = optionExists("raBuf"); doFork = optionExists("fork"); +mmapAccess = optionExists("mmap"); protocol = optionVal("protocol", protocol); seed = optionInt("seed", seed); char *host = getenv("HOST"); if (host == NULL || !startsWith("hgwdev", host)) { // So that we don't break "make test" on other machines, use stdout and exit 0: puts("Sorry, this must be run on hgwdev (with HOST=hgwdev)"); exit(0); } errAbortDebugnPushPopErr(); char tmp[256]; safef(tmp, sizeof tmp, "/data/tmp/%s/udcCache", getenv("USER")); udcSetDefaultDir(tmp); if (seed == 0)