edea316d6a5d63daeb613abf46e19c0d703a41b9 galt Tue Sep 6 12:48:55 2011 -0700 added read-ahead buffer testing diff --git src/lib/tests/udcTest.c src/lib/tests/udcTest.c index cb8237c..1870469 100644 --- src/lib/tests/udcTest.c +++ src/lib/tests/udcTest.c @@ -2,36 +2,38 @@ // suggestions from Mark: 1. try setvbuf, to make FILE * unbuffered -- does that help? // 2. *if* need to do own buffering, consider mmap() // (kernel handles buffering) #include #include "common.h" #include "errabort.h" #include "options.h" #include "portable.h" #include "udc.h" static char const rcsid[] = "$Id: udcTest.c,v 1.2 2009/12/19 01:06:27 angie Exp $"; static struct optionSpec options[] = { + {"raBuf", OPTION_BOOLEAN}, {"fork", OPTION_BOOLEAN}, {"protocol", OPTION_STRING}, {"seed", OPTION_INT}, {NULL, 0}, }; +boolean raBuf = FALSE; /* exercise the read-ahead buffer */ boolean doFork = FALSE; char *protocol = "ftp"; unsigned int seed = 0; // Local copy (reference file) and URL for testing: #define THOUSAND_HIVE "/hive/data/outside/1000genomes/ncbi/ftp-trace.ncbi.nih.gov/1000genomes/" #define THOUSAND_FTP "ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/" #define CHR3_SLX_BAM "NA12878/alignment/NA12878.chrom3.SLX.maq.SRP000032.2009_07.bam" #define CHR4_SLX_BAM "NA12878/alignment/NA12878.chrom4.SLX.maq.SRP000032.2009_07.bam" // Use typical size range of bgzip-compressed data blocks: #define MIN_BLK_SIZE 20000 #define MAX_BLK_SIZE 30000 // Read at most this many consecutive blocks: @@ -116,30 +118,31 @@ if (offset + len > size) { bits64 newSize = size - offset; warn("readAndTest: Size of %s is %lld, offset %lld + len %lld = %lld exceeds that; " "reducing len to %lld", localCopy, size, offset, len, offset+len, newSize); len = newSize; } verbose(2, "0x%08llx: %lldB @%lld\n", (bits64)udcf, len, offset); // Get data from the reference file: openSeekRead(localCopy, offset, len, bufRef); // Get data from udcFile object and compare to reference: udcSeek(udcf, offset); bits64 bytesRead = 
udcRead(udcf, bufTest, len); + // udcRead does a mustRead, and we have checked offset+len, so this should never happen, // but test anyway: if (bytesRead < len) errAbort("Got %lld bytes instead of %lld from %s @%lld", bytesRead, len, url, offset); gotError |= compareBytes(bufTest, bufRef, len, url, localCopy, "url", offset); if (0) // -- Check sparseData after the dust settles. { // Get data from udcf's sparse data file and compare to reference: char *sparseFileName = getSparseFileName(url); openSeekRead(sparseFileName, offset, len, bufTest); gotError |= compareBytes(bufTest, bufRef, len, sparseFileName, localCopy, "sparse", offset); } return gotError; } @@ -229,47 +232,107 @@ char testDesc[64]; safef(testDesc, sizeof(testDesc), "SPARSE %lld blk %d", offset, i); gotError |= compareBytes(bufSparse, bufRef, udcBlockSize, sparseFileName, localCopy, testDesc, offset); } mustCloseFd(&fdLocal); mustCloseFd(&fdSparse); // Check bitmap bits too: struct udcFile *udcf = udcFileOpen(url, udcDefaultDir()); verbose(1, "checking bitmap bits (%d..%d].\n", startBlock, endBlock); udcCheckCacheBits(udcf, startBlock, endBlock); udcFileClose(&udcf); return gotError; } +boolean testReadAheadBufferMode(char *url, char *localCopy, int mode) +/* Open a udcFile, read different random locations, and check for errors. 
*/ +{ +boolean gotError = FALSE; +bits64 fSize = fileSize(localCopy); + +struct udcFile *udcf = udcFileOpen(url, udcDefaultDir()); +bits64 offset = 0; +if (mode == -1) + offset = 0 + 8192 * myDrand(); +if (mode == 0) + offset = (bits64)(fSize * myDrand()); +if (mode == 1) + offset = fSize - 8192 * myDrand(); + + +int delta = 0; +int i; +for(i=0; i<100; ++i) + { + + int size = 8192 * myDrand(); + + if ((offset + size) > fSize) + size = fSize - offset; + + gotError |= readAndTest(udcf, offset, size, localCopy, url); + + delta = -6000 + (12000 * myDrand()); // -6000 to +6000 + + if (delta < 0) // do not let unsigned offset go below 0 + if (-delta > offset) + delta = -offset; + + offset += delta; + + if (offset > fSize) + offset = fSize; + + } + +udcFileClose(&udcf); +return gotError; + +} +boolean testReadAheadBuffer(char *url, char *localCopy) +/* Open a udcFile, read different random locations, and check for errors. */ +{ +boolean gotError = FALSE; +gotError |= testReadAheadBufferMode(url, localCopy, -1); // near beginning of file +gotError |= testReadAheadBufferMode(url, localCopy, 0); // anywhere in file +gotError |= testReadAheadBufferMode(url, localCopy, 1); // near end of file +return gotError; +} + + boolean testInterleaved(char *url, char *localCopy) /* Open two udcFile handles to the same file, read probably-different random locations, * read from probably-overlapping random locations, and check for errors. */ { boolean gotError = FALSE; bits64 size = fileSize(localCopy); + // First, read some bytes from udcFile udcf1.
struct udcFile *udcf1 = udcFileOpen(url, udcDefaultDir()); int blksRead1 = 0; bits64 offset1 = randomStartOffset(size); + gotError |= readAndTestBlocks(udcf1, &offset1, 2, &blksRead1, localCopy, url); + // While keeping udcf1 open, create udcf2 on the same URL, and read from a // (probably) different location: struct udcFile *udcf2 = udcFileOpen(url, udcDefaultDir()); int blksRead2 = 0; bits64 offset2 = randomStartOffset(size); + gotError |= readAndTestBlocks(udcf2, &offset2, 2, &blksRead2, localCopy, url); // Interleave some successive-location reads: int i; for (i = 0; i < 10; i++) { gotError |= readAndTestBlocks(udcf1, &offset1, 1, &blksRead1, localCopy, url); gotError |= readAndTestBlocks(udcf2, &offset2, 1, &blksRead2, localCopy, url); } // Unevenly interleave reads starting from the same new random location: bits64 sameOffset = randomStartOffset(size); blksRead1 = 0; offset1 = sameOffset; blksRead2 = 0; offset2 = sameOffset; @@ -280,30 +343,31 @@ if (blksRead1 < MAX_BLOCKS) { int n = 1 + (int)(5 * myDrand()); n = min(MAX_BLOCKS - blksRead1, n); gotError |= readAndTestBlocks(udcf1, &offset1, n, &blksRead1, localCopy, url); } if (blksRead2 < MAX_BLOCKS) { int n = 1 + (int)(5 * myDrand()); n = min(MAX_BLOCKS - blksRead2, n); gotError |= readAndTestBlocks(udcf2, &offset2, n, &blksRead2, localCopy, url); } } udcFileClose(&udcf1); udcFileClose(&udcf2); +verbose(1,"checkCacheFiles\n"); gotError |= checkCacheFiles(sameOffset, max(offset1, offset2), url, localCopy); return gotError; } boolean testConcurrent(char *url, char *localCopy) /* Fork; then parent and child access the same locations (hopefully) concurrently. 
*/ { boolean gotErrorParent = FALSE, gotErrorChild = FALSE; bits64 size = fileSize(localCopy); bits64 sameOffset = randomStartOffset(size); bits64 offsetParent = sameOffset, offsetChild = sameOffset; pid_t kidPid = fork(); if (kidPid < 0) errnoAbort("testConcurrent: fork failed"); @@ -335,62 +399,69 @@ if (gotErrorChild) verbose(1, "Parent can see child got error.\n"); gotErrorParent |= checkCacheFiles(sameOffset, max(offsetParent, offsetChild), url, localCopy); return (gotErrorParent || gotErrorChild); } errAbort("testConcurrent: control should never reach this point."); return TRUE; } int main(int argc, char *argv[]) /* Set up test params and run tests. */ { boolean gotError = FALSE; optionInit(&argc, argv, options); +raBuf = optionExists("raBuf"); doFork = optionExists("fork"); protocol = optionVal("protocol", protocol); seed = optionInt("seed", seed); char *host = getenv("HOST"); if (host == NULL || !startsWith("hgwdev", host)) { // So that we don't break "make test" on other machines, use stdout and exit 0: puts("Sorry, this must be run on hgwdev (with HOST=hgwdev)"); exit(0); } errAbortDebugnPushPopErr(); -udcSetDefaultDir("/data/tmp/angie/udcCache"); +char tmp[256]; +safef(tmp, sizeof tmp, "/data/tmp/%s/udcCache", getenv("USER")); +udcSetDefaultDir(tmp); if (seed == 0) { long now = clock1(); printf("Seeding random with unix time %ld\n", now); srand(now); } else { printf("Seeding random with option -seed=%d\n", seed); srand(seed); } if (sameString(protocol, "http")) { char *httpUrl = "http://hgwdev.cse.ucsc.edu/~angie/wgEncodeCshlRnaSeqAlignmentsK562ChromatinShort.bb"; char *httpLocalCopy = "/gbdb/hg18/bbi/wgEncodeCshlRnaSeqAlignmentsK562ChromatinShort.bb"; - if (doFork) + if (raBuf) + gotError |= testReadAheadBuffer(httpUrl, httpLocalCopy); + else if (doFork) gotError |= testConcurrent(httpUrl, httpLocalCopy); else gotError |= testInterleaved(httpUrl, httpLocalCopy); } else if (sameString(protocol, "ftp")) { char *ftpUrl = THOUSAND_FTP CHR4_SLX_BAM; char 
*ftpLocalCopy = THOUSAND_HIVE CHR4_SLX_BAM; - if (doFork) + if (raBuf) + gotError |= testReadAheadBuffer(ftpUrl, ftpLocalCopy); + else if (doFork) gotError |= testConcurrent(ftpUrl, ftpLocalCopy); else gotError |= testInterleaved(ftpUrl, ftpLocalCopy); } else errAbort("Unrecognized protocol '%s'", protocol); return gotError; }