edea316d6a5d63daeb613abf46e19c0d703a41b9
galt
  Tue Sep 6 12:48:55 2011 -0700
added read-ahead buffer testing
diff --git src/lib/tests/udcTest.c src/lib/tests/udcTest.c
index cb8237c..1870469 100644
--- src/lib/tests/udcTest.c
+++ src/lib/tests/udcTest.c
@@ -2,36 +2,38 @@
 
 // suggestions from Mark: 1. try setvbuf, to make FILE * unbuffered -- does that help?
 //                        2. *if* need to do own buffering, consider mmap()
 //                           (kernel handles buffering)
 
 #include <sys/wait.h>
 #include "common.h"
 #include "errabort.h"
 #include "options.h"
 #include "portable.h"
 #include "udc.h"
 
 static char const rcsid[] = "$Id: udcTest.c,v 1.2 2009/12/19 01:06:27 angie Exp $";
 
 static struct optionSpec options[] = {
+    {"raBuf",    OPTION_BOOLEAN},
     {"fork",     OPTION_BOOLEAN},
     {"protocol", OPTION_STRING},
     {"seed",     OPTION_INT},
     {NULL, 0},
 };
 
+boolean raBuf = FALSE;   /* exercise the read-ahead buffer */
 boolean doFork = FALSE;
 char *protocol = "ftp";
 unsigned int seed = 0;
 
 // Local copy (reference file) and URL for testing:
 #define THOUSAND_HIVE "/hive/data/outside/1000genomes/ncbi/ftp-trace.ncbi.nih.gov/1000genomes/"
 #define THOUSAND_FTP "ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/"
 #define CHR3_SLX_BAM "NA12878/alignment/NA12878.chrom3.SLX.maq.SRP000032.2009_07.bam"
 #define CHR4_SLX_BAM "NA12878/alignment/NA12878.chrom4.SLX.maq.SRP000032.2009_07.bam"
 
 // Use typical size range of bgzip-compressed data blocks:
 #define MIN_BLK_SIZE 20000
 #define MAX_BLK_SIZE 30000
 
 // Read at most this many consecutive blocks:
@@ -116,30 +118,31 @@
 if (offset + len > size)
     {
     bits64 newSize = size - offset;
     warn("readAndTest: Size of %s is %lld, offset %lld + len %lld = %lld exceeds that; "
 	 "reducing len to %lld", localCopy, size, offset, len, offset+len, newSize);
     len = newSize;
     }
 verbose(2, "0x%08llx: %lldB @%lld\n", (bits64)udcf, len, offset);
 
 // Get data from the reference file:
 openSeekRead(localCopy, offset, len, bufRef);
 
 // Get data from udcFile object and compare to reference:
 udcSeek(udcf, offset);
 bits64 bytesRead = udcRead(udcf, bufTest, len);
+
 // udcRead does a mustRead, and we have checked offset+len, so this should never happen,
 // but test anyway:
 if (bytesRead < len)
     errAbort("Got %lld bytes instead of %lld from %s @%lld", bytesRead, len, url, offset);
 gotError |= compareBytes(bufTest, bufRef, len, url, localCopy, "url", offset);
 
 if (0) // -- Check sparseData after the dust settles.
     {
     // Get data from udcf's sparse data file and compare to reference:
     char *sparseFileName = getSparseFileName(url);
     openSeekRead(sparseFileName, offset, len, bufTest);
     gotError |= compareBytes(bufTest, bufRef, len, sparseFileName, localCopy, "sparse", offset);
     }
 return gotError;
 }
@@ -229,47 +232,107 @@
     char testDesc[64];
     safef(testDesc, sizeof(testDesc), "SPARSE %lld blk %d", offset, i);
     gotError |= compareBytes(bufSparse, bufRef, udcBlockSize, sparseFileName, localCopy,
 			     testDesc, offset);
     }
 mustCloseFd(&fdLocal);
 mustCloseFd(&fdSparse);
 // Check bitmap bits too:
 struct udcFile *udcf = udcFileOpen(url, udcDefaultDir());
 verbose(1, "checking bitmap bits (%d..%d].\n", startBlock, endBlock);
 udcCheckCacheBits(udcf, startBlock, endBlock);
 udcFileClose(&udcf);
 return gotError;
 }
 
+boolean testReadAheadBufferMode(char *url, char *localCopy, int mode)
+/* Open a udcFile on url and do 100 reads of under 8192 bytes each, moving the
+ * offset by a random step (about -6000..+6000 bytes) between reads; readAndTest
+ * checks each read against localCopy.  mode picks the first offset: -1 = within
+ * the first 8192 bytes, 0 = anywhere in the file, 1 = within the last 8192 bytes.
+ * Returns TRUE on any error.  (Ranges assume myDrand() is in [0,1) -- confirm.) */
+{
+boolean gotError = FALSE;
+bits64 fSize = fileSize(localCopy);
+struct udcFile *udcf = udcFileOpen(url, udcDefaultDir());
+bits64 offset = 0;
+if (mode == -1)
+    offset = (bits64)(8192 * myDrand());
+else if (mode == 0)
+    offset = (bits64)(fSize * myDrand());
+else if (mode == 1)
+    {
+    // A file shorter than the window must not push the unsigned offset below 0:
+    bits64 back = (bits64)(8192 * myDrand());
+    offset = (back < fSize) ? fSize - back : 0;
+    }
+if (offset > fSize)   // mode -1 can overshoot a very small file
+    offset = fSize;
+int i;
+for (i = 0;  i < 100;  i++)
+    {
+    bits64 size = (bits64)(8192 * myDrand());
+    if (size > fSize - offset)   // offset <= fSize here, so no wraparound
+	size = fSize - offset;
+    gotError |= readAndTest(udcf, offset, size, localCopy, url);
+    int delta = (int)(-6000 + (12000 * myDrand()));   // -6000 to +6000
+    if (delta < 0)
+	{
+	// offset is unsigned: step back explicitly and stop at 0 rather than wrap.
+	bits64 back = (bits64)(-delta);
+	offset = (back > offset) ? 0 : offset - back;
+	}
+    else
+	offset += delta;
+    if (offset > fSize)
+	offset = fSize;
+    }
+udcFileClose(&udcf);
+return gotError;
+}
+boolean testReadAheadBuffer(char *url, char *localCopy)
+/* Run testReadAheadBufferMode three times -- starting near the beginning of the file,
+ * anywhere in the file, and near the end -- and return the OR of the three results. */
+{
+boolean errNearStart = testReadAheadBufferMode(url, localCopy, -1);
+boolean errAnywhere = testReadAheadBufferMode(url, localCopy, 0);
+boolean errNearEnd = testReadAheadBufferMode(url, localCopy, 1);
+return errNearStart | errAnywhere | errNearEnd;
+}
+
+
 boolean testInterleaved(char *url, char *localCopy)
 /* Open two udcFile handles to the same file, read probably-different random locations,
  * read from probably-overlapping random locations, and check for errors. */
 {
 boolean gotError = FALSE;
 bits64 size = fileSize(localCopy);
 
+
 // First, read some bytes from udcFile udcf1.
 struct udcFile *udcf1 = udcFileOpen(url, udcDefaultDir());
 int blksRead1 = 0;
 bits64 offset1 = randomStartOffset(size);
+
 gotError |= readAndTestBlocks(udcf1, &offset1, 2, &blksRead1, localCopy, url);
+
 // While keeping udcf1 open, create udcf2 on the same URL, and read from a 
 // (probably) different location:
 struct udcFile *udcf2 = udcFileOpen(url, udcDefaultDir());
 int blksRead2 = 0;
 bits64 offset2 = randomStartOffset(size);
+
 gotError |= readAndTestBlocks(udcf2, &offset2, 2, &blksRead2, localCopy, url);
 // Interleave some successive-location reads:
 int i;
 for (i = 0;  i < 10;  i++)
     {
     gotError |= readAndTestBlocks(udcf1, &offset1, 1, &blksRead1, localCopy, url);
     gotError |= readAndTestBlocks(udcf2, &offset2, 1, &blksRead2, localCopy, url);
     }
 
 // Unevenly interleave reads starting from the same new random location:
 bits64 sameOffset = randomStartOffset(size);
 blksRead1 = 0;
 offset1 = sameOffset;
 blksRead2 = 0;
 offset2 = sameOffset;
@@ -280,30 +343,31 @@
     if (blksRead1 < MAX_BLOCKS)
 	{
 	int n = 1 + (int)(5 * myDrand());
 	n = min(MAX_BLOCKS - blksRead1, n);
 	gotError |= readAndTestBlocks(udcf1, &offset1, n, &blksRead1, localCopy, url);
 	}
     if (blksRead2 < MAX_BLOCKS)
 	{
 	int n = 1 + (int)(5 * myDrand());
 	n = min(MAX_BLOCKS - blksRead2, n);
 	gotError |= readAndTestBlocks(udcf2, &offset2, n, &blksRead2, localCopy, url);
 	}
     }
 udcFileClose(&udcf1);
 udcFileClose(&udcf2);
+verbose(1,"checkCacheFiles\n");
 gotError |= checkCacheFiles(sameOffset, max(offset1, offset2), url, localCopy);
 return gotError;
 }
 
 boolean testConcurrent(char *url, char *localCopy)
 /* Fork; then parent and child access the same locations (hopefully) concurrently. */
 {
 boolean gotErrorParent = FALSE, gotErrorChild = FALSE;
 bits64 size = fileSize(localCopy);
 bits64 sameOffset = randomStartOffset(size);
 bits64 offsetParent = sameOffset, offsetChild = sameOffset;
 
 pid_t kidPid = fork();
 if (kidPid < 0)
     errnoAbort("testConcurrent: fork failed");
@@ -335,62 +399,69 @@
     if (gotErrorChild)
 	verbose(1, "Parent can see child got error.\n");
     gotErrorParent |= checkCacheFiles(sameOffset, max(offsetParent, offsetChild), url, localCopy);
     return (gotErrorParent || gotErrorChild);
     }
 errAbort("testConcurrent: control should never reach this point.");
 return TRUE;
 }
 
 
 int main(int argc, char *argv[])
 /* Set up test params and run tests. */
 {
 boolean gotError = FALSE;
 optionInit(&argc, argv, options);
+raBuf = optionExists("raBuf");
 doFork = optionExists("fork");
 protocol = optionVal("protocol", protocol);
 seed = optionInt("seed", seed);
 
 char *host = getenv("HOST");
 if (host == NULL || !startsWith("hgwdev", host))
     {
     // So that we don't break "make test" on other machines, use stdout and exit 0:
     puts("Sorry, this must be run on hgwdev (with HOST=hgwdev)");
     exit(0);
     }
 errAbortDebugnPushPopErr();
-udcSetDefaultDir("/data/tmp/angie/udcCache");
+char *user = getenv("USER"), tmp[256];   // getenv returns NULL when USER is unset
+safef(tmp, sizeof tmp, "/data/tmp/%s/udcCache", (user != NULL) ? user : "udcTest");
+udcSetDefaultDir(tmp);
 if (seed == 0)
     {
     long now = clock1();
     printf("Seeding random with unix time %ld\n", now);
     srand(now);
     }
 else
     {
     printf("Seeding random with option -seed=%d\n", seed);
     srand(seed);
     }
 
 if (sameString(protocol, "http"))
     {
     char *httpUrl = "http://hgwdev.cse.ucsc.edu/~angie/wgEncodeCshlRnaSeqAlignmentsK562ChromatinShort.bb";
     char *httpLocalCopy = "/gbdb/hg18/bbi/wgEncodeCshlRnaSeqAlignmentsK562ChromatinShort.bb";
-    if (doFork)
+    if (raBuf)
+	gotError |= testReadAheadBuffer(httpUrl, httpLocalCopy);
+    else if (doFork)
 	gotError |= testConcurrent(httpUrl, httpLocalCopy);
     else
 	gotError |= testInterleaved(httpUrl, httpLocalCopy);
     }
 else if (sameString(protocol, "ftp"))
     {
     char *ftpUrl = THOUSAND_FTP CHR4_SLX_BAM;
     char *ftpLocalCopy = THOUSAND_HIVE CHR4_SLX_BAM;
-    if (doFork)
+    if (raBuf)
+	gotError |= testReadAheadBuffer(ftpUrl, ftpLocalCopy);
+    else if (doFork)
 	gotError |= testConcurrent(ftpUrl, ftpLocalCopy);
     else
 	gotError |= testInterleaved(ftpUrl, ftpLocalCopy);
     }
 else
     errAbort("Unrecognized protocol '%s'", protocol);
 return gotError;
 }