 /* udcTest -- test the URL data cache */
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 // suggestions from Mark: 1. try setvbuf, to make FILE * unbuffered -- does that help?
 //                        2. *if* need to do own buffering, consider mmap()
 //                           (kernel handles buffering)
 #include <sys/wait.h>
 #include "common.h"
 #include "errabort.h"
 #include "options.h"
 #include "portable.h"
 #include "udc.h"
 /* Command-line options accepted by udcTest; the {NULL, 0} entry terminates the table. */
 static struct optionSpec options[] = {
     {"size",     OPTION_BOOLEAN},
     {"raBuf",    OPTION_BOOLEAN},
     {"fork",     OPTION_BOOLEAN},
     {"protocol", OPTION_STRING},
     {"seed",     OPTION_INT},
     {NULL, 0},
 };
 /* Settings filled in from the command line by main(); the values here are the defaults. */
 boolean raBuf = FALSE;   /* exercise the read-ahead buffer */
 boolean doFork = FALSE;  /* set from -fork: run the concurrent (forking) test */
 char *protocol = "ftp";  /* set from -protocol: main() recognizes "http" and "ftp" */
 unsigned int seed = 0;   /* set from -seed: 0 makes main() report a time-based seed */
 int size = 0;            /* set from -size: nonzero selects the file-size test (http only) */
 // Local copy (reference file) and URL for testing:
 // THOUSAND_HIVE is the local directory prefix and THOUSAND_FTP the URL prefix;
 // main() appends one of the *_SLX_BAM relative paths to each of them.
 #define THOUSAND_HIVE "/hive/data/outside/1000genomes/ncbi/ftp-trace.ncbi.nih.gov/1000genomes/"
 #define THOUSAND_FTP "ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/pilot_data/data/"
 // NOTE(review): CHR3_SLX_BAM is not referenced in the visible part of this file; main() uses CHR4.
 #define CHR3_SLX_BAM "NA12878/alignment/NA12878.chrom3.SLX.maq.SRP000032.2009_07.bam"
 #define CHR4_SLX_BAM "NA12878/alignment/NA12878.chrom4.SLX.maq.SRP000032.2009_07.bam"
 // Use typical size range of bgzip-compressed data blocks:
 // (randomBlockSize() draws from [MIN_BLK_SIZE,MAX_BLK_SIZE).)
 #define MIN_BLK_SIZE 20000
 #define MAX_BLK_SIZE 30000
 // Read at most this many consecutive blocks:
 // (readAndTestBlocks() aborts once a caller's block counter has reached this limit.)
 #define MAX_BLOCKS 100
 void openSeekRead(char *filename, bits64 offset, bits64 len, char *buf)
 /* Read len bytes starting at offset from filename into buf or die.
  * buf must have room for at least len bytes.  The descriptor is closed before
  * returning; this function is called once per block read, so leaving it open
  * would leak one descriptor per call. */
 {
 int fd = mustOpenFd(filename, O_RDONLY);
 mustLseek(fd, offset, SEEK_SET);
 mustReadFd(fd, buf, len);
 mustCloseFd(&fd);
 }
 boolean compareBytes(char *bufTest, char *bufRef, bits64 len, char *testName, char *refName,
                      char *testDesc, bits64 offset)
 /* Report any differences between bufTest and bufRef (don't errAbort).
  * Compares the first len bytes.  offset is the file position of byte 0 and
  * testDesc labels the data under test; both are used only in messages.
  * testName and refName are accepted but not used.
  * Warns about the first differing byte and about the totals, and returns
  * TRUE if at least one byte differs, FALSE if the buffers are identical. */
 {
 boolean gotError = FALSE;
 bits64 i, difCount = 0, nonZeroDifCount = 0;
 for (i=0;  i < len;  i++)
     {
     if (bufTest[i] != bufRef[i])
         {
         // Only the first mismatch is reported in detail; the rest are just counted.
         if (difCount == 0)
             warn("*** %s%s and ref first differ at offset %lld + %lld = %lld [0x%02x != 0x%02x]",
                  (bufTest[i] != '\0' ? "NONZERO " : ""),
                  testDesc, offset, i, offset+i, (bits8)bufTest[i], (bits8)bufRef[i]);
         difCount++;
         // Nonzero wrong bytes are tallied separately from bytes that are wrongly zero.
         if (bufTest[i] != '\0')
             nonZeroDifCount++;
         }
     }
 if (difCount == 0)
     verbose(3, "Success: %s = ref, %lld bytes @%lld\n", testDesc, len, offset);
 else
     {
     warn("--> %lld different bytes (%lld %s) total in block of %lld bytes starting at %lld",
          difCount, nonZeroDifCount, (nonZeroDifCount ? "NONZERO" : "nonzero"), len, offset);
     gotError = TRUE;
     }
 return gotError;
 }
 char *getSparseFileName(char *url)
 /* Return the path to sparseData cache file for url. */
 struct slName *sl, *cacheFiles = udcFileCacheFiles(url, udcDefaultDir());
 char *sparseFileName = NULL;
 for (sl = cacheFiles; sl != NULL; sl = sl->next)
     if (endsWith(sl->name, "sparseData"))
 	sparseFileName = sl->name;
 if (sparseFileName == NULL)
     errAbort("can't find sparseData file in udcFileCacheFiles(%s) results", url);
 return sparseFileName;
 boolean readAndTest(struct udcFile *udcf, bits64 offset, bits64 len, char *localCopy, char *url)
 /* Read len bytes starting at offset in udcf.  Compare both the bytes returned,
  * and bytes directly read from the sparseData file, with data from the same 
  * location in our local copy of the URL data.
  * (The sparseData comparison is currently disabled with if (0).)
  * If offset+len runs past the end of localCopy, len is reduced with a warning;
  * an offset beyond the end is fatal.  Returns TRUE if any difference was found.
  * The two comparison buffers are static and reused across calls, so this
  * function is not reentrant. */
 {
 boolean gotError = FALSE;
 static char *bufRef = NULL, *bufTest = NULL;
 static bits64 bufSize = 0;
 if (bufRef != NULL && bufSize < len)
     {
     // Buffers from an earlier call are too small: release them so that the
     // allocation below replaces them with big enough ones.
     freez(&bufRef);
     freez(&bufTest);
     bufSize = 0;
     }
 if (bufRef == NULL)
     {
     bufSize = ((len >> 10) + 1) << 10;  // round up to next kB
     bufRef = needMem(bufSize);
     bufTest = needMem(bufSize);
     }
 // Check offset + len against size of reference file:
 bits64 refSize = fileSize(localCopy);
 if (offset > refSize)
     errAbort("readAndTest: Size of %s is %lld, but offset given is %lld",
              localCopy, refSize, offset);
 if (offset + len > refSize)
     {
     bits64 newSize = refSize - offset;
     warn("readAndTest: Size of %s is %lld, offset %lld + len %lld = %lld exceeds that; "
          "reducing len to %lld", localCopy, refSize, offset, len, offset+len, newSize);
     len = newSize;
     }
 verbose(2, "0x%08llx: %lldB @%lld\n", (bits64)udcf, len, offset);
 // Get data from the reference file:
 openSeekRead(localCopy, offset, len, bufRef);
 // Get data from udcFile object and compare to reference:
 udcSeek(udcf, offset);
 bits64 bytesRead = udcRead(udcf, bufTest, len);
 // udcRead does a mustRead, and we have checked offset+len, so this should never happen,
 // but test anyway:
 if (bytesRead < len)
     errAbort("Got %lld bytes instead of %lld from %s @%lld", bytesRead, len, url, offset);
 gotError |= compareBytes(bufTest, bufRef, len, url, localCopy, "url", offset);
 if (0) // -- Check sparseData after the dust settles.
     {
     // Get data from udcf's sparse data file and compare to reference:
     char *sparseFileName = getSparseFileName(url);
     openSeekRead(sparseFileName, offset, len, bufTest);
     gotError |= compareBytes(bufTest, bufRef, len, sparseFileName, localCopy, "sparse", offset);
     }
 return gotError;
 }
 INLINE double myDrand()
 /* Return something from [0.0,1.0).
  * Dividing by RAND_MAX + 1.0 (a double) keeps the result strictly below 1.0
  * and avoids integer overflow in RAND_MAX + 1. */
 {
 return (double)(rand() / (RAND_MAX + 1.0));
 }
 INLINE bits64 randomStartOffset(bits64 max)
 /* Return something from [0,max-MAX_BLOCKS*MAX_BLK_SIZE)
  * so that MAX_BLOCKS maximum-size blocks starting there still end at or
  * before max.  If max is too small to leave any such room, return 0 instead
  * of letting the unsigned subtraction wrap around to a huge offset. */
 {
 bits64 span = (bits64)MAX_BLOCKS * MAX_BLK_SIZE;
 if (max <= span)
     return 0;
 bits64 maxStart = max - span;
 return (bits64)(maxStart * myDrand());
 }
 INLINE bits64 randomBlockSize()
 /* Return something from [MIN_BLK_SIZE,MAX_BLK_SIZE) */
 {
 return MIN_BLK_SIZE + (bits64)((MAX_BLK_SIZE - MIN_BLK_SIZE) * myDrand());
 }
 boolean readAndTestBlocks(struct udcFile *udcf, bits64 *retOffset, int numBlks, int *retBlksRead,
                          char *localCopy, char *url)
 /* Read numBlks randomly sized blocks from udcf starting at *retOffset and compare
  * to localCopy.  *retBlksRead starts as the number of randomly sized blocks we have
  * already read, and is incremented by the number of blocks we read. 
  * If numBlks is 0, use the number of remaining blocks before *retBlksRead reaches
  * MAX_BLOCKS.  Update *retOffset to the final offset. Return TRUE if errors.
  * Aborts if *retBlksRead has already reached MAX_BLOCKS on entry; a request
  * that would go past MAX_BLOCKS is truncated with a warning. */
 {
 boolean gotError = FALSE;
 if (*retBlksRead >= MAX_BLOCKS)
     errAbort("readAndTestBlocks: exceeded MAX_BLOCKS (%d) -- increase it?", MAX_BLOCKS);
 int blksLeft = MAX_BLOCKS - *retBlksRead;
 if (numBlks == 0)
     numBlks = blksLeft;
 if (numBlks > blksLeft)
     {
     warn("readAndTestBlocks: %d blocks requested but we have already read %d -- "
          "increase MAX_BLOCKS (%d)?  truncating to %d",
          numBlks, *retBlksRead, MAX_BLOCKS, blksLeft);
     numBlks = blksLeft;
     }
 int i;
 for (i = 0;  i < numBlks;  i++)
     {
     int size = randomBlockSize();
     gotError |= readAndTest(udcf, *retOffset, size, localCopy, url);
     *retOffset += size;
     // Callers loop until this counter reaches MAX_BLOCKS, so it must advance
     // with every block actually read.
     (*retBlksRead)++;
     }
 return gotError;
 }
 // These are defined in udc.c but not udc.h:
 // NOTE(review): udcBlockSize is a local copy of the value and must be kept in step with udc.c.
 #define udcBlockSize (8*1024)
 boolean udcCheckCacheBits(struct udcFile *file, int startBlock, int endBlock);
 /* Warn and return TRUE (error) if any bit in (startBlock,endBlock] is not set. */
 // NOTE(review): checkCacheFiles() iterates blocks startBlock..endBlock-1, i.e.
 // [startBlock,endBlock); confirm which end of the range udc.c treats as inclusive.
 boolean checkCacheFiles(bits64 accessStart, bits64 accessEnd, char *url, char *localCopy)
 /* Given a range of byte offsets accessed via udc, translate those into udc block
  * coords.  Check that all bytes in the sparseData offsets corresponding to the 
  * block coords are equal to the reference (very important!) and that all blocks'
  * bits are set in bitmap (not as important).
  * Returns TRUE if any sparseData block differs from localCopy.  Buffers, file
  * descriptors and the udcFile opened here are released before returning. */
 {
 boolean gotError = FALSE;
 char *bufRef = needMem(udcBlockSize), *bufSparse = needMem(udcBlockSize);
 // Widen the byte range outward to whole udc blocks.
 int startBlock = (int)(accessStart / udcBlockSize);
 int endBlock = (int)((accessEnd + udcBlockSize-1) / udcBlockSize);
 bits64 startOffset = (bits64)startBlock * udcBlockSize;
 verbose(1, "checking sparseData (%lld..%lld] blocks (%d..%d].\n",
         startOffset, ((bits64)endBlock*udcBlockSize), startBlock, endBlock);
 int fdLocal = mustOpenFd(localCopy, O_RDONLY);
 mustLseek(fdLocal, startOffset, SEEK_SET);
 char *sparseFileName = getSparseFileName(url);
 int fdSparse = mustOpenFd(sparseFileName, O_RDONLY);
 mustLseek(fdSparse, startOffset, SEEK_SET);
 int i;
 for (i = startBlock;  i < endBlock;  i++)
     {
     bits64 offset = ((bits64)i * udcBlockSize);
     // Pre-fill the two buffers with different byte patterns so that data left
     // over from the previous block can never make them compare equal.
     memset(bufRef, 0x59, udcBlockSize);
     memset(bufSparse, 0xae, udcBlockSize);
     mustReadFd(fdLocal, bufRef, udcBlockSize);
     mustReadFd(fdSparse, bufSparse, udcBlockSize);
     char testDesc[64];
     safef(testDesc, sizeof(testDesc), "SPARSE %lld blk %d", offset, i);
     gotError |= compareBytes(bufSparse, bufRef, udcBlockSize, sparseFileName, localCopy,
                              testDesc, offset);
     }
 mustCloseFd(&fdLocal);
 mustCloseFd(&fdSparse);
 freez(&bufRef);
 freez(&bufSparse);
 // Check bitmap bits too:
 struct udcFile *udcf = udcFileOpen(url, udcDefaultDir());
 verbose(1, "checking bitmap bits (%d..%d].\n", startBlock, endBlock);
 // The result is not folded into gotError; the header comment ranks the bitmap
 // check as less important.  NOTE(review): confirm this is intentional.
 udcCheckCacheBits(udcf, startBlock, endBlock);
 udcFileClose(&udcf);
 return gotError;
 }
 boolean testReadAheadBufferMode(char *url, char *localCopy, int mode)
 /* Open a udcFile, read different random locations, and check for errors. */
 boolean gotError = FALSE;
 bits64 fSize = fileSize(localCopy);
 struct udcFile *udcf = udcFileOpen(url, udcDefaultDir());
 bits64 offset = 0;
 if (mode == -1)
    offset = 0 + 8192 * myDrand();
 if (mode == 0)
    offset = (bits64)(fSize * myDrand());
 if (mode == 1)
    offset = fSize - 8192 * myDrand();
 int delta = 0;
 int i;
 for(i=0; i<100; ++i)
     int size = 8192 * myDrand();
     if ((offset + size) > fSize)
 	size = fSize - offset;
     gotError |= readAndTest(udcf, offset, size, localCopy, url);
     delta = -6000 + (12000 * myDrand());   // -6000 to +6000
     if (delta < 0)  // do not let unsigned offset go below 0
 	if (-delta > offset)
     	    delta = -offset;  
     offset += delta;
     if (offset > fSize)
 	offset = fSize;
 return gotError;
 bool testSize(char *url, long int size)
 /* Return TRUE when udcFileSize(url) equals size, FALSE otherwise.
  * Note the sense: unlike the other test functions in this file, which return
  * TRUE when something went wrong, TRUE here means the sizes agree. */
 {
 return (udcFileSize(url) == size);
 }
 boolean testReadAheadBuffer(char *url, char *localCopy)
 /* Open a udcFile, read different random locations, and check for errors.
  * Runs testReadAheadBufferMode once for each of its three modes and returns
  * TRUE if any of them reported an error. */
 {
 boolean gotError = FALSE;
 gotError |= testReadAheadBufferMode(url, localCopy, -1);  // near beginning of file
 gotError |= testReadAheadBufferMode(url, localCopy, 0);   // anywhere in file
 gotError |= testReadAheadBufferMode(url, localCopy, 1);   // near end of file
 return gotError;
 }
 boolean testInterleaved(char *url, char *localCopy)
 /* Open two udcFile handles to the same file, read probably-different random locations,
  * read from probably-overlapping random locations, and check for errors. */
 boolean gotError = FALSE;
 bits64 size = fileSize(localCopy);
 // First, read some bytes from udcFile udcf1.
 struct udcFile *udcf1 = udcFileOpen(url, udcDefaultDir());
 int blksRead1 = 0;
 bits64 offset1 = randomStartOffset(size);
 gotError |= readAndTestBlocks(udcf1, &offset1, 2, &blksRead1, localCopy, url);
 // While keeping udcf1 open, create udcf2 on the same URL, and read from a 
 // (probably) different location:
 struct udcFile *udcf2 = udcFileOpen(url, udcDefaultDir());
 int blksRead2 = 0;
 bits64 offset2 = randomStartOffset(size);
 gotError |= readAndTestBlocks(udcf2, &offset2, 2, &blksRead2, localCopy, url);
 // Interleave some successive-location reads:
 int i;
 for (i = 0;  i < 10;  i++)
     gotError |= readAndTestBlocks(udcf1, &offset1, 1, &blksRead1, localCopy, url);
     gotError |= readAndTestBlocks(udcf2, &offset2, 1, &blksRead2, localCopy, url);
 // Unevenly interleave reads starting from the same new random location:
 bits64 sameOffset = randomStartOffset(size);
 blksRead1 = 0;
 offset1 = sameOffset;
 blksRead2 = 0;
 offset2 = sameOffset;
 gotError |= readAndTestBlocks(udcf1, &offset1, 1, &blksRead1, localCopy, url);
 gotError |= readAndTestBlocks(udcf2, &offset2, 3, &blksRead2, localCopy, url);
 while (blksRead1 < MAX_BLOCKS || blksRead2 < MAX_BLOCKS)
     if (blksRead1 < MAX_BLOCKS)
 	int n = 1 + (int)(5 * myDrand());
 	n = min(MAX_BLOCKS - blksRead1, n);
 	gotError |= readAndTestBlocks(udcf1, &offset1, n, &blksRead1, localCopy, url);
     if (blksRead2 < MAX_BLOCKS)
 	int n = 1 + (int)(5 * myDrand());
 	n = min(MAX_BLOCKS - blksRead2, n);
 	gotError |= readAndTestBlocks(udcf2, &offset2, n, &blksRead2, localCopy, url);
 gotError |= checkCacheFiles(sameOffset, max(offset1, offset2), url, localCopy);
 return gotError;
 boolean testConcurrent(char *url, char *localCopy)
 /* Fork; then parent and child access the same locations (hopefully) concurrently. */
 boolean gotErrorParent = FALSE, gotErrorChild = FALSE;
 bits64 size = fileSize(localCopy);
 bits64 sameOffset = randomStartOffset(size);
 bits64 offsetParent = sameOffset, offsetChild = sameOffset;
 pid_t kidPid = fork();
 if (kidPid < 0)
     errnoAbort("testConcurrent: fork failed");
 else if (kidPid == 0)
     // child: access url and then exit, to pass control back to parent.
     struct udcFile *udcf = udcFileOpen(url, udcDefaultDir());
     int blksRead = 0;
     gotErrorChild = readAndTestBlocks(udcf, &offsetParent, MAX_BLOCKS, &blksRead, localCopy, url);
     // parent: access url, wait for child, do post-checking.
     struct udcFile *udcf = udcFileOpen(url, udcDefaultDir());
     int blksRead = 0;
     gotErrorParent = readAndTestBlocks(udcf, &offsetChild, MAX_BLOCKS, &blksRead, localCopy, url);
     // wait for child to finish:
     int childStatus;
     int retPid = waitpid(kidPid, &childStatus, 0);
     if (retPid < 0)
 	errnoAbort("testConcurrent: waitpid(%d) failed", kidPid);
     if (! WIFEXITED(childStatus))
 	warn("testConcurrent: child process did not exit() normally");
     if (WEXITSTATUS(childStatus))
 	warn("testConcurrent: child exit status = %d)", WEXITSTATUS(childStatus));
     if (gotErrorChild)
 	verbose(1, "Parent can see child got error.\n");
     gotErrorParent |= checkCacheFiles(sameOffset, max(offsetParent, offsetChild), url, localCopy);
     return (gotErrorParent || gotErrorChild);
 errAbort("testConcurrent: control should never reach this point.");
 return TRUE;
 int main(int argc, char *argv[])
 /* Set up test params and run tests. */
 boolean gotError = FALSE;
 optionInit(&argc, argv, options);
 size = optionExists("size");
 raBuf = optionExists("raBuf");
 doFork = optionExists("fork");
 protocol = optionVal("protocol", protocol);
 seed = optionInt("seed", seed);
 char *host = getenv("HOST");
 if (host == NULL || !startsWith("hgwdev", host))
     // So that we don't break "make test" on other machines, use stdout and exit 0:
     puts("Sorry, this must be run on hgwdev (with HOST=hgwdev)");
 char tmp[256];
 safef(tmp, sizeof tmp, "/data/tmp/%s/udcCache", getenv("USER"));
 if (seed == 0)
     long now = clock1();
     printf("Seeding random with unix time %ld\n", now);
     printf("Seeding random with option -seed=%d\n", seed);
 if (sameString(protocol, "http"))
     char *httpUrl = "http://hgwdev.cse.ucsc.edu/~angie/wgEncodeCshlRnaSeqAlignmentsK562ChromatinShort.bb";
     char *httpLocalCopy = "/gbdb/hg18/bbi/wgEncodeCshlRnaSeqAlignmentsK562ChromatinShort.bb";
     if (size)
 	gotError |= testSize(httpUrl, fileSize(httpLocalCopy));
     else if (raBuf)
 	gotError |= testReadAheadBuffer(httpUrl, httpLocalCopy);
     else if (doFork)
 	gotError |= testConcurrent(httpUrl, httpLocalCopy);
 	gotError |= testInterleaved(httpUrl, httpLocalCopy);
 else if (sameString(protocol, "ftp"))
     char *ftpUrl = THOUSAND_FTP CHR4_SLX_BAM;
     char *ftpLocalCopy = THOUSAND_HIVE CHR4_SLX_BAM;
     if (raBuf)
 	gotError |= testReadAheadBuffer(ftpUrl, ftpLocalCopy);
     else if (doFork)
 	gotError |= testConcurrent(ftpUrl, ftpLocalCopy);
 	gotError |= testInterleaved(ftpUrl, ftpLocalCopy);
     errAbort("Unrecognized protocol '%s'", protocol);
 return gotError;