0df94f90604087383b6ee79051e4f2bd48c8fc71
galt
  Fri Jul 22 15:31:07 2011 -0700
Speeding up udc by not re-opening the bitmap and sparseData files unnecessarily; also combined udcCacheContains with fetchMissing so that the bitmap is not read twice on cache misses; tested with udcTest in both interleave and fork modes.
diff --git src/lib/udc.c src/lib/udc.c
index 0e7b23e..15420fe 100644
--- src/lib/udc.c
+++ src/lib/udc.c
@@ -73,30 +73,31 @@
 
 struct udcFile
 /* A file handle for our caching system. */
     {
     struct udcFile *next;	/* Next in list. */
     char *url;			/* Name of file - includes protocol */
     char *protocol;		/* The URL up to the first colon.  http: etc. */
     struct udcProtocol *prot;	/* Protocol specific data and methods. */
     time_t updateTime;		/* Last modified timestamp. */
     bits64 size;		/* Size of file. */
     bits64 offset;		/* Current offset in file. */
     char *cacheDir;		/* Directory for cached file parts. */
     char *bitmapFileName;	/* Name of bitmap file. */
     char *sparseFileName;	/* Name of sparse data file. */
     int fdSparse;		/* File descriptor for sparse data file. */
+    struct udcBitmap *bits;     /* udcBitMap */
     bits64 startData;		/* Start of area in file we know to have data. */
     bits64 endData;		/* End of area in file we know to have data. */
     bits32 bitmapVersion;	/* Version of associated bitmap we were opened with. */
     struct connInfo connInfo;   /* Connection info for open net connection. */
     };
 
 struct udcBitmap
 /* The control structure including the bitmap of blocks that are cached. */
     {
     struct udcBitmap *next;	/* Next in list. */
     bits32 blockSize;		/* Number of bytes per block of file. */
     bits64 remoteUpdate;	/* Remote last update time. */
     bits64 fileSize;		/* File size */
     bits32 version;		/* Version - increments each time cache is stale. */
     bits64 localUpdate;		/* Time we last fetched new data into cache. */
@@ -589,42 +590,47 @@
     prot->fetchInfo = udcInfoViaTransparent;
     }
 else
     {
     errAbort("Unrecognized protocol %s in udcProtNew", upToColon);
     }
 return prot;
 }
 
 static void udcProtocolFree(struct udcProtocol **pProt)
 /* Free up protocol resources. */
 {
 freez(pProt);
 }
 
-static void setInitialCachedDataBounds(struct udcFile *file)
+static void setInitialCachedDataBounds(struct udcFile *file, boolean useCacheInfo)
 /* Open up bitmap file and read a little bit of it to see if cache is stale,
  * and if not to see if the initial part is cached.  Sets the data members
  * startData, and endData.  If the case is stale it makes fresh empty
  * cacheDir/sparseData and cacheDir/bitmap files. */
 {
 bits32 version = 0;
 
 /* Get existing bitmap, and if it's stale clean up. */
 struct udcBitmap *bits = udcBitmapOpen(file->bitmapFileName);
 if (bits != NULL)
     {
+    if (useCacheInfo)
+	{
+	file->size = bits->fileSize;
+	file->updateTime = bits->remoteUpdate;
+	}
     version = bits->version;
     if (bits->remoteUpdate != file->updateTime || bits->fileSize != file->size ||
 	!fileExists(file->sparseFileName))
 	{
 	verbose(2, "removing stale version (%lld! = %lld or %lld! = %lld or %s doesn't exist), "
 		"new version %d\n",
 		bits->remoteUpdate, (long long)file->updateTime, bits->fileSize, file->size,
 		file->sparseFileName, version);
         udcBitmapClose(&bits);
 	remove(file->bitmapFileName);
 	remove(file->sparseFileName);
 	++version;
 	}
     }
 else
@@ -633,39 +639,42 @@
 /* If no bitmap, then create one, and also an empty sparse data file. */
 if (bits == NULL)
     {
     udcNewCreateBitmapAndSparse(file, file->updateTime, file->size, version);
     bits = udcBitmapOpen(file->bitmapFileName);
     if (bits == NULL)
         errAbort("Unable to open bitmap file %s", file->bitmapFileName);
     }
 
 file->bitmapVersion = bits->version;
 
 /* Read in a little bit from bitmap while we have it open to see if we have anything cached. */
 if (file->size > 0)
     {
     Bits b;
+    off_t wasAt = lseek(bits->fd, 0, SEEK_CUR);
     mustReadOneFd(bits->fd, b);
     int endBlock = (file->size + udcBlockSize - 1)/udcBlockSize;
     if (endBlock > 8)
         endBlock = 8;
     int initialCachedBlocks = bitFindClear(&b, 0, endBlock);
     file->endData = initialCachedBlocks * udcBlockSize;
+    mustLseek(bits->fd, wasAt, SEEK_SET);
     }
 
-udcBitmapClose(&bits);
+file->bits = bits;
+
 }
 
 static boolean qEscaped(char c)
 /* Returns TRUE if character needs to be escaped in q-encoding. */
 {
 if (isalnum(c))
     return c == 'Q';
 else
     return c != '_' && c != '-' && c != '/' && c != '.';
 }
 
 static char *qEncode(char *input)
 /* Do a simple encoding to convert input string into "normal" characters.
  * Abnormal letters, and '!' get converted into Q followed by two hexadecimal digits. */
 {
@@ -827,51 +836,49 @@
 file->protocol = protocol;
 file->prot = prot;
 if (isTransparent)
     {
     /* If transparent dummy up things so that the "sparse" file pointer is actually
      * the file itself, which appears to be completely loaded in cache. */
     int fd = file->fdSparse = mustOpenFd(url, O_RDONLY);
     struct stat status;
     fstat(fd, &status);
     file->startData = 0;
     file->endData = file->size = status.st_size;
     }
 else
     {
     udcPathAndFileNames(file, cacheDir, protocol, afterProtocol);
-    if (useCacheInfo)
-	{
-	file->size = udcSizeAndModTimeFromBitmap(file->bitmapFileName, &(file->updateTime));
-	}
-    else
+    if (!useCacheInfo)
 	{
 	file->updateTime = info.updateTime;
 	file->size = info.size;
 	memcpy(&(file->connInfo), &(info.ci), sizeof(struct connInfo));
 	// update cache file mod times, so if we're caching we won't do this again
 	// until the timeout has expired again:
 	if (udcCacheTimeout() > 0 && fileExists(file->bitmapFileName))
 	    (void)maybeTouchFile(file->bitmapFileName);
 	}
 
     /* Make directory. */
     makeDirsOnPath(file->cacheDir);
 
-    /* Figure out a little bit about the extent of the good cached data if any. */
-    setInitialCachedDataBounds(file);
+    /* Figure out a little bit about the extent of the good cached data if any. Open bits bitmap. */
+    setInitialCachedDataBounds(file, useCacheInfo);
+
     file->fdSparse = mustOpenFd(file->sparseFileName, O_RDWR);
+
     }
 freeMem(afterProtocol);
 return file;
 }
 
 struct udcFile *udcFileOpen(char *url, char *cacheDir)
 /* Open up a cached file.  cacheDir may be null in which case udcDefaultDir() will be
  * used.  Abort if if file doesn't exist. */
 {
 struct udcFile *udcFile = udcFileMayOpen(url, cacheDir);
 if (udcFile == NULL)
     errAbort("Couldn't open %s", url);
 return udcFile;
 }
 
@@ -904,30 +911,31 @@
 {
 struct udcFile *file = *pFile;
 if (file != NULL)
     {
     if (file->connInfo.socket != 0)
 	mustCloseFd(&(file->connInfo.socket));
     if (file->connInfo.ctrlSocket != 0)
 	mustCloseFd(&(file->connInfo.ctrlSocket));
     freeMem(file->url);
     freeMem(file->protocol);
     udcProtocolFree(&file->prot);
     freeMem(file->cacheDir);
     freeMem(file->bitmapFileName);
     freeMem(file->sparseFileName);
     mustCloseFd(&(file->fdSparse));
+    udcBitmapClose(&file->bits);
     }
 freez(pFile);
 }
 
 static void qDecode(const char *input, char *buf, size_t size)
 /* Reverse the qEncode performed on afterProcotol above into buf or abort. */
 {
 safecpy(buf, size, input);
 char c, *r = buf, *w = buf;
 while ((c = *r++) != '\0')
     {
     if (c == 'Q')
 	{
 	int q;
 	if (sscanf(r, "%02X", &q))
@@ -1015,44 +1023,37 @@
 static void readBitsIntoBuf(int fd, int headerSize, int bitStart, int bitEnd,
 	Bits **retBits, int *retPartOffset)
 /* Do some bit-to-byte offset conversions and read in all the bytes that
  * have information in the bits we're interested in. */
 {
 int byteStart = bitStart/8;
 int byteEnd = bitToByteSize(bitEnd);
 int byteSize = byteEnd - byteStart;
 Bits *bits = needLargeMem(byteSize);
 mustLseek(fd, headerSize + byteStart, SEEK_SET);
 mustReadFd(fd, bits, byteSize);
 *retBits = bits;
 *retPartOffset = byteStart*8;
 }
 
-static boolean allBitsSetInFile(int fd, int headerSize, int bitStart, int bitEnd)
+static boolean allBitsSetInFile(int bitStart, int bitEnd, int partOffset, Bits *bits)
 /* Return TRUE if all bits in file between start and end are set. */
 {
-int partOffset;
-Bits *bits;
-
-readBitsIntoBuf(fd, headerSize, bitStart, bitEnd, &bits, &partOffset);
-
 int partBitStart = bitStart - partOffset;
 int partBitEnd = bitEnd - partOffset;
 int nextClearBit = bitFindClear(bits, partBitStart, partBitEnd);
 boolean allSet = (nextClearBit >= partBitEnd);
-
-freeMem(bits);
 return allSet;
 }
 
 // For tests/udcTest.c debugging: not declared in udc.h, but not static either:
 boolean udcCheckCacheBits(struct udcFile *file, int startBlock, int endBlock)
 /* Warn and return TRUE if any bit in (startBlock,endBlock] is not set. */
 {
 boolean gotUnset = FALSE;
 struct udcBitmap *bitmap = udcBitmapOpen(file->bitmapFileName);
 int partOffset;
 Bits *bits;
 readBitsIntoBuf(bitmap->fd, udcBitmapHeaderSize, startBlock, endBlock, &bits, &partOffset);
 
 int partBitStart = startBlock - partOffset;
 int partBitEnd = endBlock - partOffset;
@@ -1070,50 +1071,56 @@
 return gotUnset;
 }
 
 static void fetchMissingBlocks(struct udcFile *file, struct udcBitmap *bits, 
 	int startBlock, int blockCount, int blockSize)
 /* Fetch missing blocks from remote and put them into file.  errAbort if trouble. */
 {
 bits64 startPos = (bits64)startBlock * blockSize;
 bits64 endPos = startPos + (bits64)blockCount * blockSize;
 if (endPos > file->size)
     endPos = file->size;
 if (endPos > startPos)
     {
     bits64 readSize = endPos - startPos;
     void *buf = needLargeMem(readSize);
+    
     int actualSize = file->prot->fetchData(file->url, startPos, readSize, buf, &(file->connInfo));
     if (actualSize != readSize)
 	errAbort("unable to fetch %lld bytes from %s @%lld (got %d bytes)",
 		 readSize, file->url, startPos, actualSize);
     mustLseek(file->fdSparse, startPos, SEEK_SET);
     mustWriteFd(file->fdSparse, buf, readSize);
     freez(&buf);
     }
 }
 
-static void fetchMissingBits(struct udcFile *file, struct udcBitmap *bits,
+static boolean fetchMissingBits(struct udcFile *file, struct udcBitmap *bits,
 	bits64 start, bits64 end, bits64 *retFetchedStart, bits64 *retFetchedEnd)
 /* Scan through relevant parts of bitmap, fetching blocks we don't already have. */
 {
 /* Fetch relevant part of bitmap into memory */
 int partOffset;
 Bits *b;
 int startBlock = start / bits->blockSize;
 int endBlock = (end + bits->blockSize - 1) / bits->blockSize;
 readBitsIntoBuf(bits->fd, udcBitmapHeaderSize, startBlock, endBlock, &b, &partOffset);
+if (allBitsSetInFile(startBlock, endBlock, partOffset, b))
+    {  // it is already in the cache
+    freeMem(b);
+    return TRUE;
+    }
 
 /* Loop around first skipping set bits, then fetching clear bits. */
 boolean dirty = FALSE;
 int s = startBlock - partOffset;
 int e = endBlock - partOffset;
 for (;;)
     {
     int nextClearBit = bitFindClear(b, s, e);
     if (nextClearBit >= e)
         break;
     int nextSetBit = bitFindSet(b, nextClearBit, e);
     int clearSize =  nextSetBit - nextClearBit;
 
     fetchMissingBlocks(file, bits, nextClearBit + partOffset, clearSize, bits->blockSize);
     bitSetRange(b, nextClearBit, clearSize);
@@ -1125,125 +1132,126 @@
     }
 
 if (dirty)
     {
     /* Update bitmap on disk.... */
     int byteStart = startBlock/8;
     int byteEnd = bitToByteSize(endBlock);
     int byteSize = byteEnd - byteStart;
     mustLseek(bits->fd, byteStart + udcBitmapHeaderSize, SEEK_SET);
     mustWriteFd(bits->fd, b, byteSize);
     }
 
 freeMem(b);
 *retFetchedStart = startBlock * bits->blockSize;
 *retFetchedEnd = endBlock * bits->blockSize;
+return FALSE;
 }
 
-static boolean udcCacheContains(struct udcBitmap *bits, bits64 offset, int size)
-/* Return TRUE if cache already contains region. */
-{
-bits64 endOffset = offset + size;
-int startBlock = offset / bits->blockSize;
-int endBlock = (endOffset + bits->blockSize - 1) / bits->blockSize;
-return allBitsSetInFile(bits->fd, udcBitmapHeaderSize, startBlock, endBlock);
+static boolean rangeIntersectOrTouch64(bits64 start1, bits64 end1, bits64 start2, bits64 end2)
+/* Return true if two 64-bit ranges intersect or touch. */
+{  // cannot use the version of this function that is in common.c since it only handles integers.
+bits64 s = max(start1,start2);
+bits64 e = min(end1,end2);
+return e >= s;
 }
 
 
 static void udcFetchMissing(struct udcFile *file, struct udcBitmap *bits, bits64 start, bits64 end)
 /* Fetch missing pieces of data from file */
 {
 /* Call lower level routine fetch remote data that is not already here. */
 bits64 fetchedStart, fetchedEnd;
-fetchMissingBits(file, bits, start, end, &fetchedStart, &fetchedEnd);
+if (fetchMissingBits(file, bits, start, end, &fetchedStart, &fetchedEnd))
+    return;
 
 /* Update file startData/endData members to include new data (and old as well if
  * the new data overlaps the old). */
-if (rangeIntersection(file->startData, file->endData, fetchedStart, fetchedEnd) >= 0)
+if (rangeIntersectOrTouch64(file->startData, file->endData, fetchedStart, fetchedEnd))
     {
     if (fetchedStart > file->startData)
         fetchedStart = file->startData;
     if (fetchedEnd < file->endData)
         fetchedEnd = file->endData;
     }
 file->startData = fetchedStart;
 file->endData = fetchedEnd;
 }
 
 static boolean udcCachePreload(struct udcFile *file, bits64 offset, int size)
 /* Make sure that given data is in cache - fetching it remotely if need be. 
  * Return TRUE on success. */
 {
 boolean ok = TRUE;
 /* We'll break this operation into blocks of a reasonable size to allow
  * other processes to get cache access, since we have to lock the cache files. */
 bits64 s,e, endPos=offset+size;
 for (s = offset; s < endPos; s = e)
     {
     /* Figure out bounds of this section. */
     e = s + udcMaxBytesPerRemoteFetch;
     if (e > endPos)
 	e = endPos;
 
-    struct udcBitmap *bits = udcBitmapOpen(file->bitmapFileName);
+    struct udcBitmap *bits = file->bits;
     if (bits->version == file->bitmapVersion)
 	{
-	if (!udcCacheContains(bits, s, e-s))
 	    udcFetchMissing(file, bits, s, e);
 	}
     else
 	{
 	ok = FALSE;
 	verbose(2, "udcCachePreload version check failed %d vs %d", 
 		bits->version, file->bitmapVersion);
 	}
-    udcBitmapClose(&bits);
     if (!ok)
         break;
     }
 return ok;
 }
 
 int udcRead(struct udcFile *file, void *buf, int size)
 /* Read a block from file.  Return amount actually read. */
 {
 /* Figure out region of file we're going to read, and clip it against file size. */
 bits64 start = file->offset;
 if (start > file->size)
     return 0;
 bits64 end = start + size;
 if (end > file->size)
     end = file->size;
 size = end - start;
 
+
 /* If we're outside of the window of file we already know is good, then have to
  * consult cache on disk, and maybe even fetch data remotely! */
 if (start < file->startData || end > file->endData)
     {
+
     if (!udcCachePreload(file, start, size))
 	{
 	verbose(2, "udcCachePreload failed");
 	return 0;
 	}
-    // Even with this, the changes we wrote to fdSparse may not be visible when we read!
-    mustCloseFd(&(file->fdSparse));
-    file->fdSparse = mustOpenFd(file->sparseFileName, O_RDWR);
+
     /* Currently only need fseek here.  Would be safer, but possibly
      * slower to move fseek so it is always executed in front of read, in
      * case other code is moving around file pointer. */
+
     mustLseek(file->fdSparse, start, SEEK_SET);
     }
+
 mustReadFd(file->fdSparse, buf, size);
 file->offset += size;
 return size;
 }
 
 void udcMustRead(struct udcFile *file, void *buf, int size)
 /* Read a block from file.  Abort if any problem, including EOF before size is read. */
 {
 int sizeRead = udcRead(file, buf, size);
 if (sizeRead < size)
     errAbort("udc couldn't read %d bytes from %s, did read %d", size, file->url, sizeRead);
 }
 
 int udcGetChar(struct udcFile *file)
 /* Get next character from file or die trying. */