5a4c9d70e2416f53241781d17549445b2bc80d42
galt
  Sat Aug 6 00:09:25 2011 -0700
adding 4k readAhead buffer for sparseData reduces thousands of reads down to a dozen
diff --git src/lib/udc.c src/lib/udc.c
index 15420fe..e94877e 100644
--- src/lib/udc.c
+++ src/lib/udc.c
@@ -73,30 +73,32 @@
 
 struct udcFile
 /* A file handle for our caching system. */
     {
     struct udcFile *next;	/* Next in list. */
     char *url;			/* Name of file - includes protocol */
     char *protocol;		/* The URL up to the first colon.  http: etc. */
     struct udcProtocol *prot;	/* Protocol specific data and methods. */
     time_t updateTime;		/* Last modified timestamp. */
     bits64 size;		/* Size of file. */
     bits64 offset;		/* Current offset in file. */
     char *cacheDir;		/* Directory for cached file parts. */
     char *bitmapFileName;	/* Name of bitmap file. */
     char *sparseFileName;	/* Name of sparse data file. */
     int fdSparse;		/* File descriptor for sparse data file. */
+    char *sparseReadAhead;      /* Read-ahead buffer of READAHEADBUFSIZE bytes, or NULL if none. */
+    bits64 sparseRAOffset;      /* File offset of the first byte held in sparseReadAhead. */
     struct udcBitmap *bits;     /* udcBitMap */
     bits64 startData;		/* Start of area in file we know to have data. */
     bits64 endData;		/* End of area in file we know to have data. */
     bits32 bitmapVersion;	/* Version of associated bitmap we were opened with. */
     struct connInfo connInfo;   /* Connection info for open net connection. */
     };
 
 struct udcBitmap
 /* The control structure including the bitmap of blocks that are cached. */
     {
     struct udcBitmap *next;	/* Next in list. */
     bits32 blockSize;		/* Number of bytes per block of file. */
     bits64 remoteUpdate;	/* Remote last update time. */
     bits64 fileSize;		/* File size */
     bits32 version;		/* Version - increments each time cache is stale. */
@@ -910,30 +912,31 @@
 /* Close down cached file. */
 {
 struct udcFile *file = *pFile;
 if (file != NULL)
     {
     if (file->connInfo.socket != 0)
 	mustCloseFd(&(file->connInfo.socket));
     if (file->connInfo.ctrlSocket != 0)
 	mustCloseFd(&(file->connInfo.ctrlSocket));
     freeMem(file->url);
     freeMem(file->protocol);
     udcProtocolFree(&file->prot);
     freeMem(file->cacheDir);
     freeMem(file->bitmapFileName);
     freeMem(file->sparseFileName);
+    freeMem(file->sparseReadAhead);
     mustCloseFd(&(file->fdSparse));
     udcBitmapClose(&file->bits);
     }
 freez(pFile);
 }
 
 static void qDecode(const char *input, char *buf, size_t size)
 /* Reverse the qEncode performed on afterProcotol above into buf or abort. */
 {
 safecpy(buf, size, input);
 char c, *r = buf, *w = buf;
 while ((c = *r++) != '\0')
     {
     if (c == 'Q')
 	{
@@ -1197,64 +1200,126 @@
 	{
         udcFetchMissing(file, bits, s, e);
 	}
     else
 	{
 	ok = FALSE;
 	verbose(2, "udcCachePreload version check failed %d vs %d", 
 		bits->version, file->bitmapVersion);
 	}
     if (!ok)
         break;
     }
 return ok;
 }
 
+#define READAHEADBUFSIZE 4096	/* Size in bytes of the sparse-file read-ahead buffer. */
 int udcRead(struct udcFile *file, void *buf, int size)
 /* Read a block from file.  Return amount actually read. */
 {
+/* Reads shorter than READAHEADBUFSIZE go through file->sparseReadAhead.  Returns 0 if udcCachePreload fails. */
 /* Figure out region of file we're going to read, and clip it against file size. */
 bits64 start = file->offset;
 if (start > file->size)
     return 0;
 bits64 end = start + size;
 if (end > file->size)
     end = file->size;
 size = end - start;
 
+/* use read-ahead buffer if present */
+int bytesRead = 0;
+/* Each pass copies out of the buffer, refills it, or reads straight into buf. */
+bits64 raStart;
+bits64 raEnd;
+while(TRUE)
+    {
+    raStart = file->sparseRAOffset;
+    raEnd = raStart+READAHEADBUFSIZE;
+    if (file->sparseReadAhead)
+	{
+	if (start >= raStart && start < raEnd)
+	    {
+	    // copy bytes out of rabuf; offsets stay 64-bit so large files are not truncated
+	    bits64 endInBuf = min(raEnd, end);
+	    int sizeInBuf = endInBuf - start;
+	    memcpy(buf, file->sparseReadAhead + (start-raStart), sizeInBuf);
+	    buf = (char *)buf + sizeInBuf;
+	    bytesRead += sizeInBuf;
+	    start = raEnd;
+	    size -= sizeInBuf;
+	    file->offset += sizeInBuf;
+	    mustLseek(file->fdSparse, start, SEEK_SET);
+	    if (size == 0)
+		break;
+	    }
+	/* Buffer was missed or is used up: free it so the refill below does not leak it
+	 * and the read below cannot write more than READAHEADBUFSIZE bytes into it. */
+	freez(&file->sparseReadAhead);
+	}
+
+    bits64 saveEnd = end;
+    if (size < READAHEADBUFSIZE)
+	{
+	file->sparseReadAhead = needMem(READAHEADBUFSIZE);
+	file->sparseRAOffset = start;
+	size = READAHEADBUFSIZE;
+	end = start + size;
+	if (end > file->size)
+	    {
+	    end = file->size;
+	    size = end - start;
+	    }
+	}
+
 
 /* If we're outside of the window of file we already know is good, then have to
  * consult cache on disk, and maybe even fetch data remotely! */
 if (start < file->startData || end > file->endData)
     {
 
     if (!udcCachePreload(file, start, size))
 	{
 	verbose(2, "udcCachePreload failed");
-	return 0;
+	    freez(&file->sparseReadAhead);	/* never filled; must not be served later */
+	    bytesRead = 0;
+	    break;
 	}
 
     /* Currently only need fseek here.  Would be safer, but possibly
      * slower to move fseek so it is always executed in front of read, in
      * case other code is moving around file pointer. */
 
     mustLseek(file->fdSparse, start, SEEK_SET);
     }
 
+    if (file->sparseReadAhead)
+	{
+	mustReadFd(file->fdSparse, file->sparseReadAhead, size);
+	end = saveEnd;
+	size = end - start;
+	}
+    else
+	{
 mustReadFd(file->fdSparse, buf, size);
 file->offset += size;
-return size;
+	bytesRead += size;
+	break;
+	}
+    }
+
+return bytesRead;
 }
 
 void udcMustRead(struct udcFile *file, void *buf, int size)
 /* Read a block from file.  Abort if any problem, including EOF before size is read. */
 {
 int sizeRead = udcRead(file, buf, size);
 if (sizeRead < size)
     errAbort("udc couldn't read %d bytes from %s, did read %d", size, file->url, sizeRead);
 }
 
 int udcGetChar(struct udcFile *file)
 /* Get next character from file or die trying. */
 {
 UBYTE b;
 udcMustRead(file, &b, 1);