src/lib/udc.c 1.31
1.31 2009/12/19 00:57:09 angie
Changed all FILE * vars and operations (fopen etc) to integer file descriptors and 'man 2' operations (open etc), for unbuffered I/O which should make it harder for the bitmap file to get out of sync with the sparseData file when there are multiple udcFile*'s accessing the same part of a URL and those files. Unfortunately this doesn't completely prevent udcTest failures (try seed=1261180550), just reduces frequency a bit. Also added a speed optimization: to avoid heavy reconnect penalty when we need to skip a few udc blocks because they have already been cached, just read and discard those blocks' data.
Index: src/lib/udc.c
===================================================================
RCS file: /projects/compbio/cvsroot/kent/src/lib/udc.c,v
retrieving revision 1.30
retrieving revision 1.31
diff -b -B -U 4 -r1.30 -r1.31
--- src/lib/udc.c 20 Nov 2009 17:45:32 -0000 1.30
+++ src/lib/udc.c 19 Dec 2009 00:57:09 -0000 1.31
@@ -82,9 +82,9 @@
bits64 offset; /* Current offset in file. */
char *cacheDir; /* Directory for cached file parts. */
char *bitmapFileName; /* Name of bitmap file. */
char *sparseFileName; /* Name of sparse data file. */
- FILE *fSparse; /* File handle for sparse data file. */
+ int fdSparse; /* File descriptor for sparse data file. */
bits64 startData; /* Start of area in file we know to have data. */
bits64 endData; /* End of area in file we know to have data. */
bits32 bitmapVersion; /* Version of associated bitmap we were opened with. */
struct connInfo connInfo; /* Connection info for open net connection. */
@@ -100,27 +100,59 @@
bits32 version; /* Version - increments each time cache is stale. */
bits64 localUpdate; /* Time we last fetched new data into cache. */
bits64 localAccess; /* Time we last accessed data. */
boolean isSwapped; /* If true need to swap all bytes on read. */
- FILE *f; /* File handle for file with current block. */
+ int fd; /* File descriptor for file with current block. */
};
static char *bitmapName = "bitmap";
static char *sparseDataName = "sparseData";
#define udcBitmapHeaderSize (64)
static int cacheTimeout = 0;
+#define MAX_SKIP_TO_SAVE_RECONNECT (udcMaxBytesPerRemoteFetch / 2)
+
+static void readAndIgnore(int sd, bits64 size)
+/* Read size bytes from sd and return. */
+{
+static char *buf = NULL;
+if (buf == NULL)
+ buf = needMem(udcBlockSize);
+bits64 remaining = size, total = 0;
+while (remaining > 0)
+ {
+ bits64 chunkSize = min(remaining, udcBlockSize);
+ bits64 rd = read(sd, buf, chunkSize);
+ if (rd < 0)
+ errnoAbort("readAndIgnore: error reading socket after %lld bytes", total);
+ remaining -= rd;
+ total += rd;
+ }
+if (total < size)
+ errAbort("readAndIgnore: got EOF at %lld bytes (wanted %lld)", total, size);
+}
+
static int connInfoGetSocket(struct connInfo *ci, char *url, bits64 offset, int size)
/* If ci has an open socket and the given offset matches ci's current offset,
* reuse ci->socket. Otherwise close the socket, open a new one, and update ci. */
{
if (ci != NULL && ci->socket != 0 && ci->offset != offset)
{
+ bits64 skipSize = (offset - ci->offset);
+ if (skipSize > 0 && skipSize <= MAX_SKIP_TO_SAVE_RECONNECT)
+ {
+ verbose(2, "!! skipping %lld bytes @%lld to avoid reconnect\n", skipSize, ci->offset);
+ readAndIgnore(ci->socket, skipSize);
+ ci->offset = offset;
+ }
+ else
+ {
verbose(2, "Offset mismatch (ci %lld != new %lld), reopening.\n", ci->offset, offset);
- close(ci->socket);
+ mustCloseFd(&(ci->socket));
if (ci->ctrlSocket != 0)
- close(ci->ctrlSocket);
+ mustCloseFd(&(ci->ctrlSocket));
ZeroVar(ci);
}
+ }
int sd;
if (ci == NULL || ci->socket == 0)
{
char rangeUrl[2048];
@@ -305,9 +337,9 @@
}
if (rd == -1)
errnoAbort("udcDataViaHttpOrFtp: error reading socket");
if (ci == NULL)
- close(sd);
+ mustCloseFd(&sd);
else
ci->offset += total;
return total;
}
@@ -406,61 +438,69 @@
static void udcNewCreateBitmapAndSparse(struct udcFile *file,
bits64 remoteUpdate, bits64 remoteSize, bits32 version)
/* Create a new bitmap file around the given remoteUpdate time. */
{
-FILE *f = mustOpen(file->bitmapFileName, "wb");
+int fd = mustOpenFd(file->bitmapFileName, O_WRONLY | O_CREAT | O_TRUNC);
bits32 sig = udcBitmapSig;
bits32 blockSize = udcBlockSize;
bits64 reserved64 = 0;
bits32 reserved32 = 0;
int blockCount = (remoteSize + udcBlockSize - 1)/udcBlockSize;
int bitmapSize = bitToByteSize(blockCount);
-int i;
/* Write out fixed part of header. */
-writeOne(f, sig);
-writeOne(f, blockSize);
-writeOne(f, remoteUpdate);
-writeOne(f, remoteSize);
-writeOne(f, version);
-writeOne(f, reserved32);
-writeOne(f, reserved64);
-writeOne(f, reserved64);
-writeOne(f, reserved64);
-writeOne(f, reserved64);
-if (ftell(f) != udcBitmapHeaderSize)
- errAbort("ftell(f=%s) is %lld, not expected udcBitmapHeaderSize %d",
- file->bitmapFileName, (long long)ftell(f), udcBitmapHeaderSize);
-
-/* Write out initial all-zero bitmap. */
-for (i=0; i<bitmapSize; ++i)
- putc(0, f);
+writeOneFd(fd, sig);
+writeOneFd(fd, blockSize);
+writeOneFd(fd, remoteUpdate);
+writeOneFd(fd, remoteSize);
+writeOneFd(fd, version);
+writeOneFd(fd, reserved32);
+writeOneFd(fd, reserved64);
+writeOneFd(fd, reserved64);
+writeOneFd(fd, reserved64);
+writeOneFd(fd, reserved64);
+long long offset = mustLseek(fd, 0, SEEK_CUR);
+if (offset != udcBitmapHeaderSize)
+ errAbort("offset in fd=%d, f=%s is %lld, not expected udcBitmapHeaderSize %d",
+ fd, file->bitmapFileName, offset, udcBitmapHeaderSize);
+
+/* Write out initial all-zero bitmap, using sparse-file method: write 0 to final address. */
+unsigned char zero = 0;
+mustLseek(fd, bitmapSize-1, SEEK_CUR);
+mustWriteFd(fd, &zero, 1);
/* Clean up bitmap file and name. */
-carefulClose(&f);
+mustCloseFd(&fd);
/* Create an empty data file. */
-f = mustOpen(file->sparseFileName, "wb");
-carefulClose(&f);
+fd = mustOpenFd(file->sparseFileName, O_WRONLY | O_CREAT | O_TRUNC);
+mustCloseFd(&fd);
}
static struct udcBitmap *udcBitmapOpen(char *fileName)
/* Open up a bitmap file and read and verify header. Return NULL if file doesn't
* exist, abort on error. */
{
/* Open file, returning NULL if can't. */
-FILE *f = fopen(fileName, "r+b");
-if (f == NULL)
+int fd = open(fileName, O_RDWR);
+if (fd < 0)
+ {
+ if (errno == ENOENT)
return NULL;
+ else
+ errnoAbort("Can't open(%s, O_RDWR)", fileName);
+ }
/* Get status info from file. */
struct stat status;
-fstat(fileno(f), &status);
+fstat(fd, &status);
/* Read signature and decide if byte-swapping is needed. */
+// TODO: maybe buffer the I/O for performance? Don't read past header -
+// fd offset needs to point to first data block when we return.
bits32 magic;
boolean isSwapped = FALSE;
-mustReadOne(f, magic);
+mustReadOneFd(fd, magic);
if (magic != udcBitmapSig)
{
magic = byteSwap32(magic);
isSwapped = TRUE;
@@ -472,21 +512,21 @@
bits32 reserved32;
bits64 reserved64;
struct udcBitmap *bits;
AllocVar(bits);
-bits->blockSize = readBits32(f, isSwapped);
-bits->remoteUpdate = readBits64(f, isSwapped);
-bits->fileSize = readBits64(f, isSwapped);
-bits->version = readBits32(f, isSwapped);
-reserved32 = readBits32(f, isSwapped);
-reserved64 = readBits64(f, isSwapped);
-reserved64 = readBits64(f, isSwapped);
-reserved64 = readBits64(f, isSwapped);
-reserved64 = readBits64(f, isSwapped);
+bits->blockSize = fdReadBits32(fd, isSwapped);
+bits->remoteUpdate = fdReadBits64(fd, isSwapped);
+bits->fileSize = fdReadBits64(fd, isSwapped);
+bits->version = fdReadBits32(fd, isSwapped);
+reserved32 = fdReadBits32(fd, isSwapped);
+reserved64 = fdReadBits64(fd, isSwapped);
+reserved64 = fdReadBits64(fd, isSwapped);
+reserved64 = fdReadBits64(fd, isSwapped);
+reserved64 = fdReadBits64(fd, isSwapped);
bits->localUpdate = status.st_mtime;
bits->localAccess = status.st_atime;
bits->isSwapped = isSwapped;
-bits->f = f;
+bits->fd = fd;
return bits;
}
@@ -495,9 +535,9 @@
{
struct udcBitmap *bits = *pBits;
if (bits != NULL)
{
- carefulClose(&bits->f);
+ mustCloseFd(&(bits->fd));
freez(pBits);
}
}
@@ -562,27 +602,32 @@
udcBitmapClose(&bits);
remove(file->bitmapFileName);
remove(file->sparseFileName);
++version;
- verbose(2, "removing stale version, new version %d\n", version);
+ verbose(2, "removing stale version (%lld! = %lld or %lld! = %lld), new version %d\n",
+ bits->remoteUpdate, (long long)file->updateTime, bits->fileSize, file->size,
+ version);
}
}
+else
+ verbose(2, "bitmap file %s does not already exist, creating.\n", file->bitmapFileName);
/* If no bitmap, then create one, and also an empty sparse data file. */
if (bits == NULL)
{
udcNewCreateBitmapAndSparse(file, file->updateTime, file->size, version);
bits = udcBitmapOpen(file->bitmapFileName);
if (bits == NULL)
- internalErr();
+ errAbort("Unable to open bitmap file %s", file->bitmapFileName);
}
file->bitmapVersion = bits->version;
/* Read in a little bit from bitmap while we have it open to see if we have anything cached. */
if (file->size > 0)
{
- Bits b = fgetc(bits->f);
+ Bits b;
+ mustReadOneFd(bits->fd, b);
int endBlock = (file->size + udcBlockSize - 1)/udcBlockSize;
if (endBlock > 8)
endBlock = 8;
int initialCachedBlocks = bitFindClear(&b, 0, endBlock);
@@ -757,11 +802,11 @@
if (isTransparent)
{
/* If transparent dummy up things so that the "sparse" file pointer is actually
* the file itself, which appears to be completely loaded in cache. */
- FILE *f = file->fSparse = mustOpen(url, "rb");
+ int fd = file->fdSparse = mustOpenFd(url, O_RDONLY);
struct stat status;
- fstat(fileno(f), &status);
+ fstat(fd, &status);
file->startData = 0;
file->endData = file->size = status.st_size;
}
else
@@ -783,9 +828,9 @@
makeDirsOnPath(file->cacheDir);
/* Figure out a little bit about the extent of the good cached data if any. */
setInitialCachedDataBounds(file);
- file->fSparse = mustOpen(file->sparseFileName, "rb+");
+ file->fdSparse = mustOpenFd(file->sparseFileName, O_RDWR);
}
freeMem(afterProtocol);
return file;
}
@@ -830,18 +875,18 @@
struct udcFile *file = *pFile;
if (file != NULL)
{
if (file->connInfo.socket != 0)
- close(file->connInfo.socket);
+ mustCloseFd(&(file->connInfo.socket));
if (file->connInfo.ctrlSocket != 0)
- close(file->connInfo.ctrlSocket);
+ mustCloseFd(&(file->connInfo.ctrlSocket));
freeMem(file->url);
freeMem(file->protocol);
udcProtocolFree(&file->prot);
freeMem(file->cacheDir);
freeMem(file->bitmapFileName);
freeMem(file->sparseFileName);
- carefulClose(&file->fSparse);
+ mustCloseFd(&(file->fdSparse));
}
freez(pFile);
}
@@ -933,41 +978,66 @@
return now;
return (now - oldestTime);
}
-static void readBitsIntoBuf(FILE *f, int headerSize, int bitStart, int bitEnd,
+static void readBitsIntoBuf(int fd, int headerSize, int bitStart, int bitEnd,
Bits **retBits, int *retPartOffset)
/* Do some bit-to-byte offset conversions and read in all the bytes that
* have information in the bits we're interested in. */
{
int byteStart = bitStart/8;
int byteEnd = bitToByteSize(bitEnd);
int byteSize = byteEnd - byteStart;
Bits *bits = needLargeMem(byteSize);
-fseek(f, headerSize + byteStart, SEEK_SET);
-mustRead(f, bits, byteSize);
+mustLseek(fd, headerSize + byteStart, SEEK_SET);
+mustReadFd(fd, bits, byteSize);
*retBits = bits;
*retPartOffset = byteStart*8;
}
-static boolean allBitsSetInFile(FILE *f, int headerSize, int bitStart, int bitEnd)
+static boolean allBitsSetInFile(int fd, int headerSize, int bitStart, int bitEnd)
/* Return TRUE if all bits in file between start and end are set. */
{
int partOffset;
Bits *bits;
-readBitsIntoBuf(f, headerSize, bitStart, bitEnd, &bits, &partOffset);
+readBitsIntoBuf(fd, headerSize, bitStart, bitEnd, &bits, &partOffset);
int partBitStart = bitStart - partOffset;
int partBitEnd = bitEnd - partOffset;
-int bitSize = bitEnd - bitStart;
-int nextClearBit = bitFindClear(bits, partBitStart, bitSize);
+int nextClearBit = bitFindClear(bits, partBitStart, partBitEnd);
boolean allSet = (nextClearBit >= partBitEnd);
freeMem(bits);
return allSet;
}
+// For tests/udcTest.c debugging: not declared in udc.h, but not static either:
+boolean udcCheckCacheBits(struct udcFile *file, int startBlock, int endBlock)
+/* Warn and return TRUE if any bit in (startBlock,endBlock] is not set. */
+{
+boolean gotUnset = FALSE;
+struct udcBitmap *bitmap = udcBitmapOpen(file->bitmapFileName);
+int partOffset;
+Bits *bits;
+readBitsIntoBuf(bitmap->fd, udcBitmapHeaderSize, startBlock, endBlock, &bits, &partOffset);
+
+int partBitStart = startBlock - partOffset;
+int partBitEnd = endBlock - partOffset;
+int nextClearBit = bitFindClear(bits, partBitStart, partBitEnd);
+while (nextClearBit < partBitEnd)
+ {
+ int clearBlock = nextClearBit + partOffset;
+ warn("... udcFile 0x%08llx: bit for block %d (%lld..%lld] is not set",
+ (bits64)file, clearBlock,
+ ((long long)clearBlock * udcBlockSize), (((long long)clearBlock+1) * udcBlockSize));
+ gotUnset = TRUE;
+ int nextSetBit = bitFindSet(bits, nextClearBit, partBitEnd);
+ nextClearBit = bitFindClear(bits, nextSetBit, partBitEnd);
+ }
+return gotUnset;
+}
+
static void fetchMissingBlocks(struct udcFile *file, struct udcBitmap *bits,
int startBlock, int blockCount, int blockSize)
/* Fetch missing blocks from remote and put them into file. errAbort if trouble. */
{
@@ -982,10 +1052,10 @@
int actualSize = file->prot->fetchData(file->url, startPos, readSize, buf, &(file->connInfo));
if (actualSize != readSize)
errAbort("unable to fetch %lld bytes from %s @%lld (got %d bytes)",
readSize, file->url, startPos, actualSize);
- fseek(file->fSparse, startPos, SEEK_SET);
- mustWrite(file->fSparse, buf, readSize);
+ mustLseek(file->fdSparse, startPos, SEEK_SET);
+ mustWriteFd(file->fdSparse, buf, readSize);
freez(&buf);
}
}
@@ -997,9 +1067,9 @@
int partOffset;
Bits *b;
int startBlock = start / bits->blockSize;
int endBlock = (end + bits->blockSize - 1) / bits->blockSize;
-readBitsIntoBuf(bits->f, udcBitmapHeaderSize, startBlock, endBlock, &b, &partOffset);
+readBitsIntoBuf(bits->fd, udcBitmapHeaderSize, startBlock, endBlock, &b, &partOffset);
/* Loop around first skipping set bits, then fetching clear bits. */
boolean dirty = FALSE;
int s = startBlock - partOffset;
@@ -1026,25 +1096,24 @@
/* Update bitmap on disk.... */
int byteStart = startBlock/8;
int byteEnd = bitToByteSize(endBlock);
int byteSize = byteEnd - byteStart;
- fseek(bits->f, byteStart + udcBitmapHeaderSize, SEEK_SET);
- mustWrite(bits->f, b, byteSize);
+ mustLseek(bits->fd, byteStart + udcBitmapHeaderSize, SEEK_SET);
+ mustWriteFd(bits->fd, b, byteSize);
}
freeMem(b);
*retFetchedStart = startBlock * bits->blockSize;
*retFetchedEnd = endBlock * bits->blockSize;
}
-static boolean udcCacheContains(struct udcFile *file, struct udcBitmap *bits,
- bits64 offset, int size)
+static boolean udcCacheContains(struct udcBitmap *bits, bits64 offset, int size)
/* Return TRUE if cache already contains region. */
{
bits64 endOffset = offset + size;
int startBlock = offset / bits->blockSize;
int endBlock = (endOffset + bits->blockSize - 1) / bits->blockSize;
-return allBitsSetInFile(bits->f, udcBitmapHeaderSize, startBlock, endBlock);
+return allBitsSetInFile(bits->fd, udcBitmapHeaderSize, startBlock, endBlock);
}
static void udcFetchMissing(struct udcFile *file, struct udcBitmap *bits, bits64 start, bits64 end)
@@ -1084,9 +1153,9 @@
struct udcBitmap *bits = udcBitmapOpen(file->bitmapFileName);
if (bits->version == file->bitmapVersion)
{
- if (!udcCacheContains(file, bits, s, e-s))
+ if (!udcCacheContains(bits, s, e-s))
udcFetchMissing(file, bits, s, e);
}
else
{
@@ -1121,14 +1190,17 @@
{
verbose(2, "udcCachePreload failed");
return 0;
}
+ // Even with this, the changes we wrote to fdSparse may not be visible when we read!
+ mustCloseFd(&(file->fdSparse));
+ file->fdSparse = mustOpenFd(file->sparseFileName, O_RDWR);
/* Currently only need fseek here. Would be safer, but possibly
* slower to move fseek so it is always executed in front of read, in
* case other code is moving around file pointer. */
- fseek(file->fSparse, start, SEEK_SET);
+ mustLseek(file->fdSparse, start, SEEK_SET);
}
-mustRead(file->fSparse, buf, size);
+mustReadFd(file->fdSparse, buf, size);
file->offset += size;
return size;
}
@@ -1231,9 +1303,9 @@
void udcSeek(struct udcFile *file, bits64 offset)
/* Seek to a particular position in file. */
{
file->offset = offset;
-fseek(file->fSparse, offset, SEEK_SET);
+mustLseek(file->fdSparse, offset, SEEK_SET);
}
bits64 udcTell(struct udcFile *file)
/* Return current file position. */