6260a465e070e4370ff5fc1b361de2088274b743 galt Sat Sep 12 02:29:08 2015 -0700 Initial checkin of changes for cached udc redirect support. diff --git src/lib/udc.c src/lib/udc.c index 2ab135c..74b9d6e 100644 --- src/lib/udc.c +++ src/lib/udc.c @@ -39,30 +39,31 @@ #include <openssl/sha.h> #define udcBlockSize (8*1024) /* All fetch requests are rounded up to block size. */ #define udcMaxBytesPerRemoteFetch (udcBlockSize * 32) /* Very large remote reads are broken down into chunks this size. */ struct connInfo /* Socket descriptor and associated info, for keeping net connections open. */ { int socket; /* Socket descriptor for data connection (or 0). */ bits64 offset; /* Current file offset of socket. */ int ctrlSocket; /* (FTP only) Control socket descriptor or 0. */ + char *redirUrl; /* (HTTP(S) only) use redirected url */ }; typedef int (*UdcDataCallback)(char *url, bits64 offset, int size, void *buffer, struct connInfo *ci); /* Type for callback function that fetches file data. */ struct udcRemoteFileInfo /* Information about a remote file. */ { bits64 updateTime; /* Last update in seconds since 1970 */ bits64 size; /* Remote file size */ struct connInfo ci; /* Connection info for open net connection */ }; typedef boolean (*UdcInfoCallback)(char *url, struct udcRemoteFileInfo *retInfo); @@ -77,56 +78,58 @@ }; struct udcFile /* A file handle for our caching system. */ { struct udcFile *next; /* Next in list. */ char *url; /* Name of file - includes protocol */ char *protocol; /* The URL up to the first colon. http: etc. */ struct udcProtocol *prot; /* Protocol specific data and methods. */ time_t updateTime; /* Last modified timestamp. */ bits64 size; /* Size of file. */ bits64 offset; /* Current offset in file. */ char *cacheDir; /* Directory for cached file parts. */ char *bitmapFileName; /* Name of bitmap file. */ char *sparseFileName; /* Name of sparse data file. */ + char *redirFileName; /* Name of redir file. 
*/ int fdSparse; /* File descriptor for sparse data file. */ boolean sparseReadAhead; /* Read-ahead has something in the buffer */ char *sparseReadAheadBuf; /* Read-ahead buffer, if any */ bits64 sparseRAOffset; /* Read-ahead buffer offset */ struct udcBitmap *bits; /* udcBitMap */ bits64 startData; /* Start of area in file we know to have data. */ bits64 endData; /* End of area in file we know to have data. */ bits32 bitmapVersion; /* Version of associated bitmap we were opened with. */ struct connInfo connInfo; /* Connection info for open net connection. */ }; struct udcBitmap /* The control structure including the bitmap of blocks that are cached. */ { struct udcBitmap *next; /* Next in list. */ bits32 blockSize; /* Number of bytes per block of file. */ bits64 remoteUpdate; /* Remote last update time. */ bits64 fileSize; /* File size */ bits32 version; /* Version - increments each time cache is stale. */ bits64 localUpdate; /* Time we last fetched new data into cache. */ bits64 localAccess; /* Time we last accessed data. */ boolean isSwapped; /* If true need to swap all bytes on read. */ int fd; /* File descriptor for file with current block. */ }; static char *bitmapName = "bitmap"; static char *sparseDataName = "sparseData"; +static char *redirName = "redir"; #define udcBitmapHeaderSize (64) static int cacheTimeout = 0; #define MAX_SKIP_TO_SAVE_RECONNECT (udcMaxBytesPerRemoteFetch / 2) static void readAndIgnore(int sd, bits64 size) /* Read size bytes from sd and return. 
*/ { static char *buf = NULL; if (buf == NULL) buf = needMem(udcBlockSize); bits64 remaining = size, total = 0; while (remaining > 0) { bits64 chunkSize = min(remaining, udcBlockSize); @@ -154,30 +157,32 @@ readAndIgnore(ci->socket, skipSize); ci->offset = offset; } else { verbose(4, "Offset mismatch (ci %lld != new %lld), reopening.\n", ci->offset, offset); mustCloseFd(&(ci->socket)); if (ci->ctrlSocket > 0) mustCloseFd(&(ci->ctrlSocket)); ZeroVar(ci); } } int sd; if (ci == NULL || ci->socket <= 0) { + if (ci != NULL && ci->redirUrl) + url = ci->redirUrl; char rangeUrl[2048]; if (ci == NULL) { safef(rangeUrl, sizeof(rangeUrl), "%s;byterange=%lld-%lld", url, offset, (offset + size - 1)); sd = netUrlOpen(rangeUrl); } else { safef(rangeUrl, sizeof(rangeUrl), "%s;byterange=%lld-", url, offset); sd = ci->socket = netUrlOpenSockets(rangeUrl, &(ci->ctrlSocket)); ci->offset = offset; } if (sd < 0) return -1; @@ -378,32 +383,52 @@ errnoAbort("udcDataViaHttpOrFtp: error reading socket"); if (ci == NULL) mustCloseFd(&sd); else ci->offset += total; return total; } boolean udcInfoViaHttp(char *url, struct udcRemoteFileInfo *retInfo) /* Gets size and last modified time of URL * and returns status of HEAD GET. 
*/ { verbose(4, "checking http remote info on %s\n", url); struct hash *hash = newHash(0); int status = netUrlHead(url, hash); -if (status != 200) // && status != 302 && status != 301) +if (status != 200 && status != 301 && status != 302) return FALSE; +if (status == 301 || status == 302) + { + int sd = netUrlOpen(url); + if (sd < 0) + return FALSE; + int newSd = 0; + char *newUrl = NULL; + if (!netSkipHttpHeaderLinesHandlingRedirect(sd, url, &newSd, &newUrl)) + return FALSE; + if (newUrl != NULL) + { + sd = newSd; + url = newUrl; + retInfo->ci.redirUrl = newUrl; + } + close(sd); + // reread from the new redirected url + hash = newHash(0); + status = netUrlHead(url, hash); + } char *sizeString = hashFindValUpperCase(hash, "Content-Length:"); if (sizeString == NULL) { /* try to get remote file size by an alternate method */ long long retSize = netUrlSizeByRangeResponse(url); if (retSize < 0) { hashFree(&hash); errAbort("No Content-Length: returned in header for %s, can't proceed, sorry", url); } retInfo->size = retSize; } else { retInfo->size = atoll(sizeString); @@ -673,30 +698,32 @@ { file->size = bits->fileSize; file->updateTime = bits->remoteUpdate; } version = bits->version; if (bits->remoteUpdate != file->updateTime || bits->fileSize != file->size || !fileExists(file->sparseFileName)) { verbose(4, "removing stale version (%lld! = %lld or %lld! = %lld or %s doesn't exist), " "new version %d\n", bits->remoteUpdate, (long long)file->updateTime, bits->fileSize, file->size, file->sparseFileName, version); udcBitmapClose(&bits); remove(file->bitmapFileName); remove(file->sparseFileName); + if (fileExists(file->redirFileName)) + remove(file->redirFileName); ++version; } } else verbose(4, "bitmap file %s does not already exist, creating.\n", file->bitmapFileName); /* If no bitmap, then create one, and also an empty sparse data file. 
*/ if (bits == NULL) { udcNewCreateBitmapAndSparse(file, file->updateTime, file->size, version); bits = udcBitmapOpen(file->bitmapFileName); if (bits == NULL) errAbort("Unable to open bitmap file %s", file->bitmapFileName); } @@ -856,30 +883,31 @@ } void udcPathAndFileNames(struct udcFile *file, char *cacheDir, char *protocol, char *afterProtocol) /* Initialize udcFile path and names */ { if (cacheDir==NULL) return; char *hashedAfterProtocol = longDirHash(afterProtocol); int len = strlen(cacheDir) + 1 + strlen(protocol) + 1 + strlen(hashedAfterProtocol) + 1; file->cacheDir = needMem(len); safef(file->cacheDir, len, "%s/%s/%s", cacheDir, protocol, hashedAfterProtocol); /* Create file names for bitmap and data portions. */ file->bitmapFileName = fileNameInCacheDir(file, bitmapName); file->sparseFileName = fileNameInCacheDir(file, sparseDataName); +file->redirFileName = fileNameInCacheDir(file, redirName); } static long long int udcSizeAndModTimeFromBitmap(char *bitmapFileName, time_t *retTime) /* Look up the file size from the local cache bitmap file, or -1 if there * is no cache for url. If retTime is non-null, store the remote update time in it. */ { long long int ret = -1; struct udcBitmap *bits = udcBitmapOpen(bitmapFileName); if (bits != NULL) { ret = bits->fileSize; if (retTime) *retTime = bits->remoteUpdate; } udcBitmapClose(&bits); @@ -946,41 +974,73 @@ file->startData = 0; file->endData = file->size = status.st_size; } else { udcPathAndFileNames(file, cacheDir, protocol, afterProtocol); if (!useCacheInfo) { file->updateTime = info.updateTime; file->size = info.size; memcpy(&(file->connInfo), &(info.ci), sizeof(struct connInfo)); // update cache file mod times, so if we're caching we won't do this again // until the timeout has expired again: if (udcCacheTimeout() > 0 && udcCacheEnabled() && fileExists(file->bitmapFileName)) (void)maybeTouchFile(file->bitmapFileName); + } if (udcCacheEnabled()) { /* Make directory. 
*/ makeDirsOnPath(file->cacheDir); /* Figure out a little bit about the extent of the good cached data if any. Open bits bitmap. */ setInitialCachedDataBounds(file, useCacheInfo); file->fdSparse = mustOpenFd(file->sparseFileName, O_RDWR); + + // update redir with latest redirect status + if (startsWith("http", protocol)) + { + if (useCacheInfo) + { + // read redir from cache + if (fileExists(file->redirFileName)) + { + readInGulp(file->redirFileName, &file->connInfo.redirUrl, NULL); + } + } + else + { + if (info.ci.redirUrl) + { + // write redir to cache + char *temp = addSuffix(file->redirFileName, ".temp"); + writeGulp(temp, file->connInfo.redirUrl, strlen(file->connInfo.redirUrl)); + rename(temp, file->redirFileName); + freeMem(temp); + } + else + { + // delete redir from cache (if it exists) + if (fileExists(file->redirFileName)) + remove(file->redirFileName); + } + } + } + } } freeMem(afterProtocol); return file; } struct udcFile *udcFileOpen(char *url, char *cacheDir) /* Open up a cached file. cacheDir may be null in which case udcDefaultDir() will be * used. Abort if file doesn't exist. */ { struct udcFile *udcFile = udcFileMayOpen(url, cacheDir); if (udcFile == NULL) errAbort("Couldn't open %s", url); return udcFile; @@ -1647,30 +1707,32 @@ } } else if (sameString(file->name, bitmapName)) { if (file->size > udcBitmapHeaderSize) /* prevent failure on bitmap files of size 0 or less than header size */ verbose(4, "%ld (%ld) %s/%s\n", bitRealDataSize(file->name), (long)file->size, getCurrentDir(), file->name); if (file->lastAccess < deleteTime) { /* Remove all files when get bitmap, so that can ensure they are deleted in * right order. 
*/ results += file->size; if (!testOnly) { remove(bitmapName); remove(sparseDataName); + if (fileExists(redirName)) + remove(redirName); } } } else if (sameString(file->name, sparseDataName)) { if (results > 0) results += file->size; } } return results; } bits64 udcCleanup(char *cacheDir, double maxDays, boolean testOnly) /* Remove cached files older than maxDays old. If testOnly is set * no clean up is done, but the size of the files that would be