6260a465e070e4370ff5fc1b361de2088274b743
galt
  Sat Sep 12 02:29:08 2015 -0700
Initial checkin of changes for cached udc redirect support.

diff --git src/lib/udc.c src/lib/udc.c
index 2ab135c..74b9d6e 100644
--- src/lib/udc.c
+++ src/lib/udc.c
@@ -39,30 +39,31 @@
 #include <openssl/sha.h>
 
 
 #define udcBlockSize (8*1024)
 /* All fetch requests are rounded up to block size. */
 
 #define udcMaxBytesPerRemoteFetch (udcBlockSize * 32)
 /* Very large remote reads are broken down into chunks this size. */
 
 struct connInfo
 /* Socket descriptor and associated info, for keeping net connections open. */
     {
     int socket;                 /* Socket descriptor for data connection (or 0). */
     bits64 offset;		/* Current file offset of socket. */
     int ctrlSocket;             /* (FTP only) Control socket descriptor or 0. */
+    char *redirUrl;             /* (HTTP(S) only) Redirected url to fetch from in place of the original, or NULL. */
     };
 
 typedef int (*UdcDataCallback)(char *url, bits64 offset, int size, void *buffer,
 			       struct connInfo *ci);
 /* Type for callback function that fetches file data. */
 
 struct udcRemoteFileInfo
 /* Information about a remote file. */
     {
     bits64 updateTime;	/* Last update in seconds since 1970 */
     bits64 size;	/* Remote file size */
     struct connInfo ci; /* Connection info for open net connection */
     };
 
 typedef boolean (*UdcInfoCallback)(char *url, struct udcRemoteFileInfo *retInfo);
@@ -77,56 +78,58 @@
     };
 
 struct udcFile
 /* A file handle for our caching system. */
     {
     struct udcFile *next;	/* Next in list. */
     char *url;			/* Name of file - includes protocol */
     char *protocol;		/* The URL up to the first colon.  http: etc. */
     struct udcProtocol *prot;	/* Protocol specific data and methods. */
     time_t updateTime;		/* Last modified timestamp. */
     bits64 size;		/* Size of file. */
     bits64 offset;		/* Current offset in file. */
     char *cacheDir;		/* Directory for cached file parts. */
     char *bitmapFileName;	/* Name of bitmap file. */
     char *sparseFileName;	/* Name of sparse data file. */
+    char *redirFileName;	/* Name of cache file that stores the redirected url. */
     int fdSparse;		/* File descriptor for sparse data file. */
     boolean sparseReadAhead;    /* Read-ahead has something in the buffer */
     char *sparseReadAheadBuf;   /* Read-ahead buffer, if any */
     bits64 sparseRAOffset;      /* Read-ahead buffer offset */
     struct udcBitmap *bits;     /* udcBitMap */
     bits64 startData;		/* Start of area in file we know to have data. */
     bits64 endData;		/* End of area in file we know to have data. */
     bits32 bitmapVersion;	/* Version of associated bitmap we were opened with. */
     struct connInfo connInfo;   /* Connection info for open net connection. */
     };
 
 struct udcBitmap
 /* The control structure including the bitmap of blocks that are cached. */
     {
     struct udcBitmap *next;	/* Next in list. */
     bits32 blockSize;		/* Number of bytes per block of file. */
     bits64 remoteUpdate;	/* Remote last update time. */
     bits64 fileSize;		/* File size */
     bits32 version;		/* Version - increments each time cache is stale. */
     bits64 localUpdate;		/* Time we last fetched new data into cache. */
     bits64 localAccess;		/* Time we last accessed data. */
     boolean isSwapped;		/* If true need to swap all bytes on read. */
     int fd;			/* File descriptor for file with current block. */
     };
 static char *bitmapName = "bitmap";
 static char *sparseDataName = "sparseData";
+static char *redirName = "redir";	/* Cache file holding the redirected url, if any. */
 #define udcBitmapHeaderSize (64)
 static int cacheTimeout = 0;
 
 #define MAX_SKIP_TO_SAVE_RECONNECT (udcMaxBytesPerRemoteFetch / 2)
 
 static void readAndIgnore(int sd, bits64 size)
 /* Read size bytes from sd and return. */
 {
 static char *buf = NULL;
 if (buf == NULL)
     buf = needMem(udcBlockSize);
 bits64 remaining = size, total = 0;
 while (remaining > 0)
     {
     bits64 chunkSize = min(remaining, udcBlockSize);
@@ -154,30 +157,32 @@
 	readAndIgnore(ci->socket, skipSize);
 	ci->offset = offset;
 	}
     else
 	{
 	verbose(4, "Offset mismatch (ci %lld != new %lld), reopening.\n", ci->offset, offset);
 	mustCloseFd(&(ci->socket));
 	if (ci->ctrlSocket > 0)
 	    mustCloseFd(&(ci->ctrlSocket));
 	ZeroVar(ci);
 	}
     }
 int sd;
 if (ci == NULL || ci->socket <= 0)
     {
+    if (ci != NULL && ci->redirUrl != NULL)  /* ci may be NULL here; fetch from the redirect target when one is known */
+	url = ci->redirUrl;
     char rangeUrl[2048];
     if (ci == NULL)
 	{
 	safef(rangeUrl, sizeof(rangeUrl), "%s;byterange=%lld-%lld",
 	      url, offset, (offset + size - 1));
 	sd = netUrlOpen(rangeUrl);
 	}
     else
 	{
 	safef(rangeUrl, sizeof(rangeUrl), "%s;byterange=%lld-", url, offset);
 	sd = ci->socket = netUrlOpenSockets(rangeUrl, &(ci->ctrlSocket));
 	ci->offset = offset;
 	}
     if (sd < 0)
 	return -1;
@@ -378,32 +383,52 @@
     errnoAbort("udcDataViaHttpOrFtp: error reading socket");
 if (ci == NULL)
     mustCloseFd(&sd);
 else
     ci->offset += total;
 return total;
 }
 
 boolean udcInfoViaHttp(char *url, struct udcRemoteFileInfo *retInfo)
 /* Gets size and last modified time of URL
  * and returns status of HEAD GET. */
 {
 verbose(4, "checking http remote info on %s\n", url);
 struct hash *hash = newHash(0);
 int status = netUrlHead(url, hash);
-if (status != 200) // && status != 302 && status != 301)
+if (status != 200  && status != 301 && status != 302)  
     return FALSE;
+if (status == 301 || status == 302)
+    {
+    /* Moved: follow the redirect to learn the final url, then redo the HEAD against it. */
+    int sd = netUrlOpen(url);
+    if (sd < 0)
+	return FALSE;
+    int newSd = 0;
+    char *newUrl = NULL;
+    if (!netSkipHttpHeaderLinesHandlingRedirect(sd, url, &newSd, &newUrl))
+	return FALSE;
+    if (newUrl != NULL)
+	{
+	sd = newSd;
+	url = retInfo->ci.redirUrl = newUrl;  /* remembered so later fetches go straight to the target */
+	}
+    close(sd);
+    hashFree(&hash);  /* drop the headers of the redirecting response before re-reading */
+    hash = newHash(0);
+    status = netUrlHead(url, hash);
+    }
 char *sizeString = hashFindValUpperCase(hash, "Content-Length:");
 if (sizeString == NULL)
     {
     /* try to get remote file size by an alternate method */
     long long retSize = netUrlSizeByRangeResponse(url);
     if (retSize < 0)
 	{
     	hashFree(&hash);
 	errAbort("No Content-Length: returned in header for %s, can't proceed, sorry", url);
 	}
     retInfo->size = retSize;
     }
 else
     {
     retInfo->size = atoll(sizeString);
@@ -673,30 +698,32 @@
 	{
 	file->size = bits->fileSize;
 	file->updateTime = bits->remoteUpdate;
 	}
     version = bits->version;
     if (bits->remoteUpdate != file->updateTime || bits->fileSize != file->size ||
 	!fileExists(file->sparseFileName))
 	{
 	verbose(4, "removing stale version (%lld! = %lld or %lld! = %lld or %s doesn't exist), "
 		"new version %d\n",
 		bits->remoteUpdate, (long long)file->updateTime, bits->fileSize, file->size,
 		file->sparseFileName, version);
         udcBitmapClose(&bits);
 	remove(file->bitmapFileName);
 	remove(file->sparseFileName);
+	if (fileExists(file->redirFileName))
+	    remove(file->redirFileName);
 	++version;
 	}
     }
 else
     verbose(4, "bitmap file %s does not already exist, creating.\n", file->bitmapFileName);
 
 /* If no bitmap, then create one, and also an empty sparse data file. */
 if (bits == NULL)
     {
     udcNewCreateBitmapAndSparse(file, file->updateTime, file->size, version);
     bits = udcBitmapOpen(file->bitmapFileName);
     if (bits == NULL)
         errAbort("Unable to open bitmap file %s", file->bitmapFileName);
     }
 
@@ -856,30 +883,31 @@
 }
 
 void udcPathAndFileNames(struct udcFile *file, char *cacheDir, char *protocol, char *afterProtocol)
 /* Initialize udcFile path and names */
 {
 if (cacheDir==NULL)
     return;
 char *hashedAfterProtocol = longDirHash(afterProtocol);
 int len = strlen(cacheDir) + 1 + strlen(protocol) + 1 + strlen(hashedAfterProtocol) + 1;
 file->cacheDir = needMem(len);
 safef(file->cacheDir, len, "%s/%s/%s", cacheDir, protocol, hashedAfterProtocol);
 
 /* Create file names for bitmap and data portions. */
 file->bitmapFileName = fileNameInCacheDir(file, bitmapName);
 file->sparseFileName = fileNameInCacheDir(file, sparseDataName);
+file->redirFileName = fileNameInCacheDir(file, redirName);
 }
 
 static long long int udcSizeAndModTimeFromBitmap(char *bitmapFileName, time_t *retTime)
 /* Look up the file size from the local cache bitmap file, or -1 if there
  * is no cache for url. If retTime is non-null, store the remote update time in it. */
 {
 long long int ret = -1;
 struct udcBitmap *bits = udcBitmapOpen(bitmapFileName);
 if (bits != NULL)
     {
     ret = bits->fileSize;
     if (retTime)
 	*retTime = bits->remoteUpdate;
     }
 udcBitmapClose(&bits);
@@ -946,41 +974,73 @@
     file->startData = 0;
     file->endData = file->size = status.st_size;
     }
 else 
     {
     udcPathAndFileNames(file, cacheDir, protocol, afterProtocol);
     if (!useCacheInfo)
 	{
 	file->updateTime = info.updateTime;
 	file->size = info.size;
 	memcpy(&(file->connInfo), &(info.ci), sizeof(struct connInfo));
 	// update cache file mod times, so if we're caching we won't do this again
 	// until the timeout has expired again:
     	if (udcCacheTimeout() > 0 && udcCacheEnabled() && fileExists(file->bitmapFileName))
 	    (void)maybeTouchFile(file->bitmapFileName);
+
 	}
 
     if (udcCacheEnabled())
         {
         /* Make directory. */
         makeDirsOnPath(file->cacheDir);
 
         /* Figure out a little bit about the extent of the good cached data if any. Open bits bitmap. */
         setInitialCachedDataBounds(file, useCacheInfo);
 
         file->fdSparse = mustOpenFd(file->sparseFileName, O_RDWR);
+
+	// keep connInfo.redirUrl and the redir cache file in step (protocols starting with "http" only)
+	if (startsWith("http", protocol))
+	    {
+	    if (useCacheInfo)
+		{
+		// using cached info: load the redirected url from the redir file, if that file exists
+		if (fileExists(file->redirFileName))
+		    {
+		    readInGulp(file->redirFileName, &file->connInfo.redirUrl, NULL);
+		    }
+		}
+	    else
+		{
+		if (info.ci.redirUrl)
+		    {
+		    // remote info reported a redirect: write it to a .temp file, then rename onto the redir file
+		    char *temp = addSuffix(file->redirFileName, ".temp");
+		    writeGulp(temp, file->connInfo.redirUrl, strlen(file->connInfo.redirUrl));
+		    rename(temp, file->redirFileName);
+		    freeMem(temp);
+		    }
+		else
+		    {
+		    // no redirect reported: remove any redir file left in the cache
+		    if (fileExists(file->redirFileName))
+			remove(file->redirFileName);
+		    }
+		}
+	    }
+	
         }
 
     }
 freeMem(afterProtocol);
 return file;
 }
 
 struct udcFile *udcFileOpen(char *url, char *cacheDir)
 /* Open up a cached file.  cacheDir may be null in which case udcDefaultDir() will be
  * used.  Abort if file doesn't exist. */
 {
 struct udcFile *udcFile = udcFileMayOpen(url, cacheDir);
 if (udcFile == NULL)
     errAbort("Couldn't open %s", url);
 return udcFile;
@@ -1647,30 +1707,32 @@
 	    }
 	}
     else if (sameString(file->name, bitmapName))
         {
 	if (file->size > udcBitmapHeaderSize) /* prevent failure on bitmap files of size 0 or less than header size */
 	    verbose(4, "%ld (%ld) %s/%s\n", bitRealDataSize(file->name), (long)file->size, getCurrentDir(), file->name);
 	if (file->lastAccess < deleteTime)
 	    {
 	    /* Remove all files when get bitmap, so that can ensure they are deleted in 
 	     * right order. */
 	    results += file->size;
 	    if (!testOnly)
 		{
 		remove(bitmapName);
 		remove(sparseDataName);
+		if (fileExists(redirName))
+		    remove(redirName);
 		}
 	    }
 	}
     else if (sameString(file->name, sparseDataName))
         {
 	if (results > 0)
 	    results += file->size;
 	}
     }
 return results;
 }
 
 bits64 udcCleanup(char *cacheDir, double maxDays, boolean testOnly)
 /* Remove cached files older than maxDays old. If testOnly is set
  * no clean up is done, but the size of the files that would be