b4e629844bbf1e3540f3ab32563aa2f02ef7f7db
max
  Fri Mar 27 08:37:59 2015 -0700
changes to UDC that allow to
1. load bigDataUrls from a specific local directory, configured in hg.conf.
this is useful in GBIB and mirrors where lots of time and space is wasted on
loading local files via the http protocol.
2. deactivate the udc cache. This is useful on OSX which
doesn't support sparse files and so quickly fills any harddisk with
hundreds of gigabytes of files full with zeros.

diff --git src/lib/udc.c src/lib/udc.c
index 499244b..3d6f46d 100644
--- src/lib/udc.c
+++ src/lib/udc.c
@@ -820,84 +820,89 @@
 
     dyStringAppend(dy, "/");
 
     name = ptr + 1;
     ptr = strchr(name, '/');
     }
 
 addElementToDy(dy, name);
 
 return dyStringCannibalize(&dy);
 }
 
 void udcPathAndFileNames(struct udcFile *file, char *cacheDir, char *protocol, char *afterProtocol)
 /* Initialize udcFile path and names */
 {
+if (cacheDir==NULL)
+    return;
 char *hashedAfterProtocol = longDirHash(afterProtocol);
 int len = strlen(cacheDir) + 1 + strlen(protocol) + 1 + strlen(hashedAfterProtocol) + 1;
 file->cacheDir = needMem(len);
 safef(file->cacheDir, len, "%s/%s/%s", cacheDir, protocol, hashedAfterProtocol);
 
 /* Create file names for bitmap and data portions. */
 file->bitmapFileName = fileNameInCacheDir(file, bitmapName);
 file->sparseFileName = fileNameInCacheDir(file, sparseDataName);
 }
 
 static long long int udcSizeAndModTimeFromBitmap(char *bitmapFileName, time_t *retTime)
 /* Look up the file size from the local cache bitmap file, or -1 if there
  * is no cache for url. If retTime is non-null, store the remote update time in it. */
 {
 long long int ret = -1;
 struct udcBitmap *bits = udcBitmapOpen(bitmapFileName);
 if (bits != NULL)
     {
     ret = bits->fileSize;
     if (retTime)
 	*retTime = bits->remoteUpdate;
     }
 udcBitmapClose(&bits);
 return ret;
 }
 
 struct udcFile *udcFileMayOpen(char *url, char *cacheDir)
 /* Open up a cached file. cacheDir may be null in which case udcDefaultDir() will be
- * used.  Return NULL if file doesn't exist. */
+ * used.  Return NULL if file doesn't exist. 
+ * Caching is inactive if defaultDir is NULL or the protocol is "transparent".
+ * */
 {
 if (cacheDir == NULL)
     cacheDir = udcDefaultDir();
 verbose(2, "udcfileOpen(%s, %s)\n", url, cacheDir);
 /* Parse out protocol.  Make it "transparent" if none specified. */
 char *protocol = NULL, *afterProtocol = NULL, *colon;
 boolean isTransparent = FALSE;
 udcParseUrl(url, &protocol, &afterProtocol, &colon);
 if (!colon)
     {
     freeMem(protocol);
     protocol = cloneString("transparent");
     freeMem(afterProtocol);
     afterProtocol = cloneString(url);
     isTransparent = TRUE;
     }
 struct udcProtocol *prot;
 prot = udcProtocolNew(protocol);
 
 /* Figure out if anything exists. */
 boolean useCacheInfo = FALSE;
 struct udcRemoteFileInfo info;
 ZeroVar(&info);
 if (!isTransparent)
     {
+    if (udcDefaultDir() != NULL)
         useCacheInfo = (udcCacheAge(url, cacheDir) < udcCacheTimeout());
     if (!useCacheInfo)
 	{
 	if (!prot->fetchInfo(url, &info))
 	    {
 	    udcProtocolFree(&prot);
 	    freeMem(protocol);
 	    freeMem(afterProtocol);
 	    return NULL;
 	    }
 	}
     }
 
 /* Allocate file object and start filling it in. */
 struct udcFile *file;
@@ -913,50 +918,53 @@
     struct stat status;
     fstat(fd, &status);
     file->startData = 0;
     file->endData = file->size = status.st_size;
     }
 else 
     {
     udcPathAndFileNames(file, cacheDir, protocol, afterProtocol);
     if (!useCacheInfo)
 	{
 	file->updateTime = info.updateTime;
 	file->size = info.size;
 	memcpy(&(file->connInfo), &(info.ci), sizeof(struct connInfo));
 	// update cache file mod times, so if we're caching we won't do this again
 	// until the timeout has expired again:
-	if (udcCacheTimeout() > 0 && fileExists(file->bitmapFileName))
+    	if (udcCacheTimeout() > 0 && (udcDefaultDir()!=NULL) && fileExists(file->bitmapFileName))
 	    (void)maybeTouchFile(file->bitmapFileName);
 	}
 
+    if (udcDefaultDir() != NULL)
+        {
         /* Make directory. */
         makeDirsOnPath(file->cacheDir);
 
         /* Figure out a little bit about the extent of the good cached data if any. Open bits bitmap. */
         setInitialCachedDataBounds(file, useCacheInfo);
 
         file->fdSparse = mustOpenFd(file->sparseFileName, O_RDWR);
+        }
 
     }
 freeMem(afterProtocol);
 return file;
 }
 
 struct udcFile *udcFileOpen(char *url, char *cacheDir)
 /* Open up a cached file.  cacheDir may be null in which case udcDefaultDir() will be
- * used.  Abort if if file doesn't exist. */
+ * used.  Abort if file doesn't exist. */
 {
 struct udcFile *udcFile = udcFileMayOpen(url, cacheDir);
 if (udcFile == NULL)
     errAbort("Couldn't open %s", url);
 return udcFile;
 }
 
 
 struct slName *udcFileCacheFiles(char *url, char *cacheDir)
 /* Return low-level list of files used in cache. */
 {
 char *protocol, *afterProtocol, *colon;
 struct udcFile *file;
 udcParseUrl(url, &protocol, &afterProtocol, &colon);
 if (colon == NULL)
@@ -981,30 +989,31 @@
 {
 struct udcFile *file = *pFile;
 if (file != NULL)
     {
     if (file->connInfo.socket != 0)
 	mustCloseFd(&(file->connInfo.socket));
     if (file->connInfo.ctrlSocket != 0)
 	mustCloseFd(&(file->connInfo.ctrlSocket));
     freeMem(file->url);
     freeMem(file->protocol);
     udcProtocolFree(&file->prot);
     freeMem(file->cacheDir);
     freeMem(file->bitmapFileName);
     freeMem(file->sparseFileName);
     freeMem(file->sparseReadAheadBuf);
+    if (file->fdSparse != 0)
         mustCloseFd(&(file->fdSparse));
     udcBitmapClose(&file->bits);
     }
 freez(pFile);
 }
 
 static void qDecode(const char *input, char *buf, size_t size)
 /* Reverse the qEncode performed on afterProcotol above into buf or abort. */
 {
 safecpy(buf, size, input);
 char c, *r = buf, *w = buf;
 while ((c = *r++) != '\0')
     {
     if (c == 'Q')
 	{
@@ -1240,30 +1249,33 @@
 if (rangeIntersectOrTouch64(file->startData, file->endData, fetchedStart, fetchedEnd))
     {
     if (fetchedStart > file->startData)
         fetchedStart = file->startData;
     if (fetchedEnd < file->endData)
         fetchedEnd = file->endData;
     }
 file->startData = fetchedStart;
 file->endData = fetchedEnd;
 }
 
 static boolean udcCachePreload(struct udcFile *file, bits64 offset, bits64 size)
 /* Make sure that given data is in cache - fetching it remotely if need be. 
  * Return TRUE on success. */
 {
+if (udcDefaultDir() == NULL)
+    return TRUE;
+
 boolean ok = TRUE;
 /* We'll break this operation into blocks of a reasonable size to allow
  * other processes to get cache access, since we have to lock the cache files. */
 bits64 s,e, endPos=offset+size;
 for (s = offset; s < endPos; s = e)
     {
     /* Figure out bounds of this section. */
     e = s + udcMaxBytesPerRemoteFetch;
     if (e > endPos)
 	e = endPos;
 
     struct udcBitmap *bits = file->bits;
     if (bits->version == file->bitmapVersion)
 	{
         udcFetchMissing(file, bits, s, e);
@@ -1272,30 +1284,37 @@
 	{
 	ok = FALSE;
 	verbose(2, "udcCachePreload version check failed %d vs %d", 
 		bits->version, file->bitmapVersion);
 	}
     if (!ok)
         break;
     }
 return ok;
 }
 
 #define READAHEADBUFSIZE 4096
 bits64 udcRead(struct udcFile *file, void *buf, bits64 size)
 /* Read a block from file.  Return amount actually read. */
 {
+// if udcDefaultDir is NULL, don't do local caching, just fetch the data
+if (udcDefaultDir()==NULL && !sameString(file->protocol, "transparent"))
+    {
+    int actualSize = file->prot->fetchData(file->url, file->offset, size, buf, &(file->connInfo));
+    file->offset += actualSize;
+    return actualSize;
+    }
 
 /* Figure out region of file we're going to read, and clip it against file size. */
 bits64 start = file->offset;
 if (start > file->size)
     return 0;
 bits64 end = start + size;
 if (end > file->size)
     end = file->size;
 size = end - start;
 char *cbuf = buf;
 
 /* use read-ahead buffer if present */
 bits64 bytesRead = 0;
 
 bits64 raStart;
@@ -1531,37 +1550,39 @@
 
 struct lineFile *udcWrapShortLineFile(char *url, char *cacheDir, size_t maxSize)
 /* Read in entire short (up to maxSize) url into memory and wrap a line file around it.
  * The cacheDir may be null in which case udcDefaultDir() will be used.  If maxSize
  * is zero then a default value (currently 64 meg) will be used. */
 {
 if (maxSize == 0) maxSize = 64 * 1024 * 1024;
 char *buf = udcFileReadAll(url, cacheDir, maxSize, NULL);
 return lineFileOnString(url, TRUE, buf);
 }
 
 void udcSeekCur(struct udcFile *file, bits64 offset)
 /* Seek to a particular position in file. */
 {
 file->offset += offset;
+if (udcDefaultDir())
     mustLseek(file->fdSparse, offset, SEEK_CUR);
 }
 
 void udcSeek(struct udcFile *file, bits64 offset)
 /* Seek to a particular position in file. */
 {
 file->offset = offset;
+if (udcDefaultDir())
     mustLseek(file->fdSparse, offset, SEEK_SET);
 }
 
 bits64 udcTell(struct udcFile *file)
 /* Return current file position. */
 {
 return file->offset;
 }
 
 static long bitRealDataSize(char *fileName)
 /* Return number of real bytes indicated by bitmaps */
 {
 struct udcBitmap *bits = udcBitmapOpen(fileName);
 int blockSize = bits->blockSize;
 long byteSize = 0;
@@ -1636,31 +1657,32 @@
 time_t deleteTime = time(NULL) - maxSeconds;
 bits64 result = rCleanup(deleteTime, testOnly);
 setCurrentDir(curPath);
 return result;
 }
 
 static char *defaultDir = "/tmp/udcCache";
 
 char *udcDefaultDir()
 /* Get default directory for cache */
 {
 return defaultDir;
 }
 
 void udcSetDefaultDir(char *path)
-/* Set default directory for cache */
+/* Set default directory for cache. 
+ * Set to NULL to deactivate the local caching. */
 {
 defaultDir = cloneString(path);
 }
 
 
 int udcCacheTimeout()
 /* Get cache timeout (if local cache files are newer than this many seconds,
  * we won't ping the remote server to check the file size and update time). */
 {
 return cacheTimeout;
 }
 
 void udcSetCacheTimeout(int timeout)
 /* Set cache timeout (if local cache files are newer than this many seconds,
  * we won't ping the remote server to check the file size and update time). */