a42073591695e7aa6428da9cbcad89a9783571a7
braney
  Thu Feb 19 11:00:55 2015 -0800
fix a bug in UDC cache when strings between slashes in a URL are longer
than 255 characters so they can't be made into a directory.  In this
case we now make a SHA1 hash of the long string and use that instead.

diff --git src/lib/udc.c src/lib/udc.c
index 39fd825..3789f37 100644
--- src/lib/udc.c
+++ src/lib/udc.c
@@ -22,30 +22,32 @@
  *    
  * The bitmap file contains time stamp and size data as well as an array with one bit
  * for each block of the file that has been fetched.  Currently the block size is 8K. */
 
 #include <sys/file.h>
 #include "common.h"
 #include "hash.h"
 #include "obscure.h"
 #include "bits.h"
 #include "linefile.h"
 #include "portable.h"
 #include "sig.h"
 #include "net.h"
 #include "cheapcgi.h"
 #include "udc.h"
+#include "hex.h"
+#include <openssl/sha.h>
 
 
 #define udcBlockSize (8*1024)
 /* All fetch requests are rounded up to block size. */
 
 #define udcMaxBytesPerRemoteFetch (udcBlockSize * 32)
 /* Very large remote reads are broken down into chunks this size. */
 
 struct connInfo
 /* Socket descriptor and associated info, for keeping net connections open. */
     {
     int socket;                 /* Socket descriptor for data connection (or 0). */
     bits64 offset;		/* Current file offset of socket. */
     int ctrlSocket;             /* (FTP only) Control socket descriptor or 0. */
     };
@@ -772,36 +774,77 @@
     *retAuth = NULL;
 afterProtocol = qEncode(afterProtocol);
 *retProtocol = protocol;
 *retAfterProtocol = afterProtocol;
 *retColon = colon;
 }
 
 void udcParseUrl(char *url, char **retProtocol, char **retAfterProtocol, char **retColon)
 /* Parse the URL into components that udc treats separately.
  * *retAfterProtocol is Q-encoded to keep special chars out of filenames.  
  * Free  *retProtocol and *retAfterProtocol but not *retColon when done. */
 {
 udcParseUrlFull(url, retProtocol, retAfterProtocol, retColon, NULL);
 }
 
+static void addElementToDy(struct dyString *dy, char *name)
+/* Append one path element to dy.  An element longer than NAME_MAX is not
+ * appended as-is; its SHA1 digest, rendered by hexBinaryString, is used. */
+{
+size_t nameLen = strlen(name);
+if (nameLen <= NAME_MAX)
+    {
+    dyStringAppend(dy, name);   /* short enough: use the element verbatim */
+    return;
+    }
+
+unsigned char digest[SHA_DIGEST_LENGTH];
+char hexDigest[(SHA_DIGEST_LENGTH + 1) * 2];
+SHA1((const unsigned char *)name, nameLen, digest);
+hexBinaryString(digest, SHA_DIGEST_LENGTH, hexDigest, sizeof(hexDigest));
+dyStringAppend(dy, hexDigest);
+}
+
+static char *longDirHash(char *name)
+/* Return a copy of path 'name' with each '/'-separated element longer than
+ * NAME_MAX replaced by its hash (see addElementToDy).  'name' is split in
+ * place while scanning, but every '/' is restored before returning. */
+{
+struct dyString *dy = newDyString(strlen(name));
+char *ptr = strchr(name, '/');
+
+while(ptr)
+    {
+    *ptr = 0;       /* temporarily terminate the current element */
+    addElementToDy(dy, name);
+    *ptr = '/';     /* put the separator back so the caller's string is intact */
+    dyStringAppend(dy, "/");
+    name = ptr + 1;
+    ptr = strchr(name, '/');
+    }
+
+addElementToDy(dy, name);   /* last element, after the final '/' (may be empty) */
+return dyStringCannibalize(&dy);
+}
+
 void udcPathAndFileNames(struct udcFile *file, char *cacheDir, char *protocol, char *afterProtocol)
 /* Initialize udcFile path and names */
 {
-int len = strlen(cacheDir) + 1 + strlen(protocol) + 1 + strlen(afterProtocol) + 1;
+char *hashedAfterProtocol = longDirHash(afterProtocol);  /* over-long path elements become hashes */
+int len = strlen(cacheDir) + 1 + strlen(protocol) + 1 + strlen(hashedAfterProtocol) + 1;
 file->cacheDir = needMem(len);
-safef(file->cacheDir, len, "%s/%s/%s", cacheDir, protocol, afterProtocol);
-
+safef(file->cacheDir, len, "%s/%s/%s", cacheDir, protocol, hashedAfterProtocol);
+freeMem(hashedAfterProtocol);  /* already copied into file->cacheDir; don't leak the temporary */
 /* Create file names for bitmap and data portions. */
 file->bitmapFileName = fileNameInCacheDir(file, bitmapName);
 file->sparseFileName = fileNameInCacheDir(file, sparseDataName);
 }
 
 static long long int udcSizeAndModTimeFromBitmap(char *bitmapFileName, time_t *retTime)
 /* Look up the file size from the local cache bitmap file, or -1 if there
  * is no cache for url. If retTime is non-null, store the remote update time in it. */
 {
 long long int ret = -1;
 struct udcBitmap *bits = udcBitmapOpen(bitmapFileName);
 if (bits != NULL)
     {
     ret = bits->fileSize;