48bb8323f9f6b874fb147860cdf9cb72de663ce0
galt
  Thu Apr 13 14:51:44 2017 -0700
Fixing backwards compatibility for udcCache after SevenBridges changes. Restores HEAD as primary date/size check.

diff --git src/lib/udc.c src/lib/udc.c
index fa5cfb9..cb1dbfc 100644
--- src/lib/udc.c
+++ src/lib/udc.c
@@ -470,61 +470,105 @@
 if (ci == NULL)
     mustCloseFd(&sd);
 else
     ci->offset += total;
 return total;
 }
 
 boolean udcInfoViaHttp(char *url, struct udcRemoteFileInfo *retInfo)
 /* Gets size and last modified time of URL
  * and returns status of HEAD GET. */
 {
 verbose(4, "checking http remote info on %s\n", url);
 int redirectCount = 0;
 struct hash *hash;
 int status;
+char *sizeString = NULL;
 while (TRUE)
     {
     hash = newHash(0);
-    // Avoiding HEAD makes it easier to work with HIPPAA compliant signed AmazonS3 URLs.
-    // In part because the URL generated for GET cannot be used with HEAD.
+    status = netUrlHead(url, hash);
+    sizeString = hashFindValUpperCase(hash, "Content-Length:");
+    if (status == 200 && sizeString)
+	break;
+    if (status == 403 || (status==200 && !sizeString))
+	{ 
+	// Avoiding HEAD makes it work with HIPAA-compliant signed AmazonS3 URLs.
+	// The signed URL generated for GET cannot be used with HEAD.
+	// There are also a few sites which support byte-ranges but do not return Content-Length with HEAD.
+	hashFree(&hash);
+	hash = newHash(0);
 	status = netUrlFakeHeadByGet(url, hash);
 	if (status == 206) 
 	    break;
+	}
     if (status != 301 && status != 302)  
 	return FALSE;
     ++redirectCount;
     if (redirectCount > 5)
 	{
 	warn("code %d redirects: exceeded limit of 5 redirects, %s", status, url);
 	return FALSE;
 	}
     char *newUrl = hashFindValUpperCase(hash, "Location:");
     retInfo->ci.redirUrl = cloneString(newUrl);
     url = transferParamsToRedirectedUrl(url, newUrl);		
     hashFree(&hash);
     }
 
-char *rangeString = hashFindValUpperCase(hash, "Content-Range:");
-if (rangeString)
+char *sizeHeader = NULL;
+if (status == 200)
     {
-    /* input pattern: Content-Range: bytes 0-99/2738262 */
-    char *slash = strchr(rangeString,'/');
-    if (slash)
+    sizeHeader = "Content-Length:";
+    // input pattern: Content-Length: 2738262
+    }
+if (status == 206)
     {
-	retInfo->size = atoll(slash+1);
+    sizeHeader = "Content-Range:";
+    // input pattern: Content-Range: bytes 0-99/2738262
     }
+
+sizeString = hashFindValUpperCase(hash, sizeHeader);
+if (sizeString)
+    {
+    char *parseString = sizeString;
+    if (status == 206)
+	{
+	parseString = strchr(sizeString, '/');
+	if (!parseString)
+	    {
+	    warn("Header value %s is missing '/' in %s in response for url %s", 
+		sizeString, sizeHeader, url);
+	    return FALSE;
+	    }
+	++parseString; // skip past slash
+	}
+    if (parseString)
+	{
+	retInfo->size = atoll(parseString);
+	}
+    else
+	{
+	warn("Header value %s is missing or invalid in %s in response for url %s", 
+	    sizeString, sizeHeader, url);
+	return FALSE;
+	}
+    }
+else
+    {
+    warn("Response is missing required header %s for url %s", sizeHeader, url);
+    return FALSE;
     }
 
 char *lastModString = hashFindValUpperCase(hash, "Last-Modified:");
 if (lastModString == NULL)
     {
     // Date is a poor substitute!  It will always appear that the cache is stale.
     // But at least we can read files from dropbox.com.
     lastModString = hashFindValUpperCase(hash, "Date:");
     if (lastModString == NULL)
 	{
 	hashFree(&hash);
 	errAbort("No Last-Modified: or Date: returned in header for %s, can't proceed, sorry", url);
 	}
     }