48bb8323f9f6b874fb147860cdf9cb72de663ce0 galt Thu Apr 13 14:51:44 2017 -0700 Fixing backwards compatibility for udcCache after SevenBridges changes. Restores HEAD as primary date/size check. diff --git src/lib/udc.c src/lib/udc.c index fa5cfb9..cb1dbfc 100644 --- src/lib/udc.c +++ src/lib/udc.c @@ -470,61 +470,105 @@ if (ci == NULL) mustCloseFd(&sd); else ci->offset += total; return total; } boolean udcInfoViaHttp(char *url, struct udcRemoteFileInfo *retInfo) /* Gets size and last modified time of URL * and returns status of HEAD GET. */ { verbose(4, "checking http remote info on %s\n", url); int redirectCount = 0; struct hash *hash; int status; +char *sizeString = NULL; while (TRUE) { hash = newHash(0); - // Avoiding HEAD makes it easier to work with HIPPAA compliant signed AmazonS3 URLs. - // In part because the URL generated for GET cannot be used with HEAD. + status = netUrlHead(url, hash); + sizeString = hashFindValUpperCase(hash, "Content-Length:"); + if (status == 200 && sizeString) + break; + if (status == 403 || (status==200 && !sizeString)) + { + // Avoiding HEAD makes it work with HIPPAA compliant signed AmazonS3 URLs. + // The signed URL generated for GET cannot be used with HEAD. + // There are also a few sites which support byte-ranges but do not return Content-Length with HEAD. + hashFree(&hash); + hash = newHash(0); status = netUrlFakeHeadByGet(url, hash); if (status == 206) break; + } if (status != 301 && status != 302) return FALSE; ++redirectCount; if (redirectCount > 5) { warn("code %d redirects: exceeded limit of 5 redirects, %s", status, url); return FALSE; } char *newUrl = hashFindValUpperCase(hash, "Location:"); retInfo->ci.redirUrl = cloneString(newUrl); url = transferParamsToRedirectedUrl(url, newUrl); hashFree(&hash); } -char *rangeString = hashFindValUpperCase(hash, "Content-Range:"); -if (rangeString) +char *sizeHeader = NULL; +if (status == 200) { - /* input pattern: Content-Range: bytes 0-99/2738262 */ - char *slash = strchr(rangeString,'/'); - if (slash) + sizeHeader = "Content-Length:"; + // input pattern: Content-Length: 2738262 + } +if (status == 206) { - retInfo->size = atoll(slash+1); + sizeHeader = "Content-Range:"; + // input pattern: Content-Range: bytes 0-99/2738262 } + +sizeString = hashFindValUpperCase(hash, sizeHeader); +if (sizeString) + { + char *parseString = sizeString; + if (status == 206) + { + parseString = strchr(sizeString, '/'); + if (!parseString) + { + warn("Header value %s is missing '/' in %s in response for url %s", + sizeString, sizeHeader, url); + return FALSE; + } + ++parseString; // skip past slash + } + if (parseString) + { + retInfo->size = atoll(parseString); + } + else + { + warn("Header value %s is missing or invalid in %s in response for url %s", + sizeString, sizeHeader, url); + return FALSE; + } + } +else + { + warn("Response is missing required header %s for url %s", sizeHeader, url); + return FALSE; } char *lastModString = hashFindValUpperCase(hash, "Last-Modified:"); if (lastModString == NULL) { // Date is a poor substitute! It will always appear that the cache is stale. // But at least we can read files from dropbox.com. lastModString = hashFindValUpperCase(hash, "Date:"); if (lastModString == NULL) { hashFree(&hash); errAbort("No Last-Modified: or Date: returned in header for %s, can't proceed, sorry", url); } }