8bb6e8cc14b264b4a32b855045151e98638267c3 galt Tue Mar 28 23:12:00 2017 -0700 Step1 Seven Bridges HIPAA-compliant Amazon Storage securely signed URLs meant for GET do not allow them to be used with another method such as HEAD. Therefore we use a GET with byterange=0-0 so that it will still work with Amazon and still return correct info about size and last-modified date of the bigDataUrl file. refs #19158 diff --git src/lib/udc.c src/lib/udc.c index 8fa03f3..6749ca6 100644 --- src/lib/udc.c +++ src/lib/udc.c @@ -471,76 +471,73 @@ ci->offset += total; return total; } boolean udcInfoViaHttp(char *url, struct udcRemoteFileInfo *retInfo) /* Gets size and last modified time of URL * and returns status of HEAD GET. */ { verbose(4, "checking http remote info on %s\n", url); int redirectCount = 0; struct hash *hash; int status; while (TRUE) { hash = newHash(0); - status = netUrlHead(url, hash); - if (status == 200 || status == 403) + // Avoiding HEAD makes it easier to work with HIPAA-compliant signed AmazonS3 URLs. + // In part because the URL generated for GET cannot be used with HEAD. 
+ status = netUrlFakeHeadByGet(url, hash); + if (status == 206) break; if (status != 301 && status != 302) return FALSE; ++redirectCount; if (redirectCount > 5) { warn("code %d redirects: exceeded limit of 5 redirects, %s", status, url); return FALSE; } char *newUrl = hashFindValUpperCase(hash, "Location:"); retInfo->ci.redirUrl = cloneString(newUrl); url = transferParamsToRedirectedUrl(url, newUrl); hashFree(&hash); } -char *sizeString = hashFindValUpperCase(hash, "Content-Length:"); -if (sizeString == NULL) +char *rangeString = hashFindValUpperCase(hash, "Content-Range:"); +if (rangeString) { - /* try to get remote file size by an alternate method */ - long long retSize = netUrlSizeByRangeResponse(url); - if (retSize < 0) + /* input pattern: Content-Range: bytes 0-99/2738262 */ + char *slash = strchr(rangeString,'/'); + if (slash) { - hashFree(&hash); - errAbort("No Content-Length: returned in header for %s, can't proceed, sorry", url); + retInfo->size = atoll(slash+1); } - retInfo->size = retSize; - } -else - { - retInfo->size = atoll(sizeString); } char *lastModString = hashFindValUpperCase(hash, "Last-Modified:"); if (lastModString == NULL) { // Date is a poor substitute! It will always appear that the cache is stale. // But at least we can read files from dropbox.com. lastModString = hashFindValUpperCase(hash, "Date:"); if (lastModString == NULL) { hashFree(&hash); errAbort("No Last-Modified: or Date: returned in header for %s, can't proceed, sorry", url); } } + struct tm tm; time_t t; // Last-Modified: Wed, 15 Nov 1995 04:58:08 GMT // This will always be GMT if (strptime(lastModString, "%a, %d %b %Y %H:%M:%S %Z", &tm) == NULL) { /* Handle error */; hashFree(&hash); errAbort("unable to parse last-modified string [%s]", lastModString); } t = mktimeFromUtc(&tm); if (t == -1) { /* Handle error */; hashFree(&hash); errAbort("mktimeFromUtc failed while converting last-modified string [%s] from UTC time", lastModString); }