25970e38be62db2e2fe313fd84b0d2337035de97
galt
  Thu Apr 13 22:54:22 2017 -0700
Adding some explanatory notes to udc.

diff --git src/lib/udc.c src/lib/udc.c
index cb1dbfc..01ea80b4 100644
--- src/lib/udc.c
+++ src/lib/udc.c
@@ -464,49 +464,63 @@
     buf += rd;
     remaining -= rd;
     }
 if (rd == -1)
     errnoAbort("udcDataViaHttpOrFtp: error reading socket");
 struct connInfo *ci = &file->connInfo;
 if (ci == NULL)
     mustCloseFd(&sd);
 else
     ci->offset += total;
 return total;
 }
 
 boolean udcInfoViaHttp(char *url, struct udcRemoteFileInfo *retInfo)
 /* Gets size and last modified time of URL
- * and returns status of HEAD GET. */
+ * and returns status of HEAD or GET byterange 0-0. */
 {
 verbose(4, "checking http remote info on %s\n", url);
 int redirectCount = 0;
 struct hash *hash;
 int status;
 char *sizeString = NULL;
+/*
+ For caching, sites should support byte-range and last-modified.
+ However, several groups including ENCODE have made sites that use CGIs to 
+ dynamically generate hub text files such as hub.txt, genome.txt, trackDb.txt.
+ Byte-range and last-modified are difficult to support for this case,
+ so they do without them, effectively defeating caching. Every 5 minutes (udcTimeout),
+ they get re-downloaded, even when the data has not changed.  
+*/
 while (TRUE)
     {
     hash = newHash(0);
     status = netUrlHead(url, hash);
     sizeString = hashFindValUpperCase(hash, "Content-Length:");
     if (status == 200 && sizeString)
 	break;
+    /*
+    Using HEAD with HIPAA-compliant signed AmazonS3 URLs generates 403.
+    The signed URL generated for GET cannot be used with HEAD.
+    Instead call GET with byterange=0-0 in netUrlFakeHeadByGet().
+    This supplies both size via Content-Range response header,
+    as well as Last-Modified header which is important for caching.
+    There are also sites which support byte-ranges 
+    but they do not return Content-Length with HEAD.
+    */
     if (status == 403 || (status==200 && !sizeString))
 	{ 
-	// Avoiding HEAD makes it work with HIPPAA compliant signed AmazonS3 URLs.
-	// The signed URL generated for GET cannot be used with HEAD.
-	// There are also a few sites which support byte-ranges but do not return Content-Length with HEAD.
 	hashFree(&hash);
 	hash = newHash(0);
 	status = netUrlFakeHeadByGet(url, hash);
 	if (status == 206) 
 	    break;
 	}
     if (status != 301 && status != 302)
 	return FALSE;
     ++redirectCount;
     if (redirectCount > 5)
 	{
 	warn("code %d redirects: exceeded limit of 5 redirects, %s", status, url);
 	return FALSE;
 	}
     char *newUrl = hashFindValUpperCase(hash, "Location:");