5fb174c9dcf6926682b293c420406d0c2e5cf4a3
chmalee
  Fri Jan 31 09:15:12 2020 -0800
Make hubClone use pipeline functions to call wget

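The kent pipeline module runs the command via fork/exec rather than through a shell, which is why it can honor SIGINT where, per the old downloadFile comment removed below, system() and popen() do not. The following is a minimal sketch of the pattern the commit adopts, not code from the commit: the URL, the output name, and the pipelineFree() cleanup call are illustrative, and it assumes the kent include tree and libraries are available.

    #include "common.h"
    #include "pipeline.h"

    int main(int argc, char *argv[])
    {
    /* Placeholder URL and output path, for illustration only. */
    char *url = "http://example.com/hub.txt";
    char *outName = "hub.txt";
    char *cmd[] = {"wget", "-q", "-O", outName, url, NULL};

    /* wget writes the output file itself via -O, so the pipeline's other
     * end goes to /dev/null.  pipelineNoAbort makes a non-zero exit show
     * up as the return value of pipelineWait() instead of aborting. */
    struct pipeline *pl = pipelineOpen1(cmd, pipelineWrite | pipelineNoAbort, "/dev/null", NULL);
    int ret = pipelineWait(pl);
    if (ret != 0)
        warn("wget failed for url: %s", url);
    pipelineFree(&pl);
    return ret;
    }

Delegating the transfer to wget also removes the manual chunked-read loop the old udc-based downloadFile needed.
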
diff --git src/hg/utils/hubClone/hubClone.c src/hg/utils/hubClone/hubClone.c
index 5787fd9..fa5c049 100644
--- src/hg/utils/hubClone/hubClone.c
+++ src/hg/utils/hubClone/hubClone.c
@@ -1,27 +1,28 @@
 /* hubClone - Clone the hub text files to a local copy, fixing up bigDataUrls
  * to remote location if necessary. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
 #include "trackDb.h"
 #include "cart.h" // can't include trackHub.h without this?
 #include "trackHub.h"
 #include "errCatch.h"
 #include "ra.h"
 #include "hui.h"
+#include "pipeline.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "hubClone - Clone the remote hub text files to a local copy in newDirectoryName, fixing up bigDataUrls to remote location if necessary\n"
   "usage:\n"
   "   hubClone http://url/to/hub.txt\n"
   "options:\n"
   "   -udcDir=/dir/to/udcCache   Path to udc directory\n"
   "   -download                  Download data files in addition to the hub configuration files\n"
   );
 }
 
 /* Command line validation table. */
@@ -97,62 +98,30 @@
                 if ((tdbFileName = strrchr((char *)hel->val, '/')) != NULL)
                     tdbFileName += 1;
                 else
                     tdbFileName = (char *)hel->val;
                 fprintf(out, "%s %s/%s\n", hel->name, genome, tdbFileName);
                 }
             }
         else
             fprintf(out, "%s %s\n", hel->name, (char *)hel->val);
         }
     }
 fprintf(out, "\n");
 hashElFreeList(&helList);
 }
 
-#define READ_SIZE 1024 * 1024 * 64
-int downloadFile(FILE *f, char *url)
-/* Download a file in chunks, return -1 on error. Wrap in errCatch so
- * we can keep downloading rest of hub files.
- * For now using udc to read the files, but curl or wget would be preferred.
- * The reason I'm not using them is because system() and popen don't honor
- * SIGINT.  */
-{
-int ret = 0;
-struct errCatch *errCatch = errCatchNew();
-if (errCatchStart(errCatch))
-    {
-    struct udcFile *file = udcFileOpen(url, udcDefaultDir());
-    size_t size = READ_SIZE;
-    off_t fileSize = udcFileSize(url);
-    off_t counter = 0;
-    char *buf = needLargeMem(size+1);
-    while (counter < fileSize)
-        {
-        bits64 sizeRead = udcRead(file, buf, size);
-        counter += sizeRead;
-        mustWrite(f, buf, sizeRead);
-        }
-    freeMem(buf);
-    udcFileClose(&file);
-    }
-errCatchEnd(errCatch);
-if (errCatch->gotError)
-    ret = -1;
-errCatchFree(&errCatch);
-return ret;
-}
 
 void printTrackDbStanza(struct hash *stanza, FILE *out, char *baseUrl, char *downloadDir)
 /* print a trackDb stanza but with relative references replaced by remote links */
 {
 struct hashEl *hel, *helList = hashElListHash(stanza);
 struct dyString *fname = dyStringNew(0);
 fprintf(out, "%s %s\n", "track", (char *)hashFindVal(stanza, "track"));
 for (hel = helList; hel != NULL; hel = hel->next)
     {
     if (!sameString(hel->name, "track"))
         {
         if (sameString(hel->name, "bigDataUrl") ||
             sameString(hel->name, "bigDataIndex") ||
             sameString(hel->name, "barChartMatrixUrl") ||
             sameString(hel->name, "barChartSampleUrl") ||
@@ -166,35 +135,41 @@
             char *urlToData = trackHubRelativeUrl(baseUrl, hel->val);
             if (isNotEmpty(downloadDir))
                 {
                 dyStringClear(fname);
                 char *relName = strrchr(hel->val,'/');
                 if (relName != NULL)
                     {
                     relName = relName + 1;
                     dyStringPrintf(fname, "%s%s", downloadDir, relName);
                     }
                 else
                     {
                     relName = hel->val;
                     dyStringPrintf(fname, "%s%s", downloadDir, (char *)hel->val);
                     }
-                FILE *f = mustOpen(dyStringContents(fname), "wb");
-                // download file, in chunks if necessary
-                if (downloadFile(f, urlToData) == -1)
-                    fprintf(stderr, "Error downloading file. Try again with wget or curl: %s\n", urlToData);
                 fprintf(out, "%s %s\n", hel->name, relName);
+                char *cmd[] = {"wget", "-q", "-O", dyStringContents(fname), urlToData, NULL};
+
+                // use pipelineNoAbort so the loop continues past a mistyped or
+                // unreachable URL, but still warn the user
+                struct pipeline *pl = pipelineOpen1(cmd, pipelineWrite | pipelineNoAbort, "/dev/null", NULL);
+                int ret = pipelineWait(pl);
+                if (ret != 0)
+                    {
+                    warn("wget failed for url: %s", urlToData);
+                    }
                 }
             else
                 fprintf(out, "%s %s\n", hel->name, urlToData);
             }
         else
             fprintf(out, "%s %s\n", hel->name, (char *)hel->val);
         }
     }
 fprintf(out, "\n");
 hashElFreeList(&helList);
 }
 
 void printGenericStanza(struct hash *stanza, FILE *out, char *baseUrl)
 /* print a hash to out */
 {
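
With -download, hubClone now fetches each referenced data file by running wget as a child process, so wget must be on the PATH; a failed transfer produces a warning and the clone continues. A typical invocation, reusing the placeholder URL from the usage message:

   hubClone -download http://url/to/hub.txt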