c0afb74d69d2920d24cb8a92353b5cd333995326
galt
  Wed Dec 22 11:54:50 2010 -0800
Adding new paraSync utility. It is a little bit like rsync, but uses paraFetch to easily mirror a bunch of directories via http, https, or ftp. These all run over TCP, which has major problems by the time you hit Europe.
diff --git src/utils/paraSync/paraSync.c src/utils/paraSync/paraSync.c
new file mode 100644
index 0000000..2f05beb
--- /dev/null
+++ src/utils/paraSync/paraSync.c
@@ -0,0 +1,165 @@
+/* paraSync - uses paraFetch to recursively mirror url to given path. */
+#include "common.h"
+#include "options.h"
+#include "dystring.h"
+#include "obscure.h"
+#include "portable.h"
+#include "net.h"
+
+void usage()
+/* Report the command-line usage text through errAbort and exit. */
+{
+char *text =
+    "paraSync 1.0\n"
+    "paraSync - uses paraFetch to recursively mirror url to given path\n"
+    "usage:\n"
+    "   paraSync {options} N R URL outPath\n"
+    "   where N is the number of connections to use\n"
+    "         R is the number of retries\n"
+    "   Options:\n"
+    "    -A='ext1,ext2'  means accept only files with ext1 or ext2\n";
+errAbort("%s", text);
+}
+
+static struct optionSpec options[] = {   /* command-line option table handed to optionInit in main */
+   {"A", OPTION_STRING},   /* -A=ext1,ext2 : comma-separated list of extensions to accept (see usage) */
+   {NULL, 0},              /* presumably the end-of-table entry optionInit looks for - confirm in options.h */
+};
+
+char *acceptString = NULL;        /* raw value of the -A option, set in main; stays NULL when -A is absent */
+char **acceptExtensions = NULL;   /* acceptString split on ',' by main, one entry per extension */
+int acceptExtensionsCount = 0;    /* entries in acceptExtensions; 0 makes paraSync accept every file */
+
+boolean paraSync(int numConnections, int numRetries, struct dyString *url, struct dyString *outPath)
+/* Recursively mirror the directory listing at url into the local directory outPath.
+ * Both url and outPath must end in '/' (errAbort otherwise).  Each accepted link is
+ * appended to both strings while it is processed and removed again afterwards.
+ * Links ending in '/' are treated as directories and mirrored by a recursive call;
+ * other links are files, fetched with parallelFetch using numConnections and
+ * numRetries.  If accept extensions were given, only files ending in one of them
+ * are fetched.  A file is fetched when it is missing locally or when its
+ * .paraFetchStatus file still exists.
+ * Returns TRUE if no download failed in this directory or below, else FALSE. */
+{
+if (!endsWith(url->string,"/"))
+    errAbort("URL must end in slash /");
+if (!endsWith(outPath->string,"/"))
+    errAbort("outPath must end in slash /");
+// create subdir if it does not exist
+makeDir(outPath->string);
+// Fetch the remote directory listing; links are cut out of this buffer in place.
+struct dyString *dy = netSlurpUrl(url->string);
+char *p = dy->string;
+verbose(2,"response=[%s]\n", dy->string);
+
+boolean result = TRUE;
+char *pattern = "<a href=\"";
+while (TRUE)
+    {
+    char *q = strstr(p,pattern);
+    if (!q)
+	break;
+    q += strlen(pattern);
+    p = strchr(q,'"');
+    if (!p)
+	errAbort("unmatched \" in URL");
+    *p = 0;  // terminate the link text in place so q is a complete string
+    ++p;     // continue the next search after the closing quote
+
+    // Skip links that are not plain relative names below this directory.
+    if (q[0] == 0) continue;
+    if (q[0] == '?') continue;
+    if (q[0] == '/') continue;
+    if (startsWith(q, "ftp:")) continue;
+    if (startsWith(q, "http:")) continue;
+    if (startsWith(q, "https:")) continue;
+    if (startsWith(q, "./")) continue;
+    // The listing is remote input: refuse a ".." component anywhere in the link,
+    // not just at the start, so nothing can be written outside outPath.
+    if (strcmp(q, "..") == 0 || startsWith(q, "../")) continue;
+    if (strstr(q, "/../") || endsWith(q, "/..")) continue;
+
+    verbose(1, "%s\n", q);
+
+    int saveUrlSize = url->stringSize;
+    int saveOutPathSize = outPath->stringSize;
+    dyStringAppend(url, q);
+    dyStringAppend(outPath, q);
+
+    if (endsWith(q, "/")) // directory: mirror it with a recursive call
+	{
+	if (!paraSync(numConnections, numRetries, url, outPath))
+	    result = FALSE;
+	}
+    else    // file
+	{
+	// With no accept list every file qualifies; otherwise the name must end
+	// with one of the listed extensions.
+	boolean accepted = (acceptExtensionsCount == 0);
+	int i;
+	for (i=0; i<acceptExtensionsCount && !accepted; ++i)
+	    accepted = endsWith(q, acceptExtensions[i]);
+	if (accepted)
+	    {
+	    // Needs download if the status file exists (needs a resume)
+	    // or if the file itself does not exist.
+	    int restoreSize = outPath->stringSize;
+	    dyStringAppend(outPath, ".paraFetchStatus");
+	    boolean needsDownload = fileExists(outPath->string);
+	    dyStringResize(outPath, restoreSize);
+	    if (!fileExists(outPath->string))
+		needsDownload = TRUE;
+	    if (needsDownload)
+		{
+		if (!parallelFetch(url->string, outPath->string, numConnections, numRetries))
+		    {
+		    // report it, remember the failure, and carry on with the remaining links
+		    warn("failed to download %s\n", url->string);
+		    result = FALSE;
+		    }
+		else
+		    {
+		    verbose(1,"%s downloaded successfully\n", url->string);
+		    }
+		}
+	    }
+	}
+
+    // Remove the link again so both strings name this directory for the next link.
+    dyStringResize(url, saveUrlSize);
+    dyStringResize(outPath, saveOutPathSize);
+    }
+
+// Every link has been handled, so the listing buffer is no longer referenced.
+dyStringFree(&dy);
+return result;
+}
+
+int main(int argc, char *argv[])
+/* Process command line: strictly parse N and R, read -A, then mirror URL into outPath; exit status 1 if any download failed. */
+{
+optionInit(&argc, argv, options);
+if (argc != 5) usage();
+char *endN = NULL, *endR = NULL;  // strtol end pointers expose non-numeric text that atoi would read as 0
+long n = strtol(argv[1], &endN, 10), r = strtol(argv[2], &endR, 10);
+if (endN == argv[1] || *endN != 0 || n < 1 || n != (int)n ||
+    endR == argv[2] || *endR != 0 || r < 0 || r != (int)r)
+    errAbort("N must be a positive integer and R a non-negative integer (got N='%s' R='%s')", argv[1], argv[2]);
+acceptString = optionVal("A", NULL);
+if (acceptString)
+    {
+    acceptExtensionsCount = chopByChar(acceptString, ',', NULL, 0);  // NULL array: return value is used as the field count
+    AllocArray(acceptExtensions, acceptExtensionsCount);
+    chopByChar(acceptString, ',', acceptExtensions, acceptExtensionsCount);
+    verbose(1, "accept-option count: %d\n", acceptExtensionsCount);
+    int i;
+    for (i=0; i<acceptExtensionsCount; ++i)
+	verbose(2, "accept-option: %s\n", acceptExtensions[i]);
+    }
+struct dyString *url = dyStringNew(4096);
+struct dyString *outPath = dyStringNew(4096);
+dyStringAppend(url, argv[3]);
+dyStringAppend(outPath, argv[4]);
+return paraSync((int)n, (int)r, url, outPath) ? 0 : 1;
+}
+