032653c06fe43263c625613a538389fc2425976b
jcasper
  Tue Jul 16 17:16:51 2024 -0700
Updated hic straw library adapted for UCSC, with a udc-enabled curl substitute.  refs #33225

diff --git src/hg/lib/fakeCurl.c src/hg/lib/fakeCurl.c
new file mode 100644
index 0000000..360e453
--- /dev/null
+++ src/hg/lib/fakeCurl.c
@@ -0,0 +1,168 @@
+/* FakeCurl - replacements for a handful of functions in the Curl library.  Sometimes we want to use
+ * external code blocks or libraries that rely on the curl library, but we want to replace those
+ * calls with our * own UDC local-caching system without needing to significantly modify those
+ * calls.  This provides a drop-in replacement that effectively implements (a small subset of) the
+ * curl library functions using UDC.
+ * Note that user-agent and redirect-following settings are ignored here - UDC doesn't currently
+ * support user-agent strings in requests and always follows redirects.
+ */
+
+/* Copyright (C) 2024 The Regents of the University of California
+ * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. */
+
+#include "common.h"
+#include "udc.h"
+#include "fakeCurl.h"
+
+CURL *curl_easy_init(void)
+/* Create a new fakeCurl object.  Dispose of this with curl_easy_cleanup().
+ */
+{
+    CURL *new = (CURL*) malloc (sizeof(CURL));
+    new->url = NULL;
+    new->range = NULL;
+    new->writeBuffer = NULL;
+    new->WriteFunction = NULL;
+    new->HeaderFunction = NULL;
+    return new;
+}
+
+void curl_easy_cleanup(CURL *curl)
+/* Free a curl object allocated with curl_easy_init().  Do not attempt
+ * to use the pointer's value after calling this function on it.
+ */
+{
+    if (curl != NULL)
+    {
+        // clear the local copies of URL strings
+        if (curl->url)
+            free(curl->url);
+        if (curl->range)
+            free(curl->range);
+        free(curl);
+    }
+}
+
+
+CURLcode curl_easy_setopt(CURL *curl, CURLoption option, ...)
+/* Configure a variety of options on a CURL object.  This supports a very small subset
+ * of the options provided by the real curl library - the supported list right now is just:
+ * CURLOPT_WRITEDATA, CURLOPT_RANGE, CURLOPT_WRITEFUNCTION, CURLOPT_HEADERFUNCTION, and
+ * CURLOPT_URL.
+ *
+ * The header function will be invoked, but only on a faked subset of what the actual
+ * header content would be - just a content range string.
+ *
+ * The CURLOPT_FOLLOWLOCATION and CURLOPT_USERAGENT settings are also accepted, but
+ * the values are ignored (the udc implementation here always follows redirects and lacks
+ * support for user-agent strings).
+ */
+{
+    va_list args;
+    va_start(args, option);
+    switch (option) {
+        case CURLOPT_WRITEDATA:
+            curl->writeBuffer = va_arg(args, void *);
+            break;
+        case CURLOPT_RANGE:
+            if (curl->range)
+                free(curl->range);
+            curl->range = cloneString(va_arg(args,char *));
+            break;
+        case CURLOPT_WRITEFUNCTION:
+            curl->WriteFunction = va_arg(args, curl_write_callback);
+            break;
+        case CURLOPT_URL:
+            if (curl->url)
+                free(curl->url);
+            curl->url = cloneString(va_arg(args,char *));
+            break;
+        case CURLOPT_FOLLOWLOCATION:
+            // ignored
+            break;
+        case CURLOPT_USERAGENT:
+            // ignored
+            break;
+        case CURLOPT_HEADERFUNCTION:
+            curl->HeaderFunction = va_arg(args, curl_write_callback);
+            break;
+        default:
+            errAbort("Unexpected curl option supplied to fakeCurl"); 
+    }
+    va_end(args); 
+    return CURLE_OK;
+}
+
+CURLcode curl_easy_perform(CURL *curl)
+/* Perform a fake curl operation via UDC, using the settings establised in the CURL object
+ * via calls to curl_easy_setopt().  The return value will be either CURLE_OK (for success)
+ * or CURLE_NOTOK (for failure).
+ *
+ * As noted in curl_easy_setopt(), the content provided to any supplied header function is
+ * a faked subset of actual header content - just a "Content-Range" string.
+ */
+{
+    // Open the file using UDC, returning an error if that fails
+    struct udcFile *udc = udcFileMayOpen(curl->url, NULL);
+    if (udc == NULL)
+        return CURLE_NOTOK;
+    long fileSize = (long) udcFileSize(curl->url);
+
+    // Set up the seek offset if there's a range supplied
+    long start = 0;
+    long end = fileSize;
+    if (curl->range != NULL)
+    {
+        start = atol(curl->range);
+        char *end_pos = strrchr(curl->range, '-');
+        if (end_pos != NULL && *(end_pos+1) != 0)
+            end = atol(end_pos+1);
+    }
+
+    // If there's a header function, fake up a Content-Range string for it to parse using the range
+    // and file size.
+    if (curl->HeaderFunction != NULL)
+    {
+        char buf[4096];
+        safef(buf, sizeof(buf), "Content-Range: bytes %ld-%ld/%ld", start, end, fileSize);
+        curl->HeaderFunction(buf, strlen(buf), 1, NULL);
+    }
+
+    char *readBuffer = (char*) malloc(end-start);
+    udcSeek(udc, start);
+    long bytesRead = udcRead(udc, readBuffer, end-start);
+    // Technically we should pay attention to the value returned by udcRead, as it might indicate
+    // that fewer bytes than requested were actually read.  Worry about that a bit later.
+
+    // If writefunction is defined, then call that on the buffer of data we got from udc.
+    // Otherwise, put it into the supplied writebuffer (which must exist).
+    if (curl->WriteFunction != NULL)
+    {
+        curl->WriteFunction(readBuffer, bytesRead, 1, curl->writeBuffer);
+    }
+    else
+    {
+        if (curl->writeBuffer == NULL)
+            errAbort("Attempting to fakeCurl fetch without specifying a write buffer first");
+        fwrite(readBuffer, bytesRead, 1, curl->writeBuffer);
+    }
+    free(readBuffer);
+
+    return CURLE_OK;
+}
+
+
+const char *curl_easy_strerror(CURLcode errornum)
+/* This converts a CURLcode error code into an associated error message string.  As this
+ * is a very barebones implementation, the only options are "Ok" for CURLE_OK and
+ * "fakeCurl failed" for CURLE_NOTOK.
+ */
+{
+    switch (errornum) {
+        case CURLE_OK:
+            return "Ok";
+            break;
+        default:
+            return "fakeCurl failed";
+    }
+}