032653c06fe43263c625613a538389fc2425976b
jcasper
  Tue Jul 16 17:16:51 2024 -0700
Updated hic straw library adapted for UCSC, with a udc-enabled curl substitute.  refs #33225

diff --git src/hg/lib/straw/new_cStraw.cpp src/hg/lib/straw/new_cStraw.cpp
new file mode 100644
index 0000000..2242452
--- /dev/null
+++ src/hg/lib/straw/new_cStraw.cpp
@@ -0,0 +1,192 @@
+
+#include <cstring>
+#include <iostream>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <map>
+#include <set>
+#include <vector>
+#include <streambuf>
+#include "zlib.h"
+#include "new_straw.h"
+using namespace std;
+
+// Supplementary functions for invoking Straw in C
+
+struct Straw {
+    string *fileName;
+    string *genome;
+    vector<string> chromNames;
+    vector<int> chromSizes;
+    int nChroms;
+    vector<int> bpResolutions;
+    int nBpRes;
+    vector<int> fragResolutions;
+    int nFragRes;
+    vector<string> attributes;
+    int nAttributes;
+};
+
+
+extern "C" char *cStrawOpen(char *fname, Straw **p)
+/* Create a Straw object based on the hic file at the provided path and set *p to point to it.
+ * On error, set *p = NULL and return a non-null string describing the error. */
+{
+    *p = (Straw*) calloc (1, sizeof(struct Straw));
+
+    (*p)->fileName = new string(fname);
+    (*p)->genome = new string();
+    string genome;
+    try {
+        getHeaderFields(fname, genome, (*p)->chromNames, (*p)->chromSizes, (*p)->bpResolutions,
+                (*p)->fragResolutions, (*p)->attributes);
+    } catch (strawException& err) {
+      char *errMsg = (char*) calloc((size_t) strlen(err.what())+1, sizeof(char));
+      strcpy(errMsg, err.what());
+      free(*p);
+      *p = NULL;
+      return errMsg;
+    }
+    (*p)->genome->assign(genome);
+    (*p)->nChroms = (*p)->chromNames.size();
+    (*p)->nBpRes = (*p)->bpResolutions.size();
+    (*p)->nFragRes = (*p)->fragResolutions.size();
+    (*p)->nAttributes = (*p)->attributes.size();
+    if ((*p)->nBpRes > 0)
+        return NULL;
+    char *errString = (char*) malloc (strlen("Unable to retrieve header data from file") + 1);
+    strcpy(errString, "Unable to retrieve header data from file");
+    return errString;
+}
+
+extern "C" void cStrawClose(Straw **hicFile)
+/* Free up a straw object created with cStrawOpen() */
+{
+    if (*hicFile != NULL)
+    {
+        delete (*hicFile)->genome;
+        (*hicFile)->chromNames.clear();
+        (*hicFile)->chromSizes.clear();
+        (*hicFile)->bpResolutions.clear();
+        (*hicFile)->fragResolutions.clear();
+        (*hicFile)->attributes.clear();
+    }
+    free(*hicFile);
+    *hicFile = NULL;
+}
+
+extern "C" char *cStraw (Straw *hicFile, char *norm, int binsize, char *chr1loc, char *chr2loc, char *unit, int **xActual, int **yActual, double **counts, int *numRecords)
+/* Wrapper function to retrieve a data chunk from a .hic file, for use by C libraries.
+ * norm is one of NONE/VC/VC_SQRT/KR.
+ * binsize is one of the supported bin sizes as determined by cStrawHeader.
+ * chr1loc and chr2loc are the two positions to retrieve interaction data for, specified as chr:start-end.
+ * unit is one of BP/FRAG.
+ * Values are returned in newly allocated arrays in xActual, yActual, and counts, with the number of
+ * returned records in numRecords.
+ * The function returns NULL unless an error was encountered, in which case the return value points
+ * to a character string explaining the error. */
+{
+    string thisnorm(norm);
+    string thischr1loc(chr1loc);
+    string thischr2loc(chr2loc);
+    string thisunit(unit);
+    vector<contactRecord> strawRecords;
+    try {
+    strawRecords = straw("observed",  thisnorm, *(hicFile->fileName),
+            thischr1loc, thischr2loc, unit, binsize);
+    } catch (strawException& err) {
+      char *errMsg = (char*) calloc((size_t) strlen(err.what())+1, sizeof(char));
+      strcpy(errMsg, err.what());
+      return errMsg;
+    }
+    *numRecords = strawRecords.size();
+    *xActual = (int*) calloc((size_t) *numRecords, sizeof(int));
+    *yActual = (int*) calloc((size_t) *numRecords, sizeof(int));
+    *counts = (double*) calloc((size_t) *numRecords, sizeof(double));
+    for (int i=0; i<*numRecords; i++)
+        {
+        (*xActual)[i] = strawRecords[i].binX;
+        (*yActual)[i] = strawRecords[i].binY;
+        (*counts)[i] = (double) strawRecords[i].counts;
+        }
+    return NULL;
+}
+
+
+extern "C" char *cStrawHeader (Straw *hicFile, char **genome, char ***chromNames, int **chromSizes, int *nChroms, char ***bpResolutions, int *nBpRes, char ***fragResolutions, int *nFragRes, char ***attributes, int *nAttributes)
+/* Wrapper function to retrieve header fields from a .hic file, for use by C libraries.
+ * This retrieves the assembly name, list of chromosome names, list of available binsize resolutions,
+ * and list of available fragment resolutions in the specific .hic file.
+ * The function returns NULL unless an error was encountered, in which case the return value points
+ * to a character string explaining the error. */
+{
+    if (genome != NULL)
+    {
+        *genome = (char*) malloc((hicFile->genome->length()+1)*sizeof(char));
+        strcpy(*genome, hicFile->genome->c_str());
+    }
+    if (nChroms != NULL)
+    {
+        *nChroms = hicFile->nChroms;
+    }
+    if (chromNames != NULL)
+    {
+        *chromNames = (char**) calloc((size_t) hicFile->chromNames.size(), sizeof(char*));
+        for (int i=0; i<hicFile->chromNames.size(); i++)
+        {
+            (*chromNames)[i] = (char*) malloc((hicFile->chromNames[i].length()+1)*sizeof(char));
+            strcpy((*chromNames)[i], hicFile->chromNames[i].c_str());
+        }
+    }
+    if (chromSizes != NULL)
+    {
+        *chromSizes = (int*) calloc((size_t) hicFile->chromSizes.size(), sizeof(int));
+        for (int i=0; i<hicFile->chromSizes.size(); i++)
+        {
+            (*chromSizes)[i] = hicFile->chromSizes[i];
+        }
+    }
+    if (nBpRes != NULL)
+    {
+        *nBpRes = hicFile->nBpRes;
+    }
+    if (bpResolutions != NULL)
+    {
+        *bpResolutions = (char**) calloc((size_t) hicFile->bpResolutions.size(), sizeof(char*));
+        for (int i=0; i<hicFile->bpResolutions.size(); i++)
+        {
+            (*bpResolutions)[i] = (char*) malloc((to_string(hicFile->bpResolutions[i]).length()+1)*sizeof(char));
+            strcpy((*bpResolutions)[i], to_string(hicFile->bpResolutions[i]).c_str());
+        }
+    }
+    // skipping fragment stuff for now
+    if (nFragRes != NULL && false)
+    {
+        *nFragRes = hicFile->nFragRes;
+    }
+    // skipping fragment stuff for now
+    if (fragResolutions != NULL && false)
+    {
+        *fragResolutions = (char**) calloc((size_t) hicFile->fragResolutions.size(), sizeof(char*));
+        for (int i=0; i<hicFile->fragResolutions.size(); i++)
+        {
+            (*fragResolutions)[i] = (char*) malloc((to_string(hicFile->fragResolutions[i]).length()+1)*sizeof(char));
+            strcpy((*fragResolutions)[i], to_string(hicFile->fragResolutions[i]).c_str());
+        }
+    }
+    if (nAttributes != NULL)
+    {
+        *nAttributes = hicFile->nAttributes;
+    }
+    if (attributes != NULL)
+    {
+        *attributes = (char**) calloc((size_t) hicFile->attributes.size(), sizeof(char*));
+        for (int i=0; i<hicFile->attributes.size(); i++)
+        {
+            (*attributes)[i] = (char*) malloc((hicFile->attributes[i].length()+1)*sizeof(char));
+            strcpy((*attributes)[i], hicFile->attributes[i].c_str());
+        }
+    }
+    return NULL;
+}