ae1ef6aa226d4436d4dc9e2b9b28120d802ab8a5
jcasper
  Thu Jul 25 11:57:53 2024 -0700
Dropping new straw library files into place, refs #33225

diff --git src/hg/lib/straw/new_cStraw.cpp src/hg/lib/straw/new_cStraw.cpp
deleted file mode 100644
index 019c6d4..0000000
--- src/hg/lib/straw/new_cStraw.cpp
+++ /dev/null
@@ -1,247 +0,0 @@
-
-#include <cstring>
-#include <iostream>
-#include <fstream>
-#include <iostream>
-#include <sstream>
-#include <map>
-#include <set>
-#include <vector>
-#include <streambuf>
-#include "zlib.h"
-#include "new_straw.h"
-using namespace std;
-
-// Supplementary functions for invoking Straw in C
-
-struct Straw {
-    string *fileName;
-    string *genome;
-    vector<string> chromNames;
-    vector<int> chromSizes;
-    int nChroms;
-    vector<int> bpResolutions;
-    int nBpRes;
-    vector<int> fragResolutions;
-    int nFragRes;
-    vector<string> attributes;
-    int nAttributes;
-    set<string> normOptions;
-};
-
-
-extern "C" char *cStrawOpen(char *fname, Straw **p)
-/* Create a Straw object based on the hic file at the provided path and set *p to point to it.
- * On error, set *p = NULL and return a non-null string describing the error. */
-{
-    *p = (Straw*) calloc (1, sizeof(struct Straw));
-
-    (*p)->fileName = new string(fname);
-    (*p)->genome = new string();
-    try {
-        getHeaderFields(fname, *((*p)->genome), (*p)->chromNames, (*p)->chromSizes, (*p)->bpResolutions,
-                (*p)->fragResolutions, (*p)->attributes);
-    } catch (strawException& err) {
-      char *errMsg = (char*) calloc((size_t) strlen(err.what())+1, sizeof(char));
-      strcpy(errMsg, err.what());
-      free(*p);
-      *p = NULL;
-      return errMsg;
-    }
-    (*p)->nChroms = (*p)->chromNames.size();
-    (*p)->nBpRes = (*p)->bpResolutions.size();
-    (*p)->nFragRes = (*p)->fragResolutions.size();
-    (*p)->nAttributes = (*p)->attributes.size();
-
-    // I seem to recall situations where getHeaderFields doesn't throw an error, but the data are
-    // bad regardless (e.g. when the structure of a header changed between .hic versions).  This
-    // should help catch those.
-    if ((*p)->nBpRes == 0)
-    {
-        char *errString = (char*) malloc (strlen("Unable to retrieve header data from file") + 1);
-        strcpy(errString, "Unable to retrieve header data from file");
-        return errString;
-    }
-
-    // Time to get the normalization options.  As a hack, we get these by making a dummy data request,
-    // having that set up a global variable with the options seen, then retrieving that.  This will
-    // miss situations where different normalization options are available at different resolutions.
-    // That is a thing that can happen, but I haven't tested the performace hit of running through every
-    // available resolution for the possible options just yet (let alone restructuring the browser side
-    // of the code to support that kind of dependency).  In the interim, this method is still better
-    // than the previous strategy of hard-coding in options that sometimes aren't available (and missing
-    // ones that are).
- 
-    vector<contactRecord> strawRecords;
-    try {
-        // using chrom[1] because [0] is always "All", which doesn't seem to play well because it
-        // may have a different set of resolution options
-        string chrPos = (*p)->chromNames[1] + ":1:1";
-        // Intentionally feed straw an empty normalization option.  This will cause an error (which we trap),
-        // but it's the easiest way to make the library load and compare the available options (the library
-        // short-circuits out early if "NONE" is provided).
-        strawRecords = straw("observed", "", string(fname),
-            chrPos, chrPos, "BP", (*p)->bpResolutions[0]);
-    } catch (strawException& err) {
-        // Do nothing - we're intentionally feeding it a bad norm option just so it'll go through the list
-        // and realize it can't find it, populating a list along the way.
-    }
-
-    // Now the list that getNormOptions() depends on should be populated
-    (*p)->normOptions = getNormOptions();
-
-    return NULL;
-}
-
-extern "C" void cStrawClose(Straw **hicFile)
-/* Free up a straw object created with cStrawOpen() */
-{
-    if (*hicFile != NULL)
-    {
-        delete (*hicFile)->genome;
-        (*hicFile)->chromNames.clear();
-        (*hicFile)->chromSizes.clear();
-        (*hicFile)->bpResolutions.clear();
-        (*hicFile)->fragResolutions.clear();
-        (*hicFile)->attributes.clear();
-        (*hicFile)->normOptions.clear();
-    }
-    free(*hicFile);
-    *hicFile = NULL;
-}
-
-extern "C" char *cStraw (Straw *hicFile, char *norm, int binsize, char *chr1loc, char *chr2loc, char *unit, int **xActual, int **yActual, double **counts, int *numRecords)
-/* Wrapper function to retrieve a data chunk from a .hic file, for use by C libraries.
- * norm is probably one of NONE/VC/VC_SQRT/KR/SCALE, but it depends on the file.
- * binsize is one of the supported bin sizes as determined by cStrawHeader.
- * chr1loc and chr2loc are the two positions to retrieve interaction data for, specified as chr:start-end.
- * unit is one of BP/FRAG.
- * Values are returned in newly allocated arrays in xActual, yActual, and counts, with the number of
- * returned records in numRecords.
- * The function returns NULL unless an error was encountered, in which case the return value points
- * to a character string explaining the error. */
-{
-    string thisnorm(norm);
-    string thischr1loc(chr1loc);
-    string thischr2loc(chr2loc);
-    string thisunit(unit);
-    vector<contactRecord> strawRecords;
-    try {
-    strawRecords = straw("observed",  thisnorm, *(hicFile->fileName),
-            thischr1loc, thischr2loc, unit, binsize);
-    } catch (strawException& err) {
-      char *errMsg = (char*) calloc((size_t) strlen(err.what())+1, sizeof(char));
-      strcpy(errMsg, err.what());
-      return errMsg;
-    }
-    *numRecords = strawRecords.size();
-    *xActual = (int*) calloc((size_t) *numRecords, sizeof(int));
-    *yActual = (int*) calloc((size_t) *numRecords, sizeof(int));
-    *counts = (double*) calloc((size_t) *numRecords, sizeof(double));
-    for (int i=0; i<*numRecords; i++)
-        {
-        (*xActual)[i] = strawRecords[i].binX;
-        (*yActual)[i] = strawRecords[i].binY;
-        (*counts)[i] = (double) strawRecords[i].counts;
-        }
-    return NULL;
-}
-
-
-extern "C" char *cStrawHeader (Straw *hicFile, char **genome, char ***chromNames, int **chromSizes, int *nChroms,
-        char ***bpResolutions, int *nBpRes, char ***fragResolutions, int *nFragRes, char ***attributes,
-        int *nAttributes, char ***normOptions, int* nNormOptions)
-/* Wrapper function to retrieve header fields from a .hic file, for use by C libraries.
- * This retrieves the assembly name, list of chromosome names, list of available binsize resolutions,
- * the list of available fragment resolutions in the specific .hic file, and a list of available
- * normalization options.  Technically the normalization options are supposed to be resolution-specific,
- * but we're not ready for a redesign in that direction just yet.
- * The function returns NULL unless an error was encountered, in which case the return value points
- * to a character string explaining the error. */
-{
-    if (genome != NULL)
-    {
-        *genome = (char*) malloc((hicFile->genome->length()+1)*sizeof(char));
-        strcpy(*genome, hicFile->genome->c_str());
-    }
-    if (nChroms != NULL)
-    {
-        *nChroms = hicFile->nChroms;
-    }
-    if (chromNames != NULL)
-    {
-        *chromNames = (char**) calloc((size_t) hicFile->chromNames.size(), sizeof(char*));
-        for (int i=0; i<hicFile->chromNames.size(); i++)
-        {
-            (*chromNames)[i] = (char*) malloc((hicFile->chromNames[i].length()+1)*sizeof(char));
-            strcpy((*chromNames)[i], hicFile->chromNames[i].c_str());
-        }
-    }
-    if (chromSizes != NULL)
-    {
-        *chromSizes = (int*) calloc((size_t) hicFile->chromSizes.size(), sizeof(int));
-        for (int i=0; i<hicFile->chromSizes.size(); i++)
-        {
-            (*chromSizes)[i] = hicFile->chromSizes[i];
-        }
-    }
-    if (nBpRes != NULL)
-    {
-        *nBpRes = hicFile->nBpRes;
-    }
-    if (bpResolutions != NULL)
-    {
-        *bpResolutions = (char**) calloc((size_t) hicFile->bpResolutions.size(), sizeof(char*));
-        for (int i=0; i<hicFile->bpResolutions.size(); i++)
-        {
-            (*bpResolutions)[i] = (char*) malloc((to_string(hicFile->bpResolutions[i]).length()+1)*sizeof(char));
-            strcpy((*bpResolutions)[i], to_string(hicFile->bpResolutions[i]).c_str());
-        }
-    }
-    // skipping fragment stuff for now; we don't use it
-    if (nFragRes != NULL && false)
-    {
-        *nFragRes = hicFile->nFragRes;
-    }
-    // skipping fragment stuff for now; we don't use it
-    if (fragResolutions != NULL && false)
-    {
-        *fragResolutions = (char**) calloc((size_t) hicFile->fragResolutions.size(), sizeof(char*));
-        for (int i=0; i<hicFile->fragResolutions.size(); i++)
-        {
-            (*fragResolutions)[i] = (char*) malloc((to_string(hicFile->fragResolutions[i]).length()+1)*sizeof(char));
-            strcpy((*fragResolutions)[i], to_string(hicFile->fragResolutions[i]).c_str());
-        }
-    }
-    if (nAttributes != NULL)
-    {
-        *nAttributes = hicFile->nAttributes;
-    }
-    if (attributes != NULL)
-    {
-        *attributes = (char**) calloc((size_t) hicFile->attributes.size(), sizeof(char*));
-        for (int i=0; i<hicFile->attributes.size(); i++)
-        {
-            (*attributes)[i] = (char*) malloc((hicFile->attributes[i].length()+1)*sizeof(char));
-            strcpy((*attributes)[i], hicFile->attributes[i].c_str());
-        }
-    }
-    if (nNormOptions != NULL)
-    {
-        // Include one extra for "NONE", which doesn't appear in normOptions
-        *nNormOptions = hicFile->normOptions.size()+1;
-    }
-    if (normOptions != NULL)
-    {
-        *normOptions = (char**) calloc((size_t) hicFile->normOptions.size()+1, sizeof(char*));
-        (*normOptions)[0] = (char*) "NONE";
-        int i = 1;
-        std::set<string>::iterator it = hicFile->normOptions.begin();
-        for (; it != hicFile->normOptions.end(); it++, i++)
-        {
-            (*normOptions)[i] = (char*) malloc(((*it).length()+1)*sizeof(char));
-            strcpy((*normOptions)[i], (*it).c_str());
-        }
-    }
-    return NULL;
-}