src/hg/lib/straw/new_straw.cpp ae1ef6aa226d4436d4dc9e2b9b28120d802ab8a5

ae1ef6aa226d4436d4dc9e2b9b28120d802ab8a5
jcasper
  Thu Jul 25 11:57:53 2024 -0700
Dropping new straw library files into place, refs #33225

diff --git src/hg/lib/straw/new_straw.cpp src/hg/lib/straw/new_straw.cpp
deleted file mode 100644
index c7428df..0000000
--- src/hg/lib/straw/new_straw.cpp
+++ /dev/null
@@ -1,1755 +0,0 @@
-/*
-  The MIT License (MIT)
-
-  Copyright (c) 2017-2021 Aiden Lab, Rice University, Baylor College of Medicine
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
-*/
-#include <cstring>
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <map>
-#include <cmath>
-#include <set>
-#include <utility>
-#include <vector>
-#include <streambuf>
-//#include <curl/curl.h>
-#include <iterator>
-#include <algorithm>
-#include "zlib.h"
-//#include "straw.h"
-#include "new_straw.h"
-extern "C" {
-#include "../../inc/fakeCurl.h"
-}
-
-using namespace std;
-
-
-// Added UCSC: Dirty hack, but it's hard to quickly extract a list of available normalization options.
-// This gets populated by readFooter and readFooterURL, so you have to make a quick dummy data request
-// with straw() to set it up (e.g. to the first listed chromosome for a 1 bp window).
-set<string> globalNormOptions;
-
-/*
-  Straw: fast C++ implementation of dump. Not as fully featured as the
-  Java version. Reads the .hic file, finds the appropriate matrix and slice
-  of data, and outputs as text in sparse upper triangular format.
-
-  Currently only supporting matrices.
-
-  Usage: straw [observed/oe/expected] <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr1>[:x1:x2] <chr2>[:y1:y2] <BP/FRAG> <binsize>
- */
-
-// callback for libcurl. data written to this buffer
-static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) {
-    size_t realsize = size * nmemb;
-    struct MemoryStruct *mem;
-    mem = (struct MemoryStruct *) userp;
-
-    mem->memory = static_cast<char *>(realloc(mem->memory, mem->size + realsize + 1));
-    if (mem->memory == nullptr) {
-        /* out of memory! */
-        printf("not enough memory (realloc returned NULL)\n");
-        return 0;
-    }
-
-    std::memcpy(&(mem->memory[mem->size]), contents, realsize);
-    mem->size += realsize;
-    mem->memory[mem->size] = 0;
-
-    return realsize;
-}
-
-// get a buffer that can be used as an input stream from the URL
-char *getData(CURL *curl, int64_t position, int64_t chunksize) {
-    std::ostringstream oss;
-    struct MemoryStruct chunk{};
-    chunk.memory = static_cast<char *>(malloc(1));
-    chunk.size = 0;    /* no data at this point */
-    oss << position << "-" << position + chunksize;
-    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) &chunk);
-    curl_easy_setopt(curl, CURLOPT_RANGE, oss.str().c_str());
-    // Setting FAILONERROR because otherwise this library has a bad habit of assuming data exists
-    // and parsing what it gets back, regardless of whether it has run off the end of the file.
-    curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
-    CURLcode res = curl_easy_perform(curl);
-    if (res != CURLE_OK) {
-        throw strawException("curl_easy_perform() failed: " + string(curl_easy_strerror(res)));
-        //fprintf(stderr, "curl_easy_perform() failed: %s\n",
-        //        curl_easy_strerror(res));
-    }
-
-    return chunk.memory;
-}
-
-bool readMagicString(istream &fin) {
-    string str;
-    getline(fin, str, '\0');
-    return str[0] == 'H' && str[1] == 'I' && str[2] == 'C';
-}
-
-char readCharFromFile(istream &fin) {
-    char tempChar;
-    fin.read(&tempChar, sizeof(char));
-    return tempChar;
-}
-
-int16_t readInt16FromFile(istream &fin) {
-    int16_t tempInt16;
-    fin.read((char *) &tempInt16, sizeof(int16_t));
-    return tempInt16;
-}
-
-int32_t readInt32FromFile(istream &fin) {
-    int32_t tempInt32;
-    fin.read((char *) &tempInt32, sizeof(int32_t));
-    return tempInt32;
-}
-
-int64_t readInt64FromFile(istream &fin) {
-    int64_t tempInt64;
-    fin.read((char *) &tempInt64, sizeof(int64_t));
-    return tempInt64;
-}
-
-float readFloatFromFile(istream &fin) {
-    float tempFloat;
-    fin.read((char *) &tempFloat, sizeof(float));
-    return tempFloat;
-}
-
-double readDoubleFromFile(istream &fin) {
-    double tempDouble;
-    fin.read((char *) &tempDouble, sizeof(double));
-    return tempDouble;
-}
-
-void convertGenomeToBinPos(const int64_t origRegionIndices[4], int64_t regionIndices[4], int32_t resolution) {
-    for (uint16_t q = 0; q < 4; q++) {
-        // used to find the blocks we need to access
-        regionIndices[q] = origRegionIndices[q] / resolution;
-    }
-}
-
-static CURL *initCURL(const char *url) {
-    CURL *curl = curl_easy_init();
-    if (curl) {
-        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
-        curl_easy_setopt(curl, CURLOPT_URL, url);
-        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
-        curl_easy_setopt(curl, CURLOPT_USERAGENT, "straw");
-        curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
-    } else {
-        throw strawException("Unable to initialize curl");
-        //cerr << "Unable to initialize curl " << endl;
-        //exit(2);
-    }
-    return curl;
-}
-
-class HiCFileStream {
-public:
-    string prefix = "http"; // HTTP code
-    ifstream fin;
-    CURL *curl;
-    bool isHttp = false;
-
-    explicit HiCFileStream(const string &fileName) {
-        if (std::strncmp(fileName.c_str(), prefix.c_str(), prefix.size()) == 0) {
-            isHttp = true;
-            curl = initCURL(fileName.c_str());
-            if (!curl) {
-                throw strawException("URL " + fileName + " cannot be opened for reading");
-                //cerr << "URL " << fileName << " cannot be opened for reading" << endl;
-                //exit(3);
-            }
-        } else {
-            fin.open(fileName, fstream::in | fstream::binary);
-            if (!fin) {
-                throw strawException("File " + fileName + " cannot be opened for reading");
-                //cerr << "File " << fileName << " cannot be opened for reading" << endl;
-                //exit(4);
-            }
-        }
-    }
-
-    void close() {
-        if (isHttp) {
-            curl_easy_cleanup(curl);
-        } else {
-            fin.close();
-        }
-    }
-
-    char *readCompressedBytes(indexEntry idx) {
-        if (isHttp) {
-            return getData(curl, idx.position, idx.size);
-        } else {
-            char *buffer = new char[idx.size];
-            fin.seekg(idx.position, ios::beg);
-            fin.read(buffer, idx.size);
-            return buffer;
-        }
-    }
-};
-
-char *readCompressedBytesFromFile(const string &fileName, indexEntry idx) {
-    HiCFileStream *stream = new HiCFileStream(fileName);
-    char *compressedBytes = stream->readCompressedBytes(idx);
-    stream->close();
-    delete stream;
-    return compressedBytes;
-}
-
-// reads the header, storing the positions of the normalization vectors and returning the masterIndexPosition pointer
-map<string, chromosome> readHeader(istream &fin, int64_t &masterIndexPosition, string &genomeID,
-                                   int32_t &numChromosomes, int32_t &version, int64_t &nviPosition,
-                                   int64_t &nviLength, vector<string> &attributes) {
-    map<string, chromosome> chromosomeMap;
-    if (!readMagicString(fin)) {
-        throw strawException("Hi-C magic string is missing, does not appear to be a hic file");
-        // Not sure why this function was trying to continue after this failure (the next step is
-        // reading resolutions from the file, which clearly isn't going to work).
-        //cerr << "Hi-C magic string is missing, does not appear to be a hic file" << endl;
-        masterIndexPosition = -1;
-        return chromosomeMap;
-    }
-
-    version = readInt32FromFile(fin);
-    if (version < 6) {
-        throw strawException("Version " + to_string(version) + " no longer supported");
-        // Not sure why this function was trying to continue after this failure (the next step is
-        // reading resolutions from the file, which clearly isn't going to work).
-        //cerr << "Version " << version << " no longer supported" << endl;
-        masterIndexPosition = -1;
-        return chromosomeMap;
-    }
-    masterIndexPosition = readInt64FromFile(fin);
-    getline(fin, genomeID, '\0');
-
-    if (version > 8) {
-        nviPosition = readInt64FromFile(fin);
-        nviLength = readInt64FromFile(fin);
-    }
-
-    int32_t nattributes = readInt32FromFile(fin);
-
-    // reading attribute-value dictionary
-    for (int i = 0; i < nattributes; i++) {
-        string key, value;
-        getline(fin, key, '\0');
-        getline(fin, value, '\0');
-        attributes.insert(attributes.end(), key);
-        attributes.insert(attributes.end(), value);
-    }
-
-    numChromosomes = readInt32FromFile(fin);
-    // chromosome map for finding matrixType
-    for (int i = 0; i < numChromosomes; i++) {
-        string name;
-        int64_t length;
-        getline(fin, name, '\0');
-        if (version > 8) {
-            length = readInt64FromFile(fin);
-        } else {
-            length = (int64_t) readInt32FromFile(fin);
-        }
-
-        chromosome chr;
-        chr.index = i;
-        chr.name = name;
-        chr.length = length;
-        chromosomeMap[name] = chr;
-    }
-    return chromosomeMap;
-}
-
-vector<int32_t> readResolutionsFromHeader(istream &fin) {
-    int numBpResolutions = readInt32FromFile(fin);
-    vector<int32_t> resolutions;
-    for (int i = 0; i < numBpResolutions; i++) {
-        int32_t res = readInt32FromFile(fin);
-        resolutions.push_back(res);
-    }
-    return resolutions;
-}
-
-//https://www.techiedelight.com/get-slice-sub-vector-from-vector-cpp/
-vector<double> sliceVector(vector<double> &v, int64_t m, int64_t n) {
-    vector<double> vec;
-    copy(v.begin() + m, v.begin() + n + 1, back_inserter(vec));
-    return vec;
-}
-
-// assume always an odd number for length of vector;
-// eve if even, this calculation should be close enough
-double getMedian(vector<double> &v) {
-    size_t n = v.size() / 2;
-    nth_element(v.begin(), v.begin() + n, v.end());
-    return v[n];
-}
-
-void rollingMedian(vector<double> &initialValues, vector<double> &finalResult, int32_t window) {
-    // window is actually a ~wing-span
-    if (window < 1) {
-        finalResult = initialValues;
-        return;
-    }
-
-    /*
-    finalResult.push_back(initialValues[0]);
-    int64_t length = initialValues.size();
-    for (int64_t index = 1; index < length; index++) {
-        int64_t initialIndex;
-        int64_t finalIndex;
-        if (index < window) {
-            initialIndex = 0;
-            finalIndex = 2 * index;
-        } else {
-            initialIndex = index - window;
-            finalIndex = index + window;
-        }
-
-        if (finalIndex > length - 1) {
-            finalIndex = length - 1;
-        }
-
-        vector<double> subVector = sliceVector(initialValues, initialIndex, finalIndex);
-        finalResult.push_back(getMedian(subVector));
-    }
-    */
-    finalResult = initialValues;
-}
-
-void populateVectorWithFloats(istream &fin, vector<double> &vector, int64_t nValues) {
-    for (int j = 0; j < nValues; j++) {
-        double v = readFloatFromFile(fin);
-        vector.push_back(v);
-    }
-}
-
-void populateVectorWithDoubles(istream &fin, vector<double> &vector, int64_t nValues) {
-    for (int j = 0; j < nValues; j++) {
-        double v = readDoubleFromFile(fin);
-        vector.push_back(v);
-    }
-}
-
-int64_t readThroughExpectedVectorURL(CURL *curl, int64_t currentPointer, int32_t version, vector<double> &expectedValues, int64_t nValues,
-                               bool store, int32_t resolution) {
-    if (store) {
-        int32_t bufferSize = nValues * sizeof(double) + 10000;
-        if (version > 8) {
-            bufferSize = nValues * sizeof(float) + 10000;
-        }
-        char *buffer = getData(curl, currentPointer, bufferSize);
-        memstream fin(buffer, bufferSize);
-
-        vector<double> initialExpectedValues;
-        if (version > 8) {
-            populateVectorWithFloats(fin, initialExpectedValues, nValues);
-        } else {
-            populateVectorWithDoubles(fin, initialExpectedValues, nValues);
-        }
-        int32_t window = 5000000 / resolution;
-        rollingMedian(initialExpectedValues, expectedValues, window);
-        delete buffer;
-    }
-
-    if (version > 8) {
-        return nValues * sizeof(float);
-    } else {
-        return nValues * sizeof(double);
-    }
-}
-
-void readThroughExpectedVector(int32_t version, istream &fin, vector<double> &expectedValues, int64_t nValues,
-                               bool store, int32_t resolution) {
-    if (store) {
-        vector<double> initialExpectedValues;
-        if (version > 8) {
-            populateVectorWithFloats(fin, initialExpectedValues, nValues);
-        } else {
-            populateVectorWithDoubles(fin, initialExpectedValues, nValues);
-        }
-        int32_t window = 5000000 / resolution;
-        rollingMedian(initialExpectedValues, expectedValues, window);
-    } else if (nValues > 0) {
-        if (version > 8) {
-            fin.seekg(nValues * sizeof(float), ios_base::cur);
-        } else {
-            fin.seekg(nValues * sizeof(double), ios_base::cur);
-        }
-    }
-}
-
-int64_t readThroughNormalizationFactorsURL(CURL *curl, int64_t currentPointer, int32_t version, bool store, vector<double> &expectedValues,
-                                     int32_t c1, int32_t nNormalizationFactors) {
-
-    if (store) {
-        int32_t bufferSize = nNormalizationFactors * (sizeof(int32_t) + sizeof(double)) + 10000;
-        if (version > 8) {
-            bufferSize = nNormalizationFactors * (sizeof(int32_t) + sizeof(float )) + 10000;
-        }
-        char *buffer = getData(curl, currentPointer, bufferSize);
-        memstream fin(buffer, bufferSize);
-
-        for (int j = 0; j < nNormalizationFactors; j++) {
-            int32_t chrIdx = readInt32FromFile(fin);
-            double v;
-            if (version > 8) {
-                v = readFloatFromFile(fin);
-            } else {
-                v = readDoubleFromFile(fin);
-            }
-            if (chrIdx == c1) {
-                for (double &expectedValue : expectedValues) {
-                    expectedValue = expectedValue / v;
-                }
-            }
-        }
-        delete buffer;
-    }
-
-    if (version > 8) {
-        return nNormalizationFactors * (sizeof(int32_t) + sizeof(float));
-    } else {
-        return nNormalizationFactors * (sizeof(int32_t) + sizeof(double));
-    }
-}
-
-void readThroughNormalizationFactors(istream &fin, int32_t version, bool store, vector<double> &expectedValues,
-                                     int32_t c1) {
-    int32_t nNormalizationFactors = readInt32FromFile(fin);
-    if (store) {
-        for (int j = 0; j < nNormalizationFactors; j++) {
-            int32_t chrIdx = readInt32FromFile(fin);
-            double v;
-            if (version > 8) {
-                v = readFloatFromFile(fin);
-            } else {
-                v = readDoubleFromFile(fin);
-            }
-            if (chrIdx == c1) {
-                for (double &expectedValue : expectedValues) {
-                    expectedValue = expectedValue / v;
-                }
-            }
-        }
-    } else if (nNormalizationFactors > 0) {
-        if (version > 8) {
-            fin.seekg(nNormalizationFactors * (sizeof(int32_t) + sizeof(float)), ios_base::cur);
-        } else {
-            fin.seekg(nNormalizationFactors * (sizeof(int32_t) + sizeof(double)), ios_base::cur);
-        }
-    }
-}
-
-int64_t readStringFromURL(istream &fin, string &basicString) {
-    getline(fin, basicString, '\0');
-    return (basicString.length() + 1);
-}
-
-// reads the footer from the master pointer location. takes in the chromosomes,
-// norm, unit (BP or FRAG) and resolution or binsize, and sets the file
-// position of the matrix and the normalization vectors for those chromosomes
-// at the given normalization and resolution
-bool readFooterURL(CURL *curl, int64_t master, int32_t version, int32_t c1, int32_t c2, const string &matrixType,
-                const string &norm, const string &unit, int32_t resolution, int64_t &myFilePos,
-                indexEntry &c1NormEntry, indexEntry &c2NormEntry, vector<double> &expectedValues) {
-
-    int64_t currentPointer = master;
-
-    char *buffer = getData(curl, currentPointer, 100);
-    memstream fin1(buffer, 100);
-
-    if (version > 8) {
-        int64_t nBytes = readInt64FromFile(fin1);
-        currentPointer += 8;
-    } else {
-        int32_t nBytes = readInt32FromFile(fin1);
-        currentPointer += 4;
-    }
-
-    stringstream ss;
-    ss << c1 << "_" << c2;
-    string key = ss.str();
-
-    int32_t nEntries = readInt32FromFile(fin1);
-    currentPointer += 4;
-    delete buffer;
-
-    int32_t bufferSize0 = nEntries * 50;
-    buffer = getData(curl, currentPointer, bufferSize0);
-    memstream fin2(buffer, bufferSize0);
-
-    bool found = false;
-    for (int i = 0; i < nEntries; i++) {
-        string keyStr;
-        currentPointer += readStringFromURL(fin2, keyStr);
-
-        int64_t fpos = readInt64FromFile(fin2);
-        int32_t sizeinbytes = readInt32FromFile(fin2);
-        currentPointer += 12;
-        if (keyStr == key) {
-            myFilePos = fpos;
-            found = true;
-        }
-    }
-    delete buffer;
-    if (!found) {
-        cerr << "Remote file doesn't have the given chr_chr map " << key << endl;
-        return false;
-    }
-
-    if ((matrixType == "observed" && norm == "NONE") ||
-        ((matrixType == "oe" || matrixType == "expected") && norm == "NONE" && c1 != c2))
-        return true; // no need to read norm vector index
-
-    // read in and ignore expected value maps; don't store; reading these to
-    // get to norm vector index
-    buffer = getData(curl, currentPointer, 100);
-    memstream fin3(buffer, 100);
-
-    int32_t nExpectedValues = readInt32FromFile(fin3);
-    if (fin3.bad() || fin3.fail())
-    {
-        throw strawException("Unable to read expected values from file - it might be truncated early");
-    }
-    currentPointer += 4;
-    delete buffer;
-    for (int i = 0; i < nExpectedValues; i++) {
-
-        buffer = getData(curl, currentPointer, 1000);
-        memstream fin4(buffer, 1000);
-
-        string unit0;
-        currentPointer += readStringFromURL(fin4, unit0);
-
-        int32_t binSize = readInt32FromFile(fin4);
-        currentPointer += 4;
-
-        int64_t nValues;
-        if (version > 8) {
-            nValues = readInt64FromFile(fin4);
-            currentPointer += 8;
-        } else {
-            nValues = (int64_t) readInt32FromFile(fin4);
-            currentPointer += 4;
-        }
-
-        delete buffer;
-
-        bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE" && unit0 == unit &&
-                     binSize == resolution;
-
-        currentPointer += readThroughExpectedVectorURL(curl, currentPointer, version, expectedValues, nValues, store, resolution);
-
-        buffer = getData(curl, currentPointer, 100);
-        memstream fin5(buffer, 100);
-        int32_t nNormalizationFactors = readInt32FromFile(fin5);
-        currentPointer += 4;
-        delete buffer;
-
-        currentPointer += readThroughNormalizationFactorsURL(curl, currentPointer, version, store, expectedValues, c1, nNormalizationFactors);
-    }
-
-    if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE") {
-        if (expectedValues.empty()) {
-            cerr << "Remote file did not contain expected values vectors at " << resolution << " " << unit << endl;
-            return false;
-        }
-        return true;
-    }
-
-    buffer = getData(curl, currentPointer, 100);
-    memstream fin6(buffer, 100);
-    nExpectedValues = readInt32FromFile(fin6);
-    if (fin6.bad() || fin6.fail())
-    {
-        throw strawException("Unable to read normalized expected values from file - it might be truncated early");
-    }
-    currentPointer += 4;
-    delete buffer;
-    for (int i = 0; i < nExpectedValues; i++) {
-        buffer = getData(curl, currentPointer, 1000);
-        memstream fin7(buffer, 1000);
-
-        string nType, unit0;
-        currentPointer += readStringFromURL(fin7, nType);
-        currentPointer += readStringFromURL(fin7, unit0);
-
-        int32_t binSize = readInt32FromFile(fin7);
-        currentPointer += 4;
-
-        int64_t nValues;
-        if (version > 8) {
-            nValues = readInt64FromFile(fin7);
-            currentPointer += 8;
-        } else {
-            nValues = (int64_t) readInt32FromFile(fin7);
-            currentPointer += 4;
-        }
-        bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && nType == norm && unit0 == unit &&
-                     binSize == resolution;
-
-        delete buffer;
-
-        currentPointer += readThroughExpectedVectorURL(curl, currentPointer, version, expectedValues, nValues, store, resolution);
-
-        buffer = getData(curl, currentPointer, 100);
-        memstream fin8(buffer, 100);
-        int32_t nNormalizationFactors = readInt32FromFile(fin8);
-        currentPointer += 4;
-        delete buffer;
-
-        currentPointer += readThroughNormalizationFactorsURL(curl, currentPointer, version, store, expectedValues, c1, nNormalizationFactors);
-    }
-
-    if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm != "NONE") {
-        if (expectedValues.empty()) {
-            cerr << "Remote file did not contain normalized expected values vectors at " << resolution << " " << unit << endl;
-            return false;
-        }
-    }
-
-    buffer = getData(curl, currentPointer, 100);
-    memstream fin9(buffer, 100);
-    nEntries = readInt32FromFile(fin9);
-    if (fin9.bad() || fin9.fail())
-    {
-        throw strawException("Unable to read normalization vectors from file - it might be truncated early");
-    }
-    currentPointer += 4;
-    delete buffer;
-
-    bool found1 = false;
-    bool found2 = false;
-    int32_t bufferSize2 = nEntries*60;
-    buffer = getData(curl, currentPointer, bufferSize2);
-    memstream fin10(buffer, bufferSize2);
-
-    // added UCSC
-    globalNormOptions.clear();
-
-    for (int i = 0; i < nEntries; i++) {
-        string normtype;
-        currentPointer += readStringFromURL(fin10, normtype);
-
-        // added UCSC
-        globalNormOptions.insert(normtype);
-
-        int32_t chrIdx = readInt32FromFile(fin10);
-        currentPointer += 4;
-        string unit1;
-        currentPointer += readStringFromURL(fin10, unit1);
-
-        int32_t resolution1 = readInt32FromFile(fin10);
-        int64_t filePosition = readInt64FromFile(fin10);
-        currentPointer += 12;
-
-        int64_t sizeInBytes;
-        if (version > 8) {
-            sizeInBytes = readInt64FromFile(fin10);
-            currentPointer += 8;
-        } else {
-            sizeInBytes = (int64_t) readInt32FromFile(fin10);
-            currentPointer += 4;
-        }
-
-        if (chrIdx == c1 && normtype == norm && unit1 == unit && resolution1 == resolution) {
-            c1NormEntry.position = filePosition;
-            c1NormEntry.size = sizeInBytes;
-            found1 = true;
-        }
-        if (chrIdx == c2 && normtype == norm && unit1 == unit && resolution1 == resolution) {
-            c2NormEntry.position = filePosition;
-            c2NormEntry.size = sizeInBytes;
-            found2 = true;
-        }
-    }
-    delete buffer;
-    if (!found1 || !found2) {
-        cerr << "Remote file did not contain " << norm << " normalization vectors for one or both chromosomes at "
-             << resolution << " " << unit << endl;
-    }
-    return true;
-}
-
-bool readFooter(istream &fin, int64_t master, int32_t version, int32_t c1, int32_t c2, const string &matrixType,
-                const string &norm, const string &unit, int32_t resolution, int64_t &myFilePos,
-                indexEntry &c1NormEntry, indexEntry &c2NormEntry, vector<double> &expectedValues) {
-
-    if (version > 8) {
-        int64_t nBytes = readInt64FromFile(fin);
-    } else {
-        int32_t nBytes = readInt32FromFile(fin);
-    }
-
-    stringstream ss;
-    ss << c1 << "_" << c2;
-    string key = ss.str();
-
-    int32_t nEntries = readInt32FromFile(fin);
-    bool found = false;
-    for (int i = 0; i < nEntries; i++) {
-        string keyStr;
-        getline(fin, keyStr, '\0');
-        int64_t fpos = readInt64FromFile(fin);
-        int32_t sizeinbytes = readInt32FromFile(fin);
-        if (keyStr == key) {
-            myFilePos = fpos;
-            found = true;
-        }
-    }
-    if (!found) {
-        cerr << "File doesn't have the given chr_chr map " << key << endl;
-        return false;
-    }
-
-    if ((matrixType == "observed" && norm == "NONE") ||
-        ((matrixType == "oe" || matrixType == "expected") && norm == "NONE" && c1 != c2))
-        return true; // no need to read norm vector index
-
-    // read in and ignore expected value maps; don't store; reading these to
-    // get to norm vector index
-    int32_t nExpectedValues = readInt32FromFile(fin);
-    if (fin.bad() || fin.fail())
-    {
-        throw strawException("Unable to read expected values from file - it might be truncated early");
-    }
-    for (int i = 0; i < nExpectedValues; i++) {
-        string unit0;
-        getline(fin, unit0, '\0'); //unit
-        int32_t binSize = readInt32FromFile(fin);
-
-        int64_t nValues;
-        if (version > 8) {
-            nValues = readInt64FromFile(fin);
-        } else {
-            nValues = (int64_t) readInt32FromFile(fin);
-        }
-
-        bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE" && unit0 == unit &&
-                     binSize == resolution;
-        readThroughExpectedVector(version, fin, expectedValues, nValues, store, resolution);
-        readThroughNormalizationFactors(fin, version, store, expectedValues, c1);
-    }
-
-    if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE") {
-        if (expectedValues.empty()) {
-            cerr << "File did not contain expected values vectors at " << resolution << " " << unit << endl;
-            return false;
-        }
-        return true;
-    }
-
-    nExpectedValues = readInt32FromFile(fin);
-    if (fin.bad() || fin.fail())
-    {
-        throw strawException("Unable to read normalized expected values from file - it might be truncated early");
-    }
-
-    for (int i = 0; i < nExpectedValues; i++) {
-        string nType, unit0;
-        getline(fin, nType, '\0'); //typeString
-        getline(fin, unit0, '\0'); //unit
-        int32_t binSize = readInt32FromFile(fin);
-        int64_t nValues;
-        if (version > 8) {
-            nValues = readInt64FromFile(fin);
-        } else {
-            nValues = (int64_t) readInt32FromFile(fin);
-        }
-        bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && nType == norm && unit0 == unit &&
-                     binSize == resolution;
-        readThroughExpectedVector(version, fin, expectedValues, nValues, store, resolution);
-        readThroughNormalizationFactors(fin, version, store, expectedValues, c1);
-    }
-
-    if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm != "NONE") {
-        if (expectedValues.empty()) {
-            cerr << "File did not contain normalized expected values vectors at " << resolution << " " << unit << endl;
-            return false;
-        }
-    }
-
-    // Index of normalization vectors
-    nEntries = readInt32FromFile(fin);
-    bool found1 = false;
-    bool found2 = false;
-
-    // added UCSC
-    globalNormOptions.clear();
-    if (fin.bad() || fin.fail())
-    {
-        throw strawException("Unable to read normalization vectors from file - it might be truncated early");
-    }
-
-    for (int i = 0; i < nEntries; i++) {
-        string normtype;
-        getline(fin, normtype, '\0'); //normalization type
-        int32_t chrIdx = readInt32FromFile(fin);
-        string unit1;
-        getline(fin, unit1, '\0'); //unit
-        int32_t resolution1 = readInt32FromFile(fin);
-        int64_t filePosition = readInt64FromFile(fin);
-        int64_t sizeInBytes;
-        if (version > 8) {
-            sizeInBytes = readInt64FromFile(fin);
-        } else {
-            sizeInBytes = (int64_t) readInt32FromFile(fin);
-        }
-
-        // added UCSC
-        globalNormOptions.insert(normtype);
-
-        if (chrIdx == c1 && normtype == norm && unit1 == unit && resolution1 == resolution) {
-            c1NormEntry.position = filePosition;
-            c1NormEntry.size = sizeInBytes;
-            found1 = true;
-        }
-        if (chrIdx == c2 && normtype == norm && unit1 == unit && resolution1 == resolution) {
-            c2NormEntry.position = filePosition;
-            c2NormEntry.size = sizeInBytes;
-            found2 = true;
-        }
-    }
-    if (!found1 || !found2) {
-        cerr << "File did not contain " << norm << " normalization vectors for one or both chromosomes at "
-             << resolution << " " << unit << endl;
-    }
-    return true;
-}
-
-indexEntry readIndexEntry(istream &fin) {
-    int64_t filePosition = readInt64FromFile(fin);
-    int32_t blockSizeInBytes = readInt32FromFile(fin);
-    indexEntry entry = indexEntry();
-    entry.size = (int64_t) blockSizeInBytes;
-    entry.position = filePosition;
-    return entry;
-}
-
-void setValuesForMZD(istream &fin, const string &myunit, float &mySumCounts, int32_t &mybinsize,
-                     int32_t &myBlockBinCount, int32_t &myBlockColumnCount, bool &found) {
-    string unit;
-    getline(fin, unit, '\0'); // unit
-    readInt32FromFile(fin); // Old "zoom" index -- not used
-    float sumCounts = readFloatFromFile(fin); // sumCounts
-    readFloatFromFile(fin); // occupiedCellCount
-    readFloatFromFile(fin); // stdDev
-    readFloatFromFile(fin); // percent95
-    int32_t binSize = readInt32FromFile(fin);
-    int32_t blockBinCount = readInt32FromFile(fin);
-    int32_t blockColumnCount = readInt32FromFile(fin);
-    found = false;
-    if (myunit == unit && mybinsize == binSize) {
-        mySumCounts = sumCounts;
-        myBlockBinCount = blockBinCount;
-        myBlockColumnCount = blockColumnCount;
-        found = true;
-    }
-}
-
-void populateBlockMap(istream &fin, int32_t nBlocks, map<int32_t, indexEntry> &blockMap) {
-    for (int b = 0; b < nBlocks; b++) {
-        int32_t blockNumber = readInt32FromFile(fin);
-        blockMap[blockNumber] = readIndexEntry(fin);
-    }
-}
-
-// reads the raw binned contact matrix at specified resolution, setting the block bin count and block column count
-map<int32_t, indexEntry> readMatrixZoomData(istream &fin, const string &myunit, int32_t mybinsize, float &mySumCounts,
-                                            int32_t &myBlockBinCount, int32_t &myBlockColumnCount, bool &found) {
-
-    map<int32_t, indexEntry> blockMap;
-    setValuesForMZD(fin, myunit, mySumCounts, mybinsize, myBlockBinCount, myBlockColumnCount, found);
-
-    int32_t nBlocks = readInt32FromFile(fin);
-    if (found) {
-        populateBlockMap(fin, nBlocks, blockMap);
-    } else {
-        fin.seekg(nBlocks * (sizeof(int32_t) + sizeof(int64_t) + sizeof(int32_t)), ios_base::cur);
-    }
-    return blockMap;
-}
-
-// reads the raw binned contact matrix at specified resolution, setting the block bin count and block column count
-map<int32_t, indexEntry> readMatrixZoomDataHttp(CURL *curl, int64_t &myFilePosition, const string &myunit,
-                                                int32_t mybinsize, float &mySumCounts, int32_t &myBlockBinCount,
-                                                int32_t &myBlockColumnCount, bool &found) {
-    map<int32_t, indexEntry> blockMap;
-    int32_t header_size = 5 * sizeof(int32_t) + 4 * sizeof(float);
-    char *first = getData(curl, myFilePosition, 1);
-    if (first[0] == 'B') {
-        header_size += 3;
-    } else if (first[0] == 'F') {
-        header_size += 5;
-    } else {
-        cerr << "Unit not understood" << endl;
-        return blockMap;
-    }
-    delete first;
-    char *buffer = getData(curl, myFilePosition, header_size);
-    memstream fin(buffer, header_size);
-    setValuesForMZD(fin, myunit, mySumCounts, mybinsize, myBlockBinCount, myBlockColumnCount, found);
-    int32_t nBlocks = readInt32FromFile(fin);
-    delete buffer;
-
-    if (found) {
-        int32_t chunkSize = nBlocks * (sizeof(int32_t) + sizeof(int64_t) + sizeof(int32_t));
-        buffer = getData(curl, myFilePosition + header_size, chunkSize);
-        memstream fin2(buffer, chunkSize);
-        populateBlockMap(fin2, nBlocks, blockMap);
-        delete buffer;
-    } else {
-        myFilePosition = myFilePosition + header_size
-                         + (nBlocks * (sizeof(int32_t) + sizeof(int64_t) + sizeof(int32_t)));
-    }
-    return blockMap;
-}
-
-// goes to the specified file pointer in http and finds the raw contact matrixType at specified resolution, calling readMatrixZoomData.
-// sets blockbincount and blockcolumncount
-map<int32_t, indexEntry> readMatrixHttp(CURL *curl, int64_t myFilePosition, const string &unit, int32_t resolution,
-                                        float &mySumCounts, int32_t &myBlockBinCount, int32_t &myBlockColumnCount) {
-    int32_t size = sizeof(int32_t) * 3;
-    char *buffer = getData(curl, myFilePosition, size);
-    memstream bufin(buffer, size);
-
-    int32_t c1 = readInt32FromFile(bufin);
-    int32_t c2 = readInt32FromFile(bufin);
-    int32_t nRes = readInt32FromFile(bufin);
-    int32_t i = 0;
-    bool found = false;
-    myFilePosition = myFilePosition + size;
-    delete buffer;
-    map<int32_t, indexEntry> blockMap;
-
-    while (i < nRes && !found) {
-        // myFilePosition gets updated within call
-        blockMap = readMatrixZoomDataHttp(curl, myFilePosition, unit, resolution, mySumCounts, myBlockBinCount,
-                                          myBlockColumnCount, found);
-        i++;
-    }
-    if (!found) {
-        cerr << "Error finding block data" << endl;
-    }
-    return blockMap;
-}
-
-// goes to the specified file pointer and finds the raw contact matrixType at specified resolution, calling readMatrixZoomData.
-// sets blockbincount and blockcolumncount
-map<int32_t, indexEntry> readMatrix(istream &fin, int64_t myFilePosition, const string &unit, int32_t resolution,
-                                    float &mySumCounts, int32_t &myBlockBinCount, int32_t &myBlockColumnCount) {
-    map<int32_t, indexEntry> blockMap;
-
-    fin.seekg(myFilePosition, ios::beg);
-    int32_t c1 = readInt32FromFile(fin);
-    int32_t c2 = readInt32FromFile(fin);
-    int32_t nRes = readInt32FromFile(fin);
-    int32_t i = 0;
-    bool found = false;
-    while (i < nRes && !found) {
-        blockMap = readMatrixZoomData(fin, unit, resolution, mySumCounts, myBlockBinCount, myBlockColumnCount, found);
-        i++;
-    }
-    if (!found) {
-        cerr << "Error finding block data" << endl;
-    }
-    return blockMap;
-}
-
-// gets the blocks that need to be read for this slice of the data.  needs blockbincount, blockcolumncount, and whether
-// or not this is intrachromosomal.
-set<int32_t> getBlockNumbersForRegionFromBinPosition(const int64_t *regionIndices, int32_t blockBinCount,
-                                                     int32_t blockColumnCount, bool intra) {
-    int32_t col1, col2, row1, row2;
-    col1 = static_cast<int32_t>(regionIndices[0] / blockBinCount);
-    col2 = static_cast<int32_t>((regionIndices[1] + 1) / blockBinCount);
-    row1 = static_cast<int32_t>(regionIndices[2] / blockBinCount);
-    row2 = static_cast<int32_t>((regionIndices[3] + 1) / blockBinCount);
-
-    set<int32_t> blocksSet;
-    // first check the upper triangular matrixType
-    for (int r = row1; r <= row2; r++) {
-        for (int c = col1; c <= col2; c++) {
-            int32_t blockNumber = r * blockColumnCount + c;
-            blocksSet.insert(blockNumber);
-        }
-    }
-    // check region part that overlaps with lower left triangle but only if intrachromosomal
-    if (intra) {
-        for (int r = col1; r <= col2; r++) {
-            for (int c = row1; c <= row2; c++) {
-                int32_t blockNumber = r * blockColumnCount + c;
-                blocksSet.insert(blockNumber);
-            }
-        }
-    }
-    return blocksSet;
-}
-
-set<int32_t> getBlockNumbersForRegionFromBinPositionV9Intra(int64_t *regionIndices, int32_t blockBinCount,
-                                                            int32_t blockColumnCount) {
-    // regionIndices is binX1 binX2 binY1 binY2
-    set<int32_t> blocksSet;
-    int32_t translatedLowerPAD, translatedHigherPAD, translatedNearerDepth, translatedFurtherDepth;
-    translatedLowerPAD = static_cast<int32_t>((regionIndices[0] + regionIndices[2]) / 2 / blockBinCount);
-    translatedHigherPAD = static_cast<int32_t>((regionIndices[1] + regionIndices[3]) / 2 / blockBinCount + 1);
-    translatedNearerDepth = static_cast<int32_t>(log2(
-            1 + abs(regionIndices[0] - regionIndices[3]) / sqrt(2) / blockBinCount));
-    translatedFurtherDepth = static_cast<int32_t>(log2(
-            1 + abs(regionIndices[1] - regionIndices[2]) / sqrt(2) / blockBinCount));
-
-    // because code above assume above diagonal; but we could be below diagonal
-    int32_t nearerDepth = min(translatedNearerDepth, translatedFurtherDepth);
-    if ((regionIndices[0] > regionIndices[3] && regionIndices[1] < regionIndices[2]) ||
-        (regionIndices[1] > regionIndices[2] && regionIndices[0] < regionIndices[3])) {
-        nearerDepth = 0;
-    }
-    int32_t furtherDepth = max(translatedNearerDepth, translatedFurtherDepth) + 1; // +1; integer divide rounds down
-
-    for (int depth = nearerDepth; depth <= furtherDepth; depth++) {
-        for (int pad = translatedLowerPAD; pad <= translatedHigherPAD; pad++) {
-            int32_t blockNumber = depth * blockColumnCount + pad;
-            blocksSet.insert(blockNumber);
-        }
-    }
-
-    return blocksSet;
-}
-
-void appendRecord(vector<contactRecord> &vector, int32_t index, int32_t binX, int32_t binY, float counts) {
-    contactRecord record = contactRecord();
-    record.binX = binX;
-    record.binY = binY;
-    record.counts = counts;
-    vector[index] = record;
-}
-
-int32_t decompressBlock(indexEntry idx, char *compressedBytes, char *uncompressedBytes) {
-    z_stream infstream;
-    infstream.zalloc = Z_NULL;
-    infstream.zfree = Z_NULL;
-    infstream.opaque = Z_NULL;
-    infstream.avail_in = static_cast<uInt>(idx.size); // size of input
-    infstream.next_in = (Bytef *) compressedBytes; // input char array
-    infstream.avail_out = static_cast<uInt>(idx.size * 10); // size of output
-    infstream.next_out = (Bytef *) uncompressedBytes; // output char array
-    // the actual decompression work.
-    inflateInit(&infstream);
-    inflate(&infstream, Z_NO_FLUSH);
-    inflateEnd(&infstream);
-    int32_t uncompressedSize = static_cast<int32_t>(infstream.total_out);
-    return uncompressedSize;
-}
-
-long getNumRecordsInBlock(const string &fileName, indexEntry idx, int32_t version){
-    if (idx.size <= 0) {
-        return 0;
-    }
-    char *compressedBytes = readCompressedBytesFromFile(fileName, idx);
-    char *uncompressedBytes = new char[idx.size * 10]; //biggest seen so far is 3
-    int32_t uncompressedSize = decompressBlock(idx, compressedBytes, uncompressedBytes);
-
-    // create stream from buffer for ease of use
-    memstream bufferin(uncompressedBytes, uncompressedSize);
-    uint64_t nRecords;
-    nRecords = static_cast<uint64_t>(readInt32FromFile(bufferin));
-    delete[] compressedBytes;
-    delete[] uncompressedBytes; // don't forget to delete your heap arrays in C++!
-    return nRecords;
-}
-
-// this is the meat of reading the data.  takes in the block number and returns the set of contact records corresponding to
-// that block.  the block data is compressed and must be decompressed using the zlib library functions
-vector<contactRecord> readBlock(const string &fileName, indexEntry idx, int32_t version) {
-    if (idx.size <= 0) {
-        vector<contactRecord> v;
-        return v;
-    }
-    char *compressedBytes = readCompressedBytesFromFile(fileName, idx);
-    char *uncompressedBytes = new char[idx.size * 10]; //biggest seen so far is 3
-    int32_t uncompressedSize = decompressBlock(idx, compressedBytes, uncompressedBytes);
-
-    // create stream from buffer for ease of use
-    memstream bufferin(uncompressedBytes, uncompressedSize);
-    uint64_t nRecords;
-    nRecords = static_cast<uint64_t>(readInt32FromFile(bufferin));
-    vector<contactRecord> v(nRecords);
-    // different versions have different specific formats
-    if (version < 7) {
-        for (uInt i = 0; i < nRecords; i++) {
-            int32_t binX = readInt32FromFile(bufferin);
-            int32_t binY = readInt32FromFile(bufferin);
-            float counts = readFloatFromFile(bufferin);
-            appendRecord(v, i, binX, binY, counts);
-        }
-    } else {
-        int32_t binXOffset = readInt32FromFile(bufferin);
-        int32_t binYOffset = readInt32FromFile(bufferin);
-        bool useShort = readCharFromFile(bufferin) == 0; // yes this is opposite of usual
-
-        bool useShortBinX = true;
-        bool useShortBinY = true;
-        if (version > 8) {
-            useShortBinX = readCharFromFile(bufferin) == 0;
-            useShortBinY = readCharFromFile(bufferin) == 0;
-        }
-
-        char type = readCharFromFile(bufferin);
-        int32_t index = 0;
-        if (type == 1) {
-            if (useShortBinX && useShortBinY) {
-                int16_t rowCount = readInt16FromFile(bufferin);
-                for (int i = 0; i < rowCount; i++) {
-                    int32_t binY = binYOffset + readInt16FromFile(bufferin);
-                    int16_t colCount = readInt16FromFile(bufferin);
-                    for (int j = 0; j < colCount; j++) {
-                        int32_t binX = binXOffset + readInt16FromFile(bufferin);
-                        float counts;
-                        if (useShort) {
-                            counts = readInt16FromFile(bufferin);
-                        } else {
-                            counts = readFloatFromFile(bufferin);
-                        }
-                        appendRecord(v, index++, binX, binY, counts);
-                    }
-                }
-            } else if (useShortBinX && !useShortBinY) {
-                int32_t rowCount = readInt32FromFile(bufferin);
-                for (int i = 0; i < rowCount; i++) {
-                    int32_t binY = binYOffset + readInt32FromFile(bufferin);
-                    int16_t colCount = readInt16FromFile(bufferin);
-                    for (int j = 0; j < colCount; j++) {
-                        int32_t binX = binXOffset + readInt16FromFile(bufferin);
-                        float counts;
-                        if (useShort) {
-                            counts = readInt16FromFile(bufferin);
-                        } else {
-                            counts = readFloatFromFile(bufferin);
-                        }
-                        appendRecord(v, index++, binX, binY, counts);
-                    }
-                }
-            } else if (!useShortBinX && useShortBinY) {
-                int16_t rowCount = readInt16FromFile(bufferin);
-                for (int i = 0; i < rowCount; i++) {
-                    int32_t binY = binYOffset + readInt16FromFile(bufferin);
-                    int32_t colCount = readInt32FromFile(bufferin);
-                    for (int j = 0; j < colCount; j++) {
-                        int32_t binX = binXOffset + readInt32FromFile(bufferin);
-                        float counts;
-                        if (useShort) {
-                            counts = readInt16FromFile(bufferin);
-                        } else {
-                            counts = readFloatFromFile(bufferin);
-                        }
-                        appendRecord(v, index++, binX, binY, counts);
-                    }
-                }
-            } else {
-                int32_t rowCount = readInt32FromFile(bufferin);
-                for (int i = 0; i < rowCount; i++) {
-                    int32_t binY = binYOffset + readInt32FromFile(bufferin);
-                    int32_t colCount = readInt32FromFile(bufferin);
-                    for (int j = 0; j < colCount; j++) {
-                        int32_t binX = binXOffset + readInt32FromFile(bufferin);
-                        float counts;
-                        if (useShort) {
-                            counts = readInt16FromFile(bufferin);
-                        } else {
-                            counts = readFloatFromFile(bufferin);
-                        }
-                        appendRecord(v, index++, binX, binY, counts);
-                    }
-                }
-            }
-        } else if (type == 2) {
-            int32_t nPts = readInt32FromFile(bufferin);
-            int16_t w = readInt16FromFile(bufferin);
-
-            for (int i = 0; i < nPts; i++) {
-                //int32_t idx = (p.y - binOffset2) * w + (p.x - binOffset1);
-                int32_t row = i / w;
-                int32_t col = i - row * w;
-                int32_t bin1 = binXOffset + col;
-                int32_t bin2 = binYOffset + row;
-
-                float counts;
-                if (useShort) {
-                    int16_t c = readInt16FromFile(bufferin);
-                    if (c != -32768) {
-                        appendRecord(v, index++, bin1, bin2, c);
-                    }
-                } else {
-                    counts = readFloatFromFile(bufferin);
-                    if (!isnan(counts)) {
-                        appendRecord(v, index++, bin1, bin2, counts);
-                    }
-                }
-            }
-        }
-    }
-    delete[] compressedBytes;
-    delete[] uncompressedBytes; // don't forget to delete your heap arrays in C++!
-    return v;
-}
-
-// reads the normalization vector from the file at the specified location
-vector<double> readNormalizationVector(istream &bufferin, int32_t version) {
-    int64_t nValues;
-    if (version > 8) {
-        nValues = readInt64FromFile(bufferin);
-    } else {
-        nValues = (int64_t) readInt32FromFile(bufferin);
-    }
-
-    uint64_t numValues;
-    numValues = static_cast<uint64_t>(nValues);
-    vector<double> values(numValues);
-
-    if (version > 8) {
-        for (int i = 0; i < nValues; i++) {
-            values[i] = (double) readFloatFromFile(bufferin);
-        }
-    } else {
-        for (int i = 0; i < nValues; i++) {
-            values[i] = readDoubleFromFile(bufferin);
-        }
-    }
-
-    return values;
-}
-
-class MatrixZoomData {
-public:
-    bool isIntra;
-    string fileName;
-    int64_t myFilePos = 0LL;
-    vector<double> expectedValues;
-    bool foundFooter = false;
-    vector<double> c1Norm;
-    vector<double> c2Norm;
-    int32_t c1 = 0;
-    int32_t c2 = 0;
-    string matrixType;
-    string norm;
-    int32_t version = 0;
-    int32_t resolution = 0;
-    int32_t numBins1 = 0;
-    int32_t numBins2 = 0;
-    float sumCounts;
-    int32_t blockBinCount, blockColumnCount;
-    map<int32_t, indexEntry> blockMap;
-    double avgCount;
-
-    MatrixZoomData(const chromosome &chrom1, const chromosome &chrom2, const string &matrixType,
-                   const string &norm, const string &unit, int32_t resolution,
-                   int32_t &version, int64_t &master, int64_t &totalFileSize,
-                   const string &fileName) {
-        this->version = version;
-        this->fileName = fileName;
-        int32_t c01 = chrom1.index;
-        int32_t c02 = chrom2.index;
-        if (c01 <= c02) { // default is ok
-            this->c1 = c01;
-            this->c2 = c02;
-            this->numBins1 = static_cast<int32_t>(chrom1.length / resolution);
-            this->numBins2 = static_cast<int32_t>(chrom2.length / resolution);
-        } else { // flip
-            this->c1 = c02;
-            this->c2 = c01;
-            this->numBins1 = static_cast<int32_t>(chrom2.length / resolution);
-            this->numBins2 = static_cast<int32_t>(chrom1.length / resolution);
-        }
-        isIntra = c1 == c2;
-
-        this->matrixType = matrixType;
-        this->norm = norm;
-        this->resolution = resolution;
-
-        HiCFileStream *stream = new HiCFileStream(fileName);
-        indexEntry c1NormEntry{}, c2NormEntry{};
-
-        if (stream->isHttp) {
-            foundFooter = readFooterURL(stream->curl, master, version, c1, c2, matrixType, norm, unit,
-                                     resolution,
-                                     myFilePos,
-                                     c1NormEntry, c2NormEntry, expectedValues);
-        } else {
-            stream->fin.seekg(master, ios::beg);
-            foundFooter = readFooter(stream->fin, master, version, c1, c2, matrixType, norm,
-                                     unit,
-                                     resolution, myFilePos,
-                                     c1NormEntry, c2NormEntry, expectedValues);
-        }
-
-        if (!foundFooter) {
-            return;
-        }
-        stream->close();
-
-        if (norm != "NONE") {
-            c1Norm = readNormalizationVectorFromFooter(c1NormEntry, version, fileName);
-            if (isIntra) {
-                c2Norm = c1Norm;
-            } else {
-                c2Norm = readNormalizationVectorFromFooter(c2NormEntry, version, fileName);
-            }
-        }
-
-        HiCFileStream *stream2 = new HiCFileStream((fileName));
-        if (stream2->isHttp) {
-            // readMatrix will assign blockBinCount and blockColumnCount
-            blockMap = readMatrixHttp(stream2->curl, myFilePos, unit, resolution, sumCounts,
-                                      blockBinCount,
-                                      blockColumnCount);
-        } else {
-            // readMatrix will assign blockBinCount and blockColumnCount
-            blockMap = readMatrix(stream2->fin, myFilePos, unit, resolution, sumCounts,
-                                  blockBinCount,
-                                  blockColumnCount);
-        }
-        stream2->close();
-
-        if (!isIntra) {
-            avgCount = (sumCounts / numBins1) / numBins2;   // <= trying to avoid overflows
-        }
-    }
-
-    static vector<double> readNormalizationVectorFromFooter(indexEntry cNormEntry, int32_t &version,
-                                                            const string &fileName) {
-        if (cNormEntry.size == 0) {
-            // no normalization entries in this file.  Bow out now and avoid an allocation error
-            throw strawException("Trying to retrieve other normalization options, but none exist");
-        }
-        char *buffer = readCompressedBytesFromFile(fileName, cNormEntry);
-        memstream bufferin(buffer, cNormEntry.size);
-        vector<double> cNorm = readNormalizationVector(bufferin, version);
-        delete buffer;
-        return cNorm;
-    }
-
-    static bool isInRange(int32_t r, int32_t c, int32_t numRows, int32_t numCols) {
-        return 0 <= r && r < numRows && 0 <= c && c < numCols;
-    }
-
-    set<int32_t> getBlockNumbers(int64_t *regionIndices) const {
-        if (version > 8 && isIntra) {
-            return getBlockNumbersForRegionFromBinPositionV9Intra(regionIndices, blockBinCount, blockColumnCount);
-        } else {
-            return getBlockNumbersForRegionFromBinPosition(regionIndices, blockBinCount, blockColumnCount, isIntra);
-        }
-    }
-
-    vector<double> getNormVector(int32_t index) {
-        if (index == c1) {
-            return c1Norm;
-        } else if (index == c2) {
-            return c2Norm;
-        }
-        cerr << "Invalid index provided: " << index << endl;
-        cerr << "Should be either " << c1 << " or " << c2 << endl;
-        vector<double> v;
-        return v;
-    }
-
-    vector<double> getExpectedValues() {
-        return expectedValues;
-    }
-
-    vector<contactRecord> getRecords(int64_t gx0, int64_t gx1, int64_t gy0, int64_t gy1) {
-        if (!foundFooter) {
-            vector<contactRecord> v;
-            return v;
-        }
-        int64_t origRegionIndices[] = {gx0, gx1, gy0, gy1};
-        int64_t regionIndices[4];
-        convertGenomeToBinPos(origRegionIndices, regionIndices, resolution);
-
-        set<int32_t> blockNumbers = getBlockNumbers(regionIndices);
-        vector<contactRecord> records;
-        for (int32_t blockNumber : blockNumbers) {
-            // get contacts in this block
-            //cout << *it << " -- " << blockMap.size() << endl;
-            //cout << blockMap[*it].size << " " <<  blockMap[*it].position << endl;
-            vector<contactRecord> tmp_records = readBlock(fileName, blockMap[blockNumber], version);
-            for (contactRecord rec : tmp_records) {
-                int64_t x = rec.binX * resolution;
-                int64_t y = rec.binY * resolution;
-
-                if ((x >= origRegionIndices[0] && x <= origRegionIndices[1] &&
-                     y >= origRegionIndices[2] && y <= origRegionIndices[3]) ||
-                    // or check regions that overlap with lower left
-                    (isIntra && y >= origRegionIndices[0] && y <= origRegionIndices[1] && x >= origRegionIndices[2] &&
-                     x <= origRegionIndices[3])) {
-
-                    float c = rec.counts;
-                    if (norm != "NONE") {
-                        c = static_cast<float>(c / (c1Norm[rec.binX] * c2Norm[rec.binY]));
-                    }
-                    if (matrixType == "oe") {
-                        if (isIntra) {
-                            c = static_cast<float>(c / expectedValues[min(expectedValues.size() - 1,
-                                                                          (size_t) floor(abs(y - x) /
-                                                                                         resolution))]);
-                        } else {
-                            c = static_cast<float>(c / avgCount);
-                        }
-                    } else if (matrixType == "expected") {
-                        if (isIntra) {
-                            c = static_cast<float>(expectedValues[min(expectedValues.size() - 1,
-                                                                      (size_t) floor(abs(y - x) /
-                                                                                     resolution))]);
-                        } else {
-                            c = static_cast<float>(avgCount);
-                        }
-                    }
-
-                    if (!isnan(c) && !isinf(c)){
-                        contactRecord record = contactRecord();
-                        record.binX = static_cast<int32_t>(x);
-                        record.binY = static_cast<int32_t>(y);
-                        record.counts = c;
-                        records.push_back(record);
-                    }
-                }
-            }
-        }
-        return records;
-    }
-
-    vector<vector<float> > getRecordsAsMatrix(int64_t gx0, int64_t gx1, int64_t gy0, int64_t gy1) {
-        vector<contactRecord> records = this->getRecords(gx0, gx1, gy0, gy1);
-        if (records.empty()) {
-            vector<vector<float> > res = vector<vector<float> >(1, vector<float>(1, 0));
-            return res;
-        }
-
-        int64_t origRegionIndices[] = {gx0, gx1, gy0, gy1};
-        int64_t regionIndices[4];
-        convertGenomeToBinPos(origRegionIndices, regionIndices, resolution);
-
-        int64_t originR = regionIndices[0];
-        int64_t endR = regionIndices[1];
-        int64_t originC = regionIndices[2];
-        int64_t endC = regionIndices[3];
-        int32_t numRows = endR - originR + 1;
-        int32_t numCols = endC - originC + 1;
-        vector<vector<float> > matrix;
-        for (int32_t i = 0; i < numRows; i++) {
-            matrix.emplace_back(numCols, 0);
-        }
-
-        for (contactRecord cr : records) {
-            if (isnan(cr.counts) || isinf(cr.counts)) continue;
-            int32_t r = cr.binX / resolution - originR;
-            int32_t c = cr.binY / resolution - originC;
-            if (isInRange(r, c, numRows, numCols)) {
-                matrix[r][c] = cr.counts;
-            }
-            if (isIntra) {
-                r = cr.binY / resolution - originR;
-                c = cr.binX / resolution - originC;
-                if (isInRange(r, c, numRows, numCols)) {
-                    matrix[r][c] = cr.counts;
-                }
-            }
-        }
-        return matrix;
-    }
-
-    int64_t getNumberOfTotalRecords() {
-        if (!foundFooter) {
-            return 0;
-        }
-        int64_t regionIndices[4] = {0, numBins1, 0, numBins2};
-        set<int32_t> blockNumbers = getBlockNumbers(regionIndices);
-        int64_t total = 0;
-        for (int32_t blockNumber : blockNumbers) {
-            total += getNumRecordsInBlock(fileName, blockMap[blockNumber], version);
-        }
-        return total;
-    }
-};
-
-class HiCFile {
-public:
-    string prefix = "http"; // HTTP code
-    int64_t master = 0LL;
-    map<string, chromosome> chromosomeMap;
-    string genomeID;
-    int32_t numChromosomes = 0;
-    int32_t version = 0;
-    int64_t nviPosition = 0LL;
-    int64_t nviLength = 0LL;
-    vector<int32_t> resolutions;
-    static int64_t totalFileSize;
-    string fileName;
-    vector<string> attributes;
-
-    static size_t hdf(char *b, size_t size, size_t nitems, void *userdata) {
-        size_t numbytes = size * nitems;
-        b[numbytes + 1] = '\0';
-        string s(b);
-        int32_t found;
-        found = static_cast<int32_t>(s.find("content-range"));
-        if ((size_t) found == string::npos) {
-            found = static_cast<int32_t>(s.find("Content-Range"));
-        }
-        if ((size_t) found != string::npos) {
-            int32_t found2;
-            found2 = static_cast<int32_t>(s.find('/'));
-            //content-range: bytes 0-100000/891471462
-            if ((size_t) found2 != string::npos) {
-                string total = s.substr(found2 + 1);
-                totalFileSize = stol(total);
-            }
-        }
-
-        return numbytes;
-    }
-
-    static CURL *oneTimeInitCURL(const char *url) {
-        CURL *curl = initCURL(url);
-        curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, hdf);
-        curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
-        return curl;
-    }
-
-    explicit HiCFile(const string &fileName) {
-        this->fileName = fileName;
-
-        // read header into buffer; 100K should be sufficient
-        if (std::strncmp(fileName.c_str(), prefix.c_str(), prefix.size()) == 0) {
-            CURL *curl;
-            curl = oneTimeInitCURL(fileName.c_str());
-            char *buffer = getData(curl, 0, 100000);
-            memstream bufin(buffer, 100000);
-            chromosomeMap = readHeader(bufin, master, genomeID, numChromosomes,
-                                       version, nviPosition, nviLength, attributes);
-            resolutions = readResolutionsFromHeader(bufin);
-            curl_easy_cleanup(curl);
-            delete buffer;
-        } else {
-            ifstream fin;
-            fin.open(fileName, fstream::in | fstream::binary);
-            if (!fin) {
-                throw strawException("File " + fileName + " cannot be opened for reading");
-                //cerr << "File " << fileName << " cannot be opened for reading" << endl;
-                //exit(6);
-            }
-            chromosomeMap = readHeader(fin, master, genomeID, numChromosomes,
-                                       version, nviPosition, nviLength, attributes);
-            resolutions = readResolutionsFromHeader(fin);
-            fin.close();
-        }
-    }
-
-    string getGenomeID() const {
-        return genomeID;
-    }
-
-    vector<int32_t> getResolutions() const {
-        return resolutions;
-    }
-
-    vector<chromosome> getChromosomes() {
-        chromosome chromosomes[chromosomeMap.size()];
-        map<string, chromosome>::iterator iter = chromosomeMap.begin();
-        while (iter != chromosomeMap.end()) {
-            chromosome chrom = static_cast<chromosome>(iter->second);
-            chromosomes[chrom.index] = chrom;
-            iter++;
-        }
-
-        vector<chromosome> final_chromosomes;
-        final_chromosomes.reserve(chromosomeMap.size());
-        for(int32_t i = 0; i < chromosomeMap.size(); i++){
-            final_chromosomes.push_back(chromosomes[i]);
-        }
-        return final_chromosomes;
-    }
-
-    MatrixZoomData * getMatrixZoomData(const string &chr1, const string &chr2, const string &matrixType,
-                                       const string &norm, const string &unit, int32_t resolution) {
-        chromosome chrom1 = chromosomeMap[chr1];
-        chromosome chrom2 = chromosomeMap[chr2];
-        return new MatrixZoomData(chrom1, chrom2, (matrixType), (norm), (unit),
-                                  resolution, version, master, totalFileSize, fileName);
-    }
-};
-
-int64_t HiCFile::totalFileSize = 0LL;
-
-void parsePositions(const string &chrLoc, string &chrom, int64_t &pos1, int64_t &pos2, map<string, chromosome> map) {
-    string x, y;
-    stringstream ss(chrLoc);
-    getline(ss, chrom, ':');
-    if (map.count(chrom) == 0) {
-        throw strawException("chromosome " + chrom + " not found in the file.");
-        //cerr << "chromosome " << chrom << " not found in the file." << endl;
-        //exit(7);
-    }
-
-    if (getline(ss, x, ':') && getline(ss, y, ':')) {
-        pos1 = stol(x);
-        pos2 = stol(y);
-    } else {
-        pos1 = 0LL;
-        pos2 = map[chrom].length;
-    }
-}
-
-vector<contactRecord> straw(const string &matrixType, const string &norm, const string &fileName, const string &chr1loc,
-                            const string &chr2loc, const string &unit, int32_t binsize) {
-    if (!(unit == "BP" || unit == "FRAG")) {
-        cerr << "Norm specified incorrectly, must be one of <BP/FRAG>" << endl;
-        cerr << "Usage: straw [observed/oe/expected] <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr1>[:x1:x2] <chr2>[:y1:y2] <BP/FRAG> <binsize>"
-             << endl;
-        vector<contactRecord> v;
-        return v;
-    }
-
-    HiCFile *hiCFile = new HiCFile(fileName);
-    string chr1, chr2;
-    int64_t origRegionIndices[4] = {-100LL, -100LL, -100LL, -100LL};
-    parsePositions((chr1loc), chr1, origRegionIndices[0], origRegionIndices[1], hiCFile->chromosomeMap);
-    parsePositions((chr2loc), chr2, origRegionIndices[2], origRegionIndices[3], hiCFile->chromosomeMap);
-
-    if (hiCFile->chromosomeMap[chr1].index > hiCFile->chromosomeMap[chr2].index) {
-        MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr2, chr1, matrixType, norm, unit, binsize);
-        return mzd->getRecords(origRegionIndices[2], origRegionIndices[3], origRegionIndices[0], origRegionIndices[1]);
-    } else {
-        MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr1, chr2, matrixType, norm, unit, binsize);
-        return mzd->getRecords(origRegionIndices[0], origRegionIndices[1], origRegionIndices[2], origRegionIndices[3]);
-    }
-}
-
-vector<vector<float> > strawAsMatrix(const string &matrixType, const string &norm, const string &fileName, const string &chr1loc,
-                   const string &chr2loc, const string &unit, int32_t binsize) {
-    if (!(unit == "BP" || unit == "FRAG")) {
-        cerr << "Norm specified incorrectly, must be one of <BP/FRAG>" << endl;
-        cerr << "Usage: straw [observed/oe/expected] <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr1>[:x1:x2] <chr2>[:y1:y2] <BP/FRAG> <binsize>"
-             << endl;
-        vector<vector<float> > res = vector<vector<float> >(1, vector<float>(1, 0));
-        return res;
-    }
-
-    HiCFile *hiCFile = new HiCFile(fileName);
-    string chr1, chr2;
-    int64_t origRegionIndices[4] = {-100LL, -100LL, -100LL, -100LL};
-    parsePositions((chr1loc), chr1, origRegionIndices[0], origRegionIndices[1], hiCFile->chromosomeMap);
-    parsePositions((chr2loc), chr2, origRegionIndices[2], origRegionIndices[3], hiCFile->chromosomeMap);
-
-    if (hiCFile->chromosomeMap[chr1].index > hiCFile->chromosomeMap[chr2].index) {
-        MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr2, chr1, matrixType, norm, unit, binsize);
-        return mzd->getRecordsAsMatrix(origRegionIndices[2], origRegionIndices[3], origRegionIndices[0], origRegionIndices[1]);
-    } else {
-        MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr1, chr2, matrixType, norm, unit, binsize);
-        return mzd->getRecordsAsMatrix(origRegionIndices[0], origRegionIndices[1], origRegionIndices[2], origRegionIndices[3]);
-    }
-}
-
-int64_t getNumRecordsForFile(const string &fileName, int32_t binsize, bool interOnly) {
-    HiCFile *hiCFile = new HiCFile(fileName);
-    int64_t totalNumRecords = 0;
-
-    int32_t indexOffset = 0;
-    if (interOnly){
-        indexOffset = 1;
-    }
-
-    vector<chromosome> chromosomes = hiCFile->getChromosomes();
-    for(int32_t i = 0; i < chromosomes.size(); i++){
-        if(chromosomes[i].index <= 0) continue;
-        for(int32_t j = i + indexOffset; j < chromosomes.size(); j++){
-            if(chromosomes[j].index <= 0) continue;
-            MatrixZoomData *mzd;
-            if(chromosomes[i].index > chromosomes[j].index){
-                mzd = hiCFile->getMatrixZoomData(chromosomes[j].name, chromosomes[i].name, "observed", "NONE", "BP", binsize);
-            } else {
-                mzd = hiCFile->getMatrixZoomData(chromosomes[i].name, chromosomes[j].name, "observed", "NONE", "BP", binsize);
-            }
-            totalNumRecords += mzd->getNumberOfTotalRecords();
-        }
-    }
-
-    return totalNumRecords;
-}
-
-int64_t getNumRecordsForChromosomes(const string &fileName, int32_t binsize, bool interOnly) {
-    HiCFile *hiCFile = new HiCFile(fileName);
-    vector<chromosome> chromosomes = hiCFile->getChromosomes();
-    for(int32_t i = 0; i < chromosomes.size(); i++){
-        if(chromosomes[i].index <= 0) continue;
-        MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chromosomes[i].name, chromosomes[i].name, "observed", "NONE", "BP", binsize);
-        int64_t totalNumRecords = mzd->getNumberOfTotalRecords();
-        cout << chromosomes[i].name << " " << totalNumRecords << " ";
-        cout << totalNumRecords*12/1000/1000/1000 << " GB" << endl;
-    }
-    return 0;
-}
-
-void getHeaderFields(const string &filename, string &genome, vector<string> &chromNames, vector<int> &chromSizes,
-        vector<int> &bpResolutions, vector<int> &fragResolutions, vector<string> &attributes)
-/* Fill in the provided fields with information from the header of the hic file in the supplied filename.
- * fragResolutions is left empty for now, as we're not making use of it. */
-{
-    HiCFile *hiCFile = new HiCFile(filename);
-
-    genome.assign(hiCFile->getGenomeID());
-
-    vector<chromosome> chromList = hiCFile->getChromosomes();
-    vector<string> myChromNames;
-    myChromNames.reserve(chromList.size());
-    vector<int> myChromSizes;
-    myChromSizes.reserve(chromList.size());
-    for(int32_t i = 0; i < chromList.size(); i++){
-        myChromNames.push_back(chromList[i].name);
-        myChromSizes.push_back(chromList[i].length);
-    }
-    chromNames = myChromNames;
-    chromSizes = myChromSizes;
-
-    vector<int> myBpResolutions;
-    myBpResolutions.reserve(hiCFile->resolutions.size());
-    for(int32_t i = 0; i < hiCFile->resolutions.size(); i++){
-        myBpResolutions.push_back(hiCFile->resolutions[i]);
-    }
-    bpResolutions = myBpResolutions;
-
-    attributes = hiCFile->attributes;
-
-    // Ignoring fragment resolutions for now, since they're never being used here
-}
-
-set<string> getNormOptions()
-/* Return the set of normalization options that have been encountered through footer parsing.
- * The result will be empty unless at least one straw() request has been made.
- */
-{
-    return globalNormOptions;
-}