src/hg/lib/straw/straw.cpp ae1ef6aa226d4436d4dc9e2b9b28120d802ab8a5

ae1ef6aa226d4436d4dc9e2b9b28120d802ab8a5
jcasper
  Thu Jul 25 11:57:53 2024 -0700
Dropping new straw library files into place, refs #33225

diff --git src/hg/lib/straw/straw.cpp src/hg/lib/straw/straw.cpp
new file mode 100644
index 0000000..c7428df
--- /dev/null
+++ src/hg/lib/straw/straw.cpp
@@ -0,0 +1,1755 @@
+/*
+  The MIT License (MIT)
+
+  Copyright (c) 2017-2021 Aiden Lab, Rice University, Baylor College of Medicine
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in
+ all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ THE SOFTWARE.
+*/
+#include <cstring>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <map>
+#include <cmath>
+#include <set>
+#include <utility>
+#include <vector>
+#include <streambuf>
+//#include <curl/curl.h>
+#include <iterator>
+#include <algorithm>
+#include "zlib.h"
+//#include "straw.h"
+#include "new_straw.h"
+extern "C" {
+#include "../../inc/fakeCurl.h"
+}
+
+using namespace std;
+
+
+// Added UCSC: Dirty hack, but it's hard to quickly extract a list of available normalization options.
+// This gets populated by readFooter and readFooterURL, so you have to make a quick dummy data request
+// with straw() to set it up (e.g. to the first listed chromosome for a 1 bp window).
+set<string> globalNormOptions;
+
+/*
+  Straw: fast C++ implementation of dump. Not as fully featured as the
+  Java version. Reads the .hic file, finds the appropriate matrix and slice
+  of data, and outputs as text in sparse upper triangular format.
+
+  Currently only supporting matrices.
+
+  Usage: straw [observed/oe/expected] <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr1>[:x1:x2] <chr2>[:y1:y2] <BP/FRAG> <binsize>
+ */
+
+// callback for libcurl. data written to this buffer
+static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) {
+    size_t realsize = size * nmemb;
+    struct MemoryStruct *mem;
+    mem = (struct MemoryStruct *) userp;
+
+    mem->memory = static_cast<char *>(realloc(mem->memory, mem->size + realsize + 1));
+    if (mem->memory == nullptr) {
+        /* out of memory! */
+        printf("not enough memory (realloc returned NULL)\n");
+        return 0;
+    }
+
+    std::memcpy(&(mem->memory[mem->size]), contents, realsize);
+    mem->size += realsize;
+    mem->memory[mem->size] = 0;
+
+    return realsize;
+}
+
+// get a buffer that can be used as an input stream from the URL
+char *getData(CURL *curl, int64_t position, int64_t chunksize) {
+    std::ostringstream oss;
+    struct MemoryStruct chunk{};
+    chunk.memory = static_cast<char *>(malloc(1));
+    chunk.size = 0;    /* no data at this point */
+    oss << position << "-" << position + chunksize;
+    curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) &chunk);
+    curl_easy_setopt(curl, CURLOPT_RANGE, oss.str().c_str());
+    // Setting FAILONERROR because otherwise this library has a bad habit of assuming data exists
+    // and parsing what it gets back, regardless of whether it has run off the end of the file.
+    curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
+    CURLcode res = curl_easy_perform(curl);
+    if (res != CURLE_OK) {
+        throw strawException("curl_easy_perform() failed: " + string(curl_easy_strerror(res)));
+        //fprintf(stderr, "curl_easy_perform() failed: %s\n",
+        //        curl_easy_strerror(res));
+    }
+
+    return chunk.memory;
+}
+
+bool readMagicString(istream &fin) {
+    string str;
+    getline(fin, str, '\0');
+    return str[0] == 'H' && str[1] == 'I' && str[2] == 'C';
+}
+
+char readCharFromFile(istream &fin) {
+    char tempChar;
+    fin.read(&tempChar, sizeof(char));
+    return tempChar;
+}
+
+int16_t readInt16FromFile(istream &fin) {
+    int16_t tempInt16;
+    fin.read((char *) &tempInt16, sizeof(int16_t));
+    return tempInt16;
+}
+
+int32_t readInt32FromFile(istream &fin) {
+    int32_t tempInt32;
+    fin.read((char *) &tempInt32, sizeof(int32_t));
+    return tempInt32;
+}
+
+int64_t readInt64FromFile(istream &fin) {
+    int64_t tempInt64;
+    fin.read((char *) &tempInt64, sizeof(int64_t));
+    return tempInt64;
+}
+
+float readFloatFromFile(istream &fin) {
+    float tempFloat;
+    fin.read((char *) &tempFloat, sizeof(float));
+    return tempFloat;
+}
+
+double readDoubleFromFile(istream &fin) {
+    double tempDouble;
+    fin.read((char *) &tempDouble, sizeof(double));
+    return tempDouble;
+}
+
+void convertGenomeToBinPos(const int64_t origRegionIndices[4], int64_t regionIndices[4], int32_t resolution) {
+    for (uint16_t q = 0; q < 4; q++) {
+        // used to find the blocks we need to access
+        regionIndices[q] = origRegionIndices[q] / resolution;
+    }
+}
+
+static CURL *initCURL(const char *url) {
+    CURL *curl = curl_easy_init();
+    if (curl) {
+        curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
+        curl_easy_setopt(curl, CURLOPT_URL, url);
+        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+        curl_easy_setopt(curl, CURLOPT_USERAGENT, "straw");
+        curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
+    } else {
+        throw strawException("Unable to initialize curl");
+        //cerr << "Unable to initialize curl " << endl;
+        //exit(2);
+    }
+    return curl;
+}
+
+class HiCFileStream {
+public:
+    string prefix = "http"; // HTTP code
+    ifstream fin;
+    CURL *curl;
+    bool isHttp = false;
+
+    explicit HiCFileStream(const string &fileName) {
+        if (std::strncmp(fileName.c_str(), prefix.c_str(), prefix.size()) == 0) {
+            isHttp = true;
+            curl = initCURL(fileName.c_str());
+            if (!curl) {
+                throw strawException("URL " + fileName + " cannot be opened for reading");
+                //cerr << "URL " << fileName << " cannot be opened for reading" << endl;
+                //exit(3);
+            }
+        } else {
+            fin.open(fileName, fstream::in | fstream::binary);
+            if (!fin) {
+                throw strawException("File " + fileName + " cannot be opened for reading");
+                //cerr << "File " << fileName << " cannot be opened for reading" << endl;
+                //exit(4);
+            }
+        }
+    }
+
+    void close() {
+        if (isHttp) {
+            curl_easy_cleanup(curl);
+        } else {
+            fin.close();
+        }
+    }
+
+    char *readCompressedBytes(indexEntry idx) {
+        if (isHttp) {
+            return getData(curl, idx.position, idx.size);
+        } else {
+            char *buffer = new char[idx.size];
+            fin.seekg(idx.position, ios::beg);
+            fin.read(buffer, idx.size);
+            return buffer;
+        }
+    }
+};
+
+char *readCompressedBytesFromFile(const string &fileName, indexEntry idx) {
+    HiCFileStream *stream = new HiCFileStream(fileName);
+    char *compressedBytes = stream->readCompressedBytes(idx);
+    stream->close();
+    delete stream;
+    return compressedBytes;
+}
+
+// reads the header, storing the positions of the normalization vectors and returning the masterIndexPosition pointer
+map<string, chromosome> readHeader(istream &fin, int64_t &masterIndexPosition, string &genomeID,
+                                   int32_t &numChromosomes, int32_t &version, int64_t &nviPosition,
+                                   int64_t &nviLength, vector<string> &attributes) {
+    map<string, chromosome> chromosomeMap;
+    if (!readMagicString(fin)) {
+        throw strawException("Hi-C magic string is missing, does not appear to be a hic file");
+        // Not sure why this function was trying to continue after this failure (the next step is
+        // reading resolutions from the file, which clearly isn't going to work).
+        //cerr << "Hi-C magic string is missing, does not appear to be a hic file" << endl;
+        masterIndexPosition = -1;
+        return chromosomeMap;
+    }
+
+    version = readInt32FromFile(fin);
+    if (version < 6) {
+        throw strawException("Version " + to_string(version) + " no longer supported");
+        // Not sure why this function was trying to continue after this failure (the next step is
+        // reading resolutions from the file, which clearly isn't going to work).
+        //cerr << "Version " << version << " no longer supported" << endl;
+        masterIndexPosition = -1;
+        return chromosomeMap;
+    }
+    masterIndexPosition = readInt64FromFile(fin);
+    getline(fin, genomeID, '\0');
+
+    if (version > 8) {
+        nviPosition = readInt64FromFile(fin);
+        nviLength = readInt64FromFile(fin);
+    }
+
+    int32_t nattributes = readInt32FromFile(fin);
+
+    // reading attribute-value dictionary
+    for (int i = 0; i < nattributes; i++) {
+        string key, value;
+        getline(fin, key, '\0');
+        getline(fin, value, '\0');
+        attributes.insert(attributes.end(), key);
+        attributes.insert(attributes.end(), value);
+    }
+
+    numChromosomes = readInt32FromFile(fin);
+    // chromosome map for finding matrixType
+    for (int i = 0; i < numChromosomes; i++) {
+        string name;
+        int64_t length;
+        getline(fin, name, '\0');
+        if (version > 8) {
+            length = readInt64FromFile(fin);
+        } else {
+            length = (int64_t) readInt32FromFile(fin);
+        }
+
+        chromosome chr;
+        chr.index = i;
+        chr.name = name;
+        chr.length = length;
+        chromosomeMap[name] = chr;
+    }
+    return chromosomeMap;
+}
+
+vector<int32_t> readResolutionsFromHeader(istream &fin) {
+    int numBpResolutions = readInt32FromFile(fin);
+    vector<int32_t> resolutions;
+    for (int i = 0; i < numBpResolutions; i++) {
+        int32_t res = readInt32FromFile(fin);
+        resolutions.push_back(res);
+    }
+    return resolutions;
+}
+
+//https://www.techiedelight.com/get-slice-sub-vector-from-vector-cpp/
+vector<double> sliceVector(vector<double> &v, int64_t m, int64_t n) {
+    vector<double> vec;
+    copy(v.begin() + m, v.begin() + n + 1, back_inserter(vec));
+    return vec;
+}
+
+// assume always an odd number for length of vector;
+// eve if even, this calculation should be close enough
+double getMedian(vector<double> &v) {
+    size_t n = v.size() / 2;
+    nth_element(v.begin(), v.begin() + n, v.end());
+    return v[n];
+}
+
+void rollingMedian(vector<double> &initialValues, vector<double> &finalResult, int32_t window) {
+    // window is actually a ~wing-span
+    if (window < 1) {
+        finalResult = initialValues;
+        return;
+    }
+
+    /*
+    finalResult.push_back(initialValues[0]);
+    int64_t length = initialValues.size();
+    for (int64_t index = 1; index < length; index++) {
+        int64_t initialIndex;
+        int64_t finalIndex;
+        if (index < window) {
+            initialIndex = 0;
+            finalIndex = 2 * index;
+        } else {
+            initialIndex = index - window;
+            finalIndex = index + window;
+        }
+
+        if (finalIndex > length - 1) {
+            finalIndex = length - 1;
+        }
+
+        vector<double> subVector = sliceVector(initialValues, initialIndex, finalIndex);
+        finalResult.push_back(getMedian(subVector));
+    }
+    */
+    finalResult = initialValues;
+}
+
+void populateVectorWithFloats(istream &fin, vector<double> &vector, int64_t nValues) {
+    for (int j = 0; j < nValues; j++) {
+        double v = readFloatFromFile(fin);
+        vector.push_back(v);
+    }
+}
+
+void populateVectorWithDoubles(istream &fin, vector<double> &vector, int64_t nValues) {
+    for (int j = 0; j < nValues; j++) {
+        double v = readDoubleFromFile(fin);
+        vector.push_back(v);
+    }
+}
+
+int64_t readThroughExpectedVectorURL(CURL *curl, int64_t currentPointer, int32_t version, vector<double> &expectedValues, int64_t nValues,
+                               bool store, int32_t resolution) {
+    if (store) {
+        int32_t bufferSize = nValues * sizeof(double) + 10000;
+        if (version > 8) {
+            bufferSize = nValues * sizeof(float) + 10000;
+        }
+        char *buffer = getData(curl, currentPointer, bufferSize);
+        memstream fin(buffer, bufferSize);
+
+        vector<double> initialExpectedValues;
+        if (version > 8) {
+            populateVectorWithFloats(fin, initialExpectedValues, nValues);
+        } else {
+            populateVectorWithDoubles(fin, initialExpectedValues, nValues);
+        }
+        int32_t window = 5000000 / resolution;
+        rollingMedian(initialExpectedValues, expectedValues, window);
+        delete buffer;
+    }
+
+    if (version > 8) {
+        return nValues * sizeof(float);
+    } else {
+        return nValues * sizeof(double);
+    }
+}
+
+void readThroughExpectedVector(int32_t version, istream &fin, vector<double> &expectedValues, int64_t nValues,
+                               bool store, int32_t resolution) {
+    if (store) {
+        vector<double> initialExpectedValues;
+        if (version > 8) {
+            populateVectorWithFloats(fin, initialExpectedValues, nValues);
+        } else {
+            populateVectorWithDoubles(fin, initialExpectedValues, nValues);
+        }
+        int32_t window = 5000000 / resolution;
+        rollingMedian(initialExpectedValues, expectedValues, window);
+    } else if (nValues > 0) {
+        if (version > 8) {
+            fin.seekg(nValues * sizeof(float), ios_base::cur);
+        } else {
+            fin.seekg(nValues * sizeof(double), ios_base::cur);
+        }
+    }
+}
+
+int64_t readThroughNormalizationFactorsURL(CURL *curl, int64_t currentPointer, int32_t version, bool store, vector<double> &expectedValues,
+                                     int32_t c1, int32_t nNormalizationFactors) {
+
+    if (store) {
+        int32_t bufferSize = nNormalizationFactors * (sizeof(int32_t) + sizeof(double)) + 10000;
+        if (version > 8) {
+            bufferSize = nNormalizationFactors * (sizeof(int32_t) + sizeof(float )) + 10000;
+        }
+        char *buffer = getData(curl, currentPointer, bufferSize);
+        memstream fin(buffer, bufferSize);
+
+        for (int j = 0; j < nNormalizationFactors; j++) {
+            int32_t chrIdx = readInt32FromFile(fin);
+            double v;
+            if (version > 8) {
+                v = readFloatFromFile(fin);
+            } else {
+                v = readDoubleFromFile(fin);
+            }
+            if (chrIdx == c1) {
+                for (double &expectedValue : expectedValues) {
+                    expectedValue = expectedValue / v;
+                }
+            }
+        }
+        delete buffer;
+    }
+
+    if (version > 8) {
+        return nNormalizationFactors * (sizeof(int32_t) + sizeof(float));
+    } else {
+        return nNormalizationFactors * (sizeof(int32_t) + sizeof(double));
+    }
+}
+
+void readThroughNormalizationFactors(istream &fin, int32_t version, bool store, vector<double> &expectedValues,
+                                     int32_t c1) {
+    int32_t nNormalizationFactors = readInt32FromFile(fin);
+    if (store) {
+        for (int j = 0; j < nNormalizationFactors; j++) {
+            int32_t chrIdx = readInt32FromFile(fin);
+            double v;
+            if (version > 8) {
+                v = readFloatFromFile(fin);
+            } else {
+                v = readDoubleFromFile(fin);
+            }
+            if (chrIdx == c1) {
+                for (double &expectedValue : expectedValues) {
+                    expectedValue = expectedValue / v;
+                }
+            }
+        }
+    } else if (nNormalizationFactors > 0) {
+        if (version > 8) {
+            fin.seekg(nNormalizationFactors * (sizeof(int32_t) + sizeof(float)), ios_base::cur);
+        } else {
+            fin.seekg(nNormalizationFactors * (sizeof(int32_t) + sizeof(double)), ios_base::cur);
+        }
+    }
+}
+
+int64_t readStringFromURL(istream &fin, string &basicString) {
+    getline(fin, basicString, '\0');
+    return (basicString.length() + 1);
+}
+
+// reads the footer from the master pointer location. takes in the chromosomes,
+// norm, unit (BP or FRAG) and resolution or binsize, and sets the file
+// position of the matrix and the normalization vectors for those chromosomes
+// at the given normalization and resolution
+bool readFooterURL(CURL *curl, int64_t master, int32_t version, int32_t c1, int32_t c2, const string &matrixType,
+                const string &norm, const string &unit, int32_t resolution, int64_t &myFilePos,
+                indexEntry &c1NormEntry, indexEntry &c2NormEntry, vector<double> &expectedValues) {
+
+    int64_t currentPointer = master;
+
+    char *buffer = getData(curl, currentPointer, 100);
+    memstream fin1(buffer, 100);
+
+    if (version > 8) {
+        int64_t nBytes = readInt64FromFile(fin1);
+        currentPointer += 8;
+    } else {
+        int32_t nBytes = readInt32FromFile(fin1);
+        currentPointer += 4;
+    }
+
+    stringstream ss;
+    ss << c1 << "_" << c2;
+    string key = ss.str();
+
+    int32_t nEntries = readInt32FromFile(fin1);
+    currentPointer += 4;
+    delete buffer;
+
+    int32_t bufferSize0 = nEntries * 50;
+    buffer = getData(curl, currentPointer, bufferSize0);
+    memstream fin2(buffer, bufferSize0);
+
+    bool found = false;
+    for (int i = 0; i < nEntries; i++) {
+        string keyStr;
+        currentPointer += readStringFromURL(fin2, keyStr);
+
+        int64_t fpos = readInt64FromFile(fin2);
+        int32_t sizeinbytes = readInt32FromFile(fin2);
+        currentPointer += 12;
+        if (keyStr == key) {
+            myFilePos = fpos;
+            found = true;
+        }
+    }
+    delete buffer;
+    if (!found) {
+        cerr << "Remote file doesn't have the given chr_chr map " << key << endl;
+        return false;
+    }
+
+    if ((matrixType == "observed" && norm == "NONE") ||
+        ((matrixType == "oe" || matrixType == "expected") && norm == "NONE" && c1 != c2))
+        return true; // no need to read norm vector index
+
+    // read in and ignore expected value maps; don't store; reading these to
+    // get to norm vector index
+    buffer = getData(curl, currentPointer, 100);
+    memstream fin3(buffer, 100);
+
+    int32_t nExpectedValues = readInt32FromFile(fin3);
+    if (fin3.bad() || fin3.fail())
+    {
+        throw strawException("Unable to read expected values from file - it might be truncated early");
+    }
+    currentPointer += 4;
+    delete buffer;
+    for (int i = 0; i < nExpectedValues; i++) {
+
+        buffer = getData(curl, currentPointer, 1000);
+        memstream fin4(buffer, 1000);
+
+        string unit0;
+        currentPointer += readStringFromURL(fin4, unit0);
+
+        int32_t binSize = readInt32FromFile(fin4);
+        currentPointer += 4;
+
+        int64_t nValues;
+        if (version > 8) {
+            nValues = readInt64FromFile(fin4);
+            currentPointer += 8;
+        } else {
+            nValues = (int64_t) readInt32FromFile(fin4);
+            currentPointer += 4;
+        }
+
+        delete buffer;
+
+        bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE" && unit0 == unit &&
+                     binSize == resolution;
+
+        currentPointer += readThroughExpectedVectorURL(curl, currentPointer, version, expectedValues, nValues, store, resolution);
+
+        buffer = getData(curl, currentPointer, 100);
+        memstream fin5(buffer, 100);
+        int32_t nNormalizationFactors = readInt32FromFile(fin5);
+        currentPointer += 4;
+        delete buffer;
+
+        currentPointer += readThroughNormalizationFactorsURL(curl, currentPointer, version, store, expectedValues, c1, nNormalizationFactors);
+    }
+
+    if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE") {
+        if (expectedValues.empty()) {
+            cerr << "Remote file did not contain expected values vectors at " << resolution << " " << unit << endl;
+            return false;
+        }
+        return true;
+    }
+
+    buffer = getData(curl, currentPointer, 100);
+    memstream fin6(buffer, 100);
+    nExpectedValues = readInt32FromFile(fin6);
+    if (fin6.bad() || fin6.fail())
+    {
+        throw strawException("Unable to read normalized expected values from file - it might be truncated early");
+    }
+    currentPointer += 4;
+    delete buffer;
+    for (int i = 0; i < nExpectedValues; i++) {
+        buffer = getData(curl, currentPointer, 1000);
+        memstream fin7(buffer, 1000);
+
+        string nType, unit0;
+        currentPointer += readStringFromURL(fin7, nType);
+        currentPointer += readStringFromURL(fin7, unit0);
+
+        int32_t binSize = readInt32FromFile(fin7);
+        currentPointer += 4;
+
+        int64_t nValues;
+        if (version > 8) {
+            nValues = readInt64FromFile(fin7);
+            currentPointer += 8;
+        } else {
+            nValues = (int64_t) readInt32FromFile(fin7);
+            currentPointer += 4;
+        }
+        bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && nType == norm && unit0 == unit &&
+                     binSize == resolution;
+
+        delete buffer;
+
+        currentPointer += readThroughExpectedVectorURL(curl, currentPointer, version, expectedValues, nValues, store, resolution);
+
+        buffer = getData(curl, currentPointer, 100);
+        memstream fin8(buffer, 100);
+        int32_t nNormalizationFactors = readInt32FromFile(fin8);
+        currentPointer += 4;
+        delete buffer;
+
+        currentPointer += readThroughNormalizationFactorsURL(curl, currentPointer, version, store, expectedValues, c1, nNormalizationFactors);
+    }
+
+    if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm != "NONE") {
+        if (expectedValues.empty()) {
+            cerr << "Remote file did not contain normalized expected values vectors at " << resolution << " " << unit << endl;
+            return false;
+        }
+    }
+
+    buffer = getData(curl, currentPointer, 100);
+    memstream fin9(buffer, 100);
+    nEntries = readInt32FromFile(fin9);
+    if (fin9.bad() || fin9.fail())
+    {
+        throw strawException("Unable to read normalization vectors from file - it might be truncated early");
+    }
+    currentPointer += 4;
+    delete buffer;
+
+    bool found1 = false;
+    bool found2 = false;
+    int32_t bufferSize2 = nEntries*60;
+    buffer = getData(curl, currentPointer, bufferSize2);
+    memstream fin10(buffer, bufferSize2);
+
+    // added UCSC
+    globalNormOptions.clear();
+
+    for (int i = 0; i < nEntries; i++) {
+        string normtype;
+        currentPointer += readStringFromURL(fin10, normtype);
+
+        // added UCSC
+        globalNormOptions.insert(normtype);
+
+        int32_t chrIdx = readInt32FromFile(fin10);
+        currentPointer += 4;
+        string unit1;
+        currentPointer += readStringFromURL(fin10, unit1);
+
+        int32_t resolution1 = readInt32FromFile(fin10);
+        int64_t filePosition = readInt64FromFile(fin10);
+        currentPointer += 12;
+
+        int64_t sizeInBytes;
+        if (version > 8) {
+            sizeInBytes = readInt64FromFile(fin10);
+            currentPointer += 8;
+        } else {
+            sizeInBytes = (int64_t) readInt32FromFile(fin10);
+            currentPointer += 4;
+        }
+
+        if (chrIdx == c1 && normtype == norm && unit1 == unit && resolution1 == resolution) {
+            c1NormEntry.position = filePosition;
+            c1NormEntry.size = sizeInBytes;
+            found1 = true;
+        }
+        if (chrIdx == c2 && normtype == norm && unit1 == unit && resolution1 == resolution) {
+            c2NormEntry.position = filePosition;
+            c2NormEntry.size = sizeInBytes;
+            found2 = true;
+        }
+    }
+    delete buffer;
+    if (!found1 || !found2) {
+        cerr << "Remote file did not contain " << norm << " normalization vectors for one or both chromosomes at "
+             << resolution << " " << unit << endl;
+    }
+    return true;
+}
+
+bool readFooter(istream &fin, int64_t master, int32_t version, int32_t c1, int32_t c2, const string &matrixType,
+                const string &norm, const string &unit, int32_t resolution, int64_t &myFilePos,
+                indexEntry &c1NormEntry, indexEntry &c2NormEntry, vector<double> &expectedValues) {
+
+    if (version > 8) {
+        int64_t nBytes = readInt64FromFile(fin);
+    } else {
+        int32_t nBytes = readInt32FromFile(fin);
+    }
+
+    stringstream ss;
+    ss << c1 << "_" << c2;
+    string key = ss.str();
+
+    int32_t nEntries = readInt32FromFile(fin);
+    bool found = false;
+    for (int i = 0; i < nEntries; i++) {
+        string keyStr;
+        getline(fin, keyStr, '\0');
+        int64_t fpos = readInt64FromFile(fin);
+        int32_t sizeinbytes = readInt32FromFile(fin);
+        if (keyStr == key) {
+            myFilePos = fpos;
+            found = true;
+        }
+    }
+    if (!found) {
+        cerr << "File doesn't have the given chr_chr map " << key << endl;
+        return false;
+    }
+
+    if ((matrixType == "observed" && norm == "NONE") ||
+        ((matrixType == "oe" || matrixType == "expected") && norm == "NONE" && c1 != c2))
+        return true; // no need to read norm vector index
+
+    // read in and ignore expected value maps; don't store; reading these to
+    // get to norm vector index
+    int32_t nExpectedValues = readInt32FromFile(fin);
+    if (fin.bad() || fin.fail())
+    {
+        throw strawException("Unable to read expected values from file - it might be truncated early");
+    }
+    for (int i = 0; i < nExpectedValues; i++) {
+        string unit0;
+        getline(fin, unit0, '\0'); //unit
+        int32_t binSize = readInt32FromFile(fin);
+
+        int64_t nValues;
+        if (version > 8) {
+            nValues = readInt64FromFile(fin);
+        } else {
+            nValues = (int64_t) readInt32FromFile(fin);
+        }
+
+        bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE" && unit0 == unit &&
+                     binSize == resolution;
+        readThroughExpectedVector(version, fin, expectedValues, nValues, store, resolution);
+        readThroughNormalizationFactors(fin, version, store, expectedValues, c1);
+    }
+
+    if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE") {
+        if (expectedValues.empty()) {
+            cerr << "File did not contain expected values vectors at " << resolution << " " << unit << endl;
+            return false;
+        }
+        return true;
+    }
+
+    nExpectedValues = readInt32FromFile(fin);
+    if (fin.bad() || fin.fail())
+    {
+        throw strawException("Unable to read normalized expected values from file - it might be truncated early");
+    }
+
+    for (int i = 0; i < nExpectedValues; i++) {
+        string nType, unit0;
+        getline(fin, nType, '\0'); //typeString
+        getline(fin, unit0, '\0'); //unit
+        int32_t binSize = readInt32FromFile(fin);
+        int64_t nValues;
+        if (version > 8) {
+            nValues = readInt64FromFile(fin);
+        } else {
+            nValues = (int64_t) readInt32FromFile(fin);
+        }
+        bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && nType == norm && unit0 == unit &&
+                     binSize == resolution;
+        readThroughExpectedVector(version, fin, expectedValues, nValues, store, resolution);
+        readThroughNormalizationFactors(fin, version, store, expectedValues, c1);
+    }
+
+    if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm != "NONE") {
+        if (expectedValues.empty()) {
+            cerr << "File did not contain normalized expected values vectors at " << resolution << " " << unit << endl;
+            return false;
+        }
+    }
+
+    // Index of normalization vectors
+    nEntries = readInt32FromFile(fin);
+    bool found1 = false;
+    bool found2 = false;
+
+    // added UCSC
+    globalNormOptions.clear();
+    if (fin.bad() || fin.fail())
+    {
+        throw strawException("Unable to read normalization vectors from file - it might be truncated early");
+    }
+
+    for (int i = 0; i < nEntries; i++) {
+        string normtype;
+        getline(fin, normtype, '\0'); //normalization type
+        int32_t chrIdx = readInt32FromFile(fin);
+        string unit1;
+        getline(fin, unit1, '\0'); //unit
+        int32_t resolution1 = readInt32FromFile(fin);
+        int64_t filePosition = readInt64FromFile(fin);
+        int64_t sizeInBytes;
+        if (version > 8) {
+            sizeInBytes = readInt64FromFile(fin);
+        } else {
+            sizeInBytes = (int64_t) readInt32FromFile(fin);
+        }
+
+        // added UCSC
+        globalNormOptions.insert(normtype);
+
+        if (chrIdx == c1 && normtype == norm && unit1 == unit && resolution1 == resolution) {
+            c1NormEntry.position = filePosition;
+            c1NormEntry.size = sizeInBytes;
+            found1 = true;
+        }
+        if (chrIdx == c2 && normtype == norm && unit1 == unit && resolution1 == resolution) {
+            c2NormEntry.position = filePosition;
+            c2NormEntry.size = sizeInBytes;
+            found2 = true;
+        }
+    }
+    if (!found1 || !found2) {
+        cerr << "File did not contain " << norm << " normalization vectors for one or both chromosomes at "
+             << resolution << " " << unit << endl;
+    }
+    return true;
+}
+
+indexEntry readIndexEntry(istream &fin) {
+    int64_t filePosition = readInt64FromFile(fin);
+    int32_t blockSizeInBytes = readInt32FromFile(fin);
+    indexEntry entry = indexEntry();
+    entry.size = (int64_t) blockSizeInBytes;
+    entry.position = filePosition;
+    return entry;
+}
+
+void setValuesForMZD(istream &fin, const string &myunit, float &mySumCounts, int32_t &mybinsize,
+                     int32_t &myBlockBinCount, int32_t &myBlockColumnCount, bool &found) {
+    string unit;
+    getline(fin, unit, '\0'); // unit
+    readInt32FromFile(fin); // Old "zoom" index -- not used
+    float sumCounts = readFloatFromFile(fin); // sumCounts
+    readFloatFromFile(fin); // occupiedCellCount
+    readFloatFromFile(fin); // stdDev
+    readFloatFromFile(fin); // percent95
+    int32_t binSize = readInt32FromFile(fin);
+    int32_t blockBinCount = readInt32FromFile(fin);
+    int32_t blockColumnCount = readInt32FromFile(fin);
+    found = false;
+    if (myunit == unit && mybinsize == binSize) {
+        mySumCounts = sumCounts;
+        myBlockBinCount = blockBinCount;
+        myBlockColumnCount = blockColumnCount;
+        found = true;
+    }
+}
+
+void populateBlockMap(istream &fin, int32_t nBlocks, map<int32_t, indexEntry> &blockMap) {
+    for (int b = 0; b < nBlocks; b++) {
+        int32_t blockNumber = readInt32FromFile(fin);
+        blockMap[blockNumber] = readIndexEntry(fin);
+    }
+}
+
+// reads the raw binned contact matrix at specified resolution, setting the block bin count and block column count
+map<int32_t, indexEntry> readMatrixZoomData(istream &fin, const string &myunit, int32_t mybinsize, float &mySumCounts,
+                                            int32_t &myBlockBinCount, int32_t &myBlockColumnCount, bool &found) {
+
+    map<int32_t, indexEntry> blockMap;
+    setValuesForMZD(fin, myunit, mySumCounts, mybinsize, myBlockBinCount, myBlockColumnCount, found);
+
+    int32_t nBlocks = readInt32FromFile(fin);
+    if (found) {
+        populateBlockMap(fin, nBlocks, blockMap);
+    } else {
+        fin.seekg(nBlocks * (sizeof(int32_t) + sizeof(int64_t) + sizeof(int32_t)), ios_base::cur);
+    }
+    return blockMap;
+}
+
+// reads the raw binned contact matrix at specified resolution, setting the block bin count and block column count
+map<int32_t, indexEntry> readMatrixZoomDataHttp(CURL *curl, int64_t &myFilePosition, const string &myunit,
+                                                int32_t mybinsize, float &mySumCounts, int32_t &myBlockBinCount,
+                                                int32_t &myBlockColumnCount, bool &found) {
+    map<int32_t, indexEntry> blockMap;
+    int32_t header_size = 5 * sizeof(int32_t) + 4 * sizeof(float);
+    char *first = getData(curl, myFilePosition, 1);
+    if (first[0] == 'B') {
+        header_size += 3;
+    } else if (first[0] == 'F') {
+        header_size += 5;
+    } else {
+        cerr << "Unit not understood" << endl;
+        return blockMap;
+    }
+    delete first;
+    char *buffer = getData(curl, myFilePosition, header_size);
+    memstream fin(buffer, header_size);
+    setValuesForMZD(fin, myunit, mySumCounts, mybinsize, myBlockBinCount, myBlockColumnCount, found);
+    int32_t nBlocks = readInt32FromFile(fin);
+    delete buffer;
+
+    if (found) {
+        int32_t chunkSize = nBlocks * (sizeof(int32_t) + sizeof(int64_t) + sizeof(int32_t));
+        buffer = getData(curl, myFilePosition + header_size, chunkSize);
+        memstream fin2(buffer, chunkSize);
+        populateBlockMap(fin2, nBlocks, blockMap);
+        delete buffer;
+    } else {
+        myFilePosition = myFilePosition + header_size
+                         + (nBlocks * (sizeof(int32_t) + sizeof(int64_t) + sizeof(int32_t)));
+    }
+    return blockMap;
+}
+
+// goes to the specified file pointer in http and finds the raw contact matrixType at specified resolution, calling readMatrixZoomData.
+// sets blockbincount and blockcolumncount
+map<int32_t, indexEntry> readMatrixHttp(CURL *curl, int64_t myFilePosition, const string &unit, int32_t resolution,
+                                        float &mySumCounts, int32_t &myBlockBinCount, int32_t &myBlockColumnCount) {
+    int32_t size = sizeof(int32_t) * 3;
+    char *buffer = getData(curl, myFilePosition, size);
+    memstream bufin(buffer, size);
+
+    int32_t c1 = readInt32FromFile(bufin);
+    int32_t c2 = readInt32FromFile(bufin);
+    int32_t nRes = readInt32FromFile(bufin);
+    int32_t i = 0;
+    bool found = false;
+    myFilePosition = myFilePosition + size;
+    delete buffer;
+    map<int32_t, indexEntry> blockMap;
+
+    while (i < nRes && !found) {
+        // myFilePosition gets updated within call
+        blockMap = readMatrixZoomDataHttp(curl, myFilePosition, unit, resolution, mySumCounts, myBlockBinCount,
+                                          myBlockColumnCount, found);
+        i++;
+    }
+    if (!found) {
+        cerr << "Error finding block data" << endl;
+    }
+    return blockMap;
+}
+
+// goes to the specified file pointer and finds the raw contact matrixType at specified resolution, calling readMatrixZoomData.
+// sets blockbincount and blockcolumncount
+map<int32_t, indexEntry> readMatrix(istream &fin, int64_t myFilePosition, const string &unit, int32_t resolution,
+                                    float &mySumCounts, int32_t &myBlockBinCount, int32_t &myBlockColumnCount) {
+    map<int32_t, indexEntry> blockMap;
+
+    fin.seekg(myFilePosition, ios::beg);
+    int32_t c1 = readInt32FromFile(fin);
+    int32_t c2 = readInt32FromFile(fin);
+    int32_t nRes = readInt32FromFile(fin);
+    int32_t i = 0;
+    bool found = false;
+    while (i < nRes && !found) {
+        blockMap = readMatrixZoomData(fin, unit, resolution, mySumCounts, myBlockBinCount, myBlockColumnCount, found);
+        i++;
+    }
+    if (!found) {
+        cerr << "Error finding block data" << endl;
+    }
+    return blockMap;
+}
+
+// gets the blocks that need to be read for this slice of the data.  needs blockbincount, blockcolumncount, and whether
+// or not this is intrachromosomal.
+set<int32_t> getBlockNumbersForRegionFromBinPosition(const int64_t *regionIndices, int32_t blockBinCount,
+                                                     int32_t blockColumnCount, bool intra) {
+    int32_t col1, col2, row1, row2;
+    col1 = static_cast<int32_t>(regionIndices[0] / blockBinCount);
+    col2 = static_cast<int32_t>((regionIndices[1] + 1) / blockBinCount);
+    row1 = static_cast<int32_t>(regionIndices[2] / blockBinCount);
+    row2 = static_cast<int32_t>((regionIndices[3] + 1) / blockBinCount);
+
+    set<int32_t> blocksSet;
+    // first check the upper triangular matrixType
+    for (int r = row1; r <= row2; r++) {
+        for (int c = col1; c <= col2; c++) {
+            int32_t blockNumber = r * blockColumnCount + c;
+            blocksSet.insert(blockNumber);
+        }
+    }
+    // check region part that overlaps with lower left triangle but only if intrachromosomal
+    if (intra) {
+        for (int r = col1; r <= col2; r++) {
+            for (int c = row1; c <= row2; c++) {
+                int32_t blockNumber = r * blockColumnCount + c;
+                blocksSet.insert(blockNumber);
+            }
+        }
+    }
+    return blocksSet;
+}
+
+set<int32_t> getBlockNumbersForRegionFromBinPositionV9Intra(int64_t *regionIndices, int32_t blockBinCount,
+                                                            int32_t blockColumnCount) {
+    // regionIndices is binX1 binX2 binY1 binY2
+    set<int32_t> blocksSet;
+    int32_t translatedLowerPAD, translatedHigherPAD, translatedNearerDepth, translatedFurtherDepth;
+    translatedLowerPAD = static_cast<int32_t>((regionIndices[0] + regionIndices[2]) / 2 / blockBinCount);
+    translatedHigherPAD = static_cast<int32_t>((regionIndices[1] + regionIndices[3]) / 2 / blockBinCount + 1);
+    translatedNearerDepth = static_cast<int32_t>(log2(
+            1 + abs(regionIndices[0] - regionIndices[3]) / sqrt(2) / blockBinCount));
+    translatedFurtherDepth = static_cast<int32_t>(log2(
+            1 + abs(regionIndices[1] - regionIndices[2]) / sqrt(2) / blockBinCount));
+
+    // because code above assume above diagonal; but we could be below diagonal
+    int32_t nearerDepth = min(translatedNearerDepth, translatedFurtherDepth);
+    if ((regionIndices[0] > regionIndices[3] && regionIndices[1] < regionIndices[2]) ||
+        (regionIndices[1] > regionIndices[2] && regionIndices[0] < regionIndices[3])) {
+        nearerDepth = 0;
+    }
+    int32_t furtherDepth = max(translatedNearerDepth, translatedFurtherDepth) + 1; // +1; integer divide rounds down
+
+    for (int depth = nearerDepth; depth <= furtherDepth; depth++) {
+        for (int pad = translatedLowerPAD; pad <= translatedHigherPAD; pad++) {
+            int32_t blockNumber = depth * blockColumnCount + pad;
+            blocksSet.insert(blockNumber);
+        }
+    }
+
+    return blocksSet;
+}
+
+void appendRecord(vector<contactRecord> &vector, int32_t index, int32_t binX, int32_t binY, float counts) {
+    contactRecord record = contactRecord();
+    record.binX = binX;
+    record.binY = binY;
+    record.counts = counts;
+    vector[index] = record;
+}
+
+int32_t decompressBlock(indexEntry idx, char *compressedBytes, char *uncompressedBytes) {
+    z_stream infstream;
+    infstream.zalloc = Z_NULL;
+    infstream.zfree = Z_NULL;
+    infstream.opaque = Z_NULL;
+    infstream.avail_in = static_cast<uInt>(idx.size); // size of input
+    infstream.next_in = (Bytef *) compressedBytes; // input char array
+    infstream.avail_out = static_cast<uInt>(idx.size * 10); // size of output
+    infstream.next_out = (Bytef *) uncompressedBytes; // output char array
+    // the actual decompression work.
+    inflateInit(&infstream);
+    inflate(&infstream, Z_NO_FLUSH);
+    inflateEnd(&infstream);
+    int32_t uncompressedSize = static_cast<int32_t>(infstream.total_out);
+    return uncompressedSize;
+}
+
+long getNumRecordsInBlock(const string &fileName, indexEntry idx, int32_t version){
+    if (idx.size <= 0) {
+        return 0;
+    }
+    char *compressedBytes = readCompressedBytesFromFile(fileName, idx);
+    char *uncompressedBytes = new char[idx.size * 10]; //biggest seen so far is 3
+    int32_t uncompressedSize = decompressBlock(idx, compressedBytes, uncompressedBytes);
+
+    // create stream from buffer for ease of use
+    memstream bufferin(uncompressedBytes, uncompressedSize);
+    uint64_t nRecords;
+    nRecords = static_cast<uint64_t>(readInt32FromFile(bufferin));
+    delete[] compressedBytes;
+    delete[] uncompressedBytes; // don't forget to delete your heap arrays in C++!
+    return nRecords;
+}
+
+// this is the meat of reading the data.  takes in the block number and returns the set of contact records corresponding to
+// that block.  the block data is compressed and must be decompressed using the zlib library functions
+vector<contactRecord> readBlock(const string &fileName, indexEntry idx, int32_t version) {
+    if (idx.size <= 0) {
+        vector<contactRecord> v;
+        return v;
+    }
+    char *compressedBytes = readCompressedBytesFromFile(fileName, idx);
+    char *uncompressedBytes = new char[idx.size * 10]; //biggest seen so far is 3
+    int32_t uncompressedSize = decompressBlock(idx, compressedBytes, uncompressedBytes);
+
+    // create stream from buffer for ease of use
+    memstream bufferin(uncompressedBytes, uncompressedSize);
+    uint64_t nRecords;
+    nRecords = static_cast<uint64_t>(readInt32FromFile(bufferin));
+    vector<contactRecord> v(nRecords);
+    // different versions have different specific formats
+    if (version < 7) {
+        for (uInt i = 0; i < nRecords; i++) {
+            int32_t binX = readInt32FromFile(bufferin);
+            int32_t binY = readInt32FromFile(bufferin);
+            float counts = readFloatFromFile(bufferin);
+            appendRecord(v, i, binX, binY, counts);
+        }
+    } else {
+        int32_t binXOffset = readInt32FromFile(bufferin);
+        int32_t binYOffset = readInt32FromFile(bufferin);
+        bool useShort = readCharFromFile(bufferin) == 0; // yes this is opposite of usual
+
+        bool useShortBinX = true;
+        bool useShortBinY = true;
+        if (version > 8) {
+            useShortBinX = readCharFromFile(bufferin) == 0;
+            useShortBinY = readCharFromFile(bufferin) == 0;
+        }
+
+        char type = readCharFromFile(bufferin);
+        int32_t index = 0;
+        if (type == 1) {
+            if (useShortBinX && useShortBinY) {
+                int16_t rowCount = readInt16FromFile(bufferin);
+                for (int i = 0; i < rowCount; i++) {
+                    int32_t binY = binYOffset + readInt16FromFile(bufferin);
+                    int16_t colCount = readInt16FromFile(bufferin);
+                    for (int j = 0; j < colCount; j++) {
+                        int32_t binX = binXOffset + readInt16FromFile(bufferin);
+                        float counts;
+                        if (useShort) {
+                            counts = readInt16FromFile(bufferin);
+                        } else {
+                            counts = readFloatFromFile(bufferin);
+                        }
+                        appendRecord(v, index++, binX, binY, counts);
+                    }
+                }
+            } else if (useShortBinX && !useShortBinY) {
+                int32_t rowCount = readInt32FromFile(bufferin);
+                for (int i = 0; i < rowCount; i++) {
+                    int32_t binY = binYOffset + readInt32FromFile(bufferin);
+                    int16_t colCount = readInt16FromFile(bufferin);
+                    for (int j = 0; j < colCount; j++) {
+                        int32_t binX = binXOffset + readInt16FromFile(bufferin);
+                        float counts;
+                        if (useShort) {
+                            counts = readInt16FromFile(bufferin);
+                        } else {
+                            counts = readFloatFromFile(bufferin);
+                        }
+                        appendRecord(v, index++, binX, binY, counts);
+                    }
+                }
+            } else if (!useShortBinX && useShortBinY) {
+                int16_t rowCount = readInt16FromFile(bufferin);
+                for (int i = 0; i < rowCount; i++) {
+                    int32_t binY = binYOffset + readInt16FromFile(bufferin);
+                    int32_t colCount = readInt32FromFile(bufferin);
+                    for (int j = 0; j < colCount; j++) {
+                        int32_t binX = binXOffset + readInt32FromFile(bufferin);
+                        float counts;
+                        if (useShort) {
+                            counts = readInt16FromFile(bufferin);
+                        } else {
+                            counts = readFloatFromFile(bufferin);
+                        }
+                        appendRecord(v, index++, binX, binY, counts);
+                    }
+                }
+            } else {
+                int32_t rowCount = readInt32FromFile(bufferin);
+                for (int i = 0; i < rowCount; i++) {
+                    int32_t binY = binYOffset + readInt32FromFile(bufferin);
+                    int32_t colCount = readInt32FromFile(bufferin);
+                    for (int j = 0; j < colCount; j++) {
+                        int32_t binX = binXOffset + readInt32FromFile(bufferin);
+                        float counts;
+                        if (useShort) {
+                            counts = readInt16FromFile(bufferin);
+                        } else {
+                            counts = readFloatFromFile(bufferin);
+                        }
+                        appendRecord(v, index++, binX, binY, counts);
+                    }
+                }
+            }
+        } else if (type == 2) {
+            int32_t nPts = readInt32FromFile(bufferin);
+            int16_t w = readInt16FromFile(bufferin);
+
+            for (int i = 0; i < nPts; i++) {
+                //int32_t idx = (p.y - binOffset2) * w + (p.x - binOffset1);
+                int32_t row = i / w;
+                int32_t col = i - row * w;
+                int32_t bin1 = binXOffset + col;
+                int32_t bin2 = binYOffset + row;
+
+                float counts;
+                if (useShort) {
+                    int16_t c = readInt16FromFile(bufferin);
+                    if (c != -32768) {
+                        appendRecord(v, index++, bin1, bin2, c);
+                    }
+                } else {
+                    counts = readFloatFromFile(bufferin);
+                    if (!isnan(counts)) {
+                        appendRecord(v, index++, bin1, bin2, counts);
+                    }
+                }
+            }
+        }
+    }
+    delete[] compressedBytes;
+    delete[] uncompressedBytes; // don't forget to delete your heap arrays in C++!
+    return v;
+}
+
+// reads the normalization vector from the file at the specified location
+vector<double> readNormalizationVector(istream &bufferin, int32_t version) {
+    int64_t nValues;
+    if (version > 8) {
+        nValues = readInt64FromFile(bufferin);
+    } else {
+        nValues = (int64_t) readInt32FromFile(bufferin);
+    }
+
+    uint64_t numValues;
+    numValues = static_cast<uint64_t>(nValues);
+    vector<double> values(numValues);
+
+    if (version > 8) {
+        for (int i = 0; i < nValues; i++) {
+            values[i] = (double) readFloatFromFile(bufferin);
+        }
+    } else {
+        for (int i = 0; i < nValues; i++) {
+            values[i] = readDoubleFromFile(bufferin);
+        }
+    }
+
+    return values;
+}
+
+class MatrixZoomData {
+public:
+    bool isIntra;
+    string fileName;
+    int64_t myFilePos = 0LL;
+    vector<double> expectedValues;
+    bool foundFooter = false;
+    vector<double> c1Norm;
+    vector<double> c2Norm;
+    int32_t c1 = 0;
+    int32_t c2 = 0;
+    string matrixType;
+    string norm;
+    int32_t version = 0;
+    int32_t resolution = 0;
+    int32_t numBins1 = 0;
+    int32_t numBins2 = 0;
+    float sumCounts;
+    int32_t blockBinCount, blockColumnCount;
+    map<int32_t, indexEntry> blockMap;
+    double avgCount;
+
+    MatrixZoomData(const chromosome &chrom1, const chromosome &chrom2, const string &matrixType,
+                   const string &norm, const string &unit, int32_t resolution,
+                   int32_t &version, int64_t &master, int64_t &totalFileSize,
+                   const string &fileName) {
+        this->version = version;
+        this->fileName = fileName;
+        int32_t c01 = chrom1.index;
+        int32_t c02 = chrom2.index;
+        if (c01 <= c02) { // default is ok
+            this->c1 = c01;
+            this->c2 = c02;
+            this->numBins1 = static_cast<int32_t>(chrom1.length / resolution);
+            this->numBins2 = static_cast<int32_t>(chrom2.length / resolution);
+        } else { // flip
+            this->c1 = c02;
+            this->c2 = c01;
+            this->numBins1 = static_cast<int32_t>(chrom2.length / resolution);
+            this->numBins2 = static_cast<int32_t>(chrom1.length / resolution);
+        }
+        isIntra = c1 == c2;
+
+        this->matrixType = matrixType;
+        this->norm = norm;
+        this->resolution = resolution;
+
+        HiCFileStream *stream = new HiCFileStream(fileName);
+        indexEntry c1NormEntry{}, c2NormEntry{};
+
+        if (stream->isHttp) {
+            foundFooter = readFooterURL(stream->curl, master, version, c1, c2, matrixType, norm, unit,
+                                     resolution,
+                                     myFilePos,
+                                     c1NormEntry, c2NormEntry, expectedValues);
+        } else {
+            stream->fin.seekg(master, ios::beg);
+            foundFooter = readFooter(stream->fin, master, version, c1, c2, matrixType, norm,
+                                     unit,
+                                     resolution, myFilePos,
+                                     c1NormEntry, c2NormEntry, expectedValues);
+        }
+
+        if (!foundFooter) {
+            return;
+        }
+        stream->close();
+
+        if (norm != "NONE") {
+            c1Norm = readNormalizationVectorFromFooter(c1NormEntry, version, fileName);
+            if (isIntra) {
+                c2Norm = c1Norm;
+            } else {
+                c2Norm = readNormalizationVectorFromFooter(c2NormEntry, version, fileName);
+            }
+        }
+
+        HiCFileStream *stream2 = new HiCFileStream((fileName));
+        if (stream2->isHttp) {
+            // readMatrix will assign blockBinCount and blockColumnCount
+            blockMap = readMatrixHttp(stream2->curl, myFilePos, unit, resolution, sumCounts,
+                                      blockBinCount,
+                                      blockColumnCount);
+        } else {
+            // readMatrix will assign blockBinCount and blockColumnCount
+            blockMap = readMatrix(stream2->fin, myFilePos, unit, resolution, sumCounts,
+                                  blockBinCount,
+                                  blockColumnCount);
+        }
+        stream2->close();
+
+        if (!isIntra) {
+            avgCount = (sumCounts / numBins1) / numBins2;   // <= trying to avoid overflows
+        }
+    }
+
+    static vector<double> readNormalizationVectorFromFooter(indexEntry cNormEntry, int32_t &version,
+                                                            const string &fileName) {
+        if (cNormEntry.size == 0) {
+            // no normalization entries in this file.  Bow out now and avoid an allocation error
+            throw strawException("Trying to retrieve other normalization options, but none exist");
+        }
+        char *buffer = readCompressedBytesFromFile(fileName, cNormEntry);
+        memstream bufferin(buffer, cNormEntry.size);
+        vector<double> cNorm = readNormalizationVector(bufferin, version);
+        delete buffer;
+        return cNorm;
+    }
+
+    static bool isInRange(int32_t r, int32_t c, int32_t numRows, int32_t numCols) {
+        return 0 <= r && r < numRows && 0 <= c && c < numCols;
+    }
+
+    set<int32_t> getBlockNumbers(int64_t *regionIndices) const {
+        if (version > 8 && isIntra) {
+            return getBlockNumbersForRegionFromBinPositionV9Intra(regionIndices, blockBinCount, blockColumnCount);
+        } else {
+            return getBlockNumbersForRegionFromBinPosition(regionIndices, blockBinCount, blockColumnCount, isIntra);
+        }
+    }
+
+    vector<double> getNormVector(int32_t index) {
+        if (index == c1) {
+            return c1Norm;
+        } else if (index == c2) {
+            return c2Norm;
+        }
+        cerr << "Invalid index provided: " << index << endl;
+        cerr << "Should be either " << c1 << " or " << c2 << endl;
+        vector<double> v;
+        return v;
+    }
+
+    vector<double> getExpectedValues() {
+        return expectedValues;
+    }
+
+    vector<contactRecord> getRecords(int64_t gx0, int64_t gx1, int64_t gy0, int64_t gy1) {
+        if (!foundFooter) {
+            vector<contactRecord> v;
+            return v;
+        }
+        int64_t origRegionIndices[] = {gx0, gx1, gy0, gy1};
+        int64_t regionIndices[4];
+        convertGenomeToBinPos(origRegionIndices, regionIndices, resolution);
+
+        set<int32_t> blockNumbers = getBlockNumbers(regionIndices);
+        vector<contactRecord> records;
+        for (int32_t blockNumber : blockNumbers) {
+            // get contacts in this block
+            //cout << *it << " -- " << blockMap.size() << endl;
+            //cout << blockMap[*it].size << " " <<  blockMap[*it].position << endl;
+            vector<contactRecord> tmp_records = readBlock(fileName, blockMap[blockNumber], version);
+            for (contactRecord rec : tmp_records) {
+                int64_t x = rec.binX * resolution;
+                int64_t y = rec.binY * resolution;
+
+                if ((x >= origRegionIndices[0] && x <= origRegionIndices[1] &&
+                     y >= origRegionIndices[2] && y <= origRegionIndices[3]) ||
+                    // or check regions that overlap with lower left
+                    (isIntra && y >= origRegionIndices[0] && y <= origRegionIndices[1] && x >= origRegionIndices[2] &&
+                     x <= origRegionIndices[3])) {
+
+                    float c = rec.counts;
+                    if (norm != "NONE") {
+                        c = static_cast<float>(c / (c1Norm[rec.binX] * c2Norm[rec.binY]));
+                    }
+                    if (matrixType == "oe") {
+                        if (isIntra) {
+                            c = static_cast<float>(c / expectedValues[min(expectedValues.size() - 1,
+                                                                          (size_t) floor(abs(y - x) /
+                                                                                         resolution))]);
+                        } else {
+                            c = static_cast<float>(c / avgCount);
+                        }
+                    } else if (matrixType == "expected") {
+                        if (isIntra) {
+                            c = static_cast<float>(expectedValues[min(expectedValues.size() - 1,
+                                                                      (size_t) floor(abs(y - x) /
+                                                                                     resolution))]);
+                        } else {
+                            c = static_cast<float>(avgCount);
+                        }
+                    }
+
+                    if (!isnan(c) && !isinf(c)){
+                        contactRecord record = contactRecord();
+                        record.binX = static_cast<int32_t>(x);
+                        record.binY = static_cast<int32_t>(y);
+                        record.counts = c;
+                        records.push_back(record);
+                    }
+                }
+            }
+        }
+        return records;
+    }
+
+    vector<vector<float> > getRecordsAsMatrix(int64_t gx0, int64_t gx1, int64_t gy0, int64_t gy1) {
+        vector<contactRecord> records = this->getRecords(gx0, gx1, gy0, gy1);
+        if (records.empty()) {
+            vector<vector<float> > res = vector<vector<float> >(1, vector<float>(1, 0));
+            return res;
+        }
+
+        int64_t origRegionIndices[] = {gx0, gx1, gy0, gy1};
+        int64_t regionIndices[4];
+        convertGenomeToBinPos(origRegionIndices, regionIndices, resolution);
+
+        int64_t originR = regionIndices[0];
+        int64_t endR = regionIndices[1];
+        int64_t originC = regionIndices[2];
+        int64_t endC = regionIndices[3];
+        int32_t numRows = endR - originR + 1;
+        int32_t numCols = endC - originC + 1;
+        vector<vector<float> > matrix;
+        for (int32_t i = 0; i < numRows; i++) {
+            matrix.emplace_back(numCols, 0);
+        }
+
+        for (contactRecord cr : records) {
+            if (isnan(cr.counts) || isinf(cr.counts)) continue;
+            int32_t r = cr.binX / resolution - originR;
+            int32_t c = cr.binY / resolution - originC;
+            if (isInRange(r, c, numRows, numCols)) {
+                matrix[r][c] = cr.counts;
+            }
+            if (isIntra) {
+                r = cr.binY / resolution - originR;
+                c = cr.binX / resolution - originC;
+                if (isInRange(r, c, numRows, numCols)) {
+                    matrix[r][c] = cr.counts;
+                }
+            }
+        }
+        return matrix;
+    }
+
+    int64_t getNumberOfTotalRecords() {
+        if (!foundFooter) {
+            return 0;
+        }
+        int64_t regionIndices[4] = {0, numBins1, 0, numBins2};
+        set<int32_t> blockNumbers = getBlockNumbers(regionIndices);
+        int64_t total = 0;
+        for (int32_t blockNumber : blockNumbers) {
+            total += getNumRecordsInBlock(fileName, blockMap[blockNumber], version);
+        }
+        return total;
+    }
+};
+
+class HiCFile {
+public:
+    string prefix = "http"; // HTTP code
+    int64_t master = 0LL;
+    map<string, chromosome> chromosomeMap;
+    string genomeID;
+    int32_t numChromosomes = 0;
+    int32_t version = 0;
+    int64_t nviPosition = 0LL;
+    int64_t nviLength = 0LL;
+    vector<int32_t> resolutions;
+    static int64_t totalFileSize;
+    string fileName;
+    vector<string> attributes;
+
+    static size_t hdf(char *b, size_t size, size_t nitems, void *userdata) {
+        size_t numbytes = size * nitems;
+        b[numbytes + 1] = '\0';
+        string s(b);
+        int32_t found;
+        found = static_cast<int32_t>(s.find("content-range"));
+        if ((size_t) found == string::npos) {
+            found = static_cast<int32_t>(s.find("Content-Range"));
+        }
+        if ((size_t) found != string::npos) {
+            int32_t found2;
+            found2 = static_cast<int32_t>(s.find('/'));
+            //content-range: bytes 0-100000/891471462
+            if ((size_t) found2 != string::npos) {
+                string total = s.substr(found2 + 1);
+                totalFileSize = stol(total);
+            }
+        }
+
+        return numbytes;
+    }
+
+    static CURL *oneTimeInitCURL(const char *url) {
+        CURL *curl = initCURL(url);
+        curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, hdf);
+        curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
+        return curl;
+    }
+
+    explicit HiCFile(const string &fileName) {
+        this->fileName = fileName;
+
+        // read header into buffer; 100K should be sufficient
+        if (std::strncmp(fileName.c_str(), prefix.c_str(), prefix.size()) == 0) {
+            CURL *curl;
+            curl = oneTimeInitCURL(fileName.c_str());
+            char *buffer = getData(curl, 0, 100000);
+            memstream bufin(buffer, 100000);
+            chromosomeMap = readHeader(bufin, master, genomeID, numChromosomes,
+                                       version, nviPosition, nviLength, attributes);
+            resolutions = readResolutionsFromHeader(bufin);
+            curl_easy_cleanup(curl);
+            delete buffer;
+        } else {
+            ifstream fin;
+            fin.open(fileName, fstream::in | fstream::binary);
+            if (!fin) {
+                throw strawException("File " + fileName + " cannot be opened for reading");
+                //cerr << "File " << fileName << " cannot be opened for reading" << endl;
+                //exit(6);
+            }
+            chromosomeMap = readHeader(fin, master, genomeID, numChromosomes,
+                                       version, nviPosition, nviLength, attributes);
+            resolutions = readResolutionsFromHeader(fin);
+            fin.close();
+        }
+    }
+
+    string getGenomeID() const {
+        return genomeID;
+    }
+
+    vector<int32_t> getResolutions() const {
+        return resolutions;
+    }
+
+    vector<chromosome> getChromosomes() {
+        chromosome chromosomes[chromosomeMap.size()];
+        map<string, chromosome>::iterator iter = chromosomeMap.begin();
+        while (iter != chromosomeMap.end()) {
+            chromosome chrom = static_cast<chromosome>(iter->second);
+            chromosomes[chrom.index] = chrom;
+            iter++;
+        }
+
+        vector<chromosome> final_chromosomes;
+        final_chromosomes.reserve(chromosomeMap.size());
+        for(int32_t i = 0; i < chromosomeMap.size(); i++){
+            final_chromosomes.push_back(chromosomes[i]);
+        }
+        return final_chromosomes;
+    }
+
+    MatrixZoomData * getMatrixZoomData(const string &chr1, const string &chr2, const string &matrixType,
+                                       const string &norm, const string &unit, int32_t resolution) {
+        chromosome chrom1 = chromosomeMap[chr1];
+        chromosome chrom2 = chromosomeMap[chr2];
+        return new MatrixZoomData(chrom1, chrom2, (matrixType), (norm), (unit),
+                                  resolution, version, master, totalFileSize, fileName);
+    }
+};
+
+int64_t HiCFile::totalFileSize = 0LL;
+
+void parsePositions(const string &chrLoc, string &chrom, int64_t &pos1, int64_t &pos2, map<string, chromosome> map) {
+    string x, y;
+    stringstream ss(chrLoc);
+    getline(ss, chrom, ':');
+    if (map.count(chrom) == 0) {
+        throw strawException("chromosome " + chrom + " not found in the file.");
+        //cerr << "chromosome " << chrom << " not found in the file." << endl;
+        //exit(7);
+    }
+
+    if (getline(ss, x, ':') && getline(ss, y, ':')) {
+        pos1 = stol(x);
+        pos2 = stol(y);
+    } else {
+        pos1 = 0LL;
+        pos2 = map[chrom].length;
+    }
+}
+
+vector<contactRecord> straw(const string &matrixType, const string &norm, const string &fileName, const string &chr1loc,
+                            const string &chr2loc, const string &unit, int32_t binsize) {
+    if (!(unit == "BP" || unit == "FRAG")) {
+        cerr << "Norm specified incorrectly, must be one of <BP/FRAG>" << endl;
+        cerr << "Usage: straw [observed/oe/expected] <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr1>[:x1:x2] <chr2>[:y1:y2] <BP/FRAG> <binsize>"
+             << endl;
+        vector<contactRecord> v;
+        return v;
+    }
+
+    HiCFile *hiCFile = new HiCFile(fileName);
+    string chr1, chr2;
+    int64_t origRegionIndices[4] = {-100LL, -100LL, -100LL, -100LL};
+    parsePositions((chr1loc), chr1, origRegionIndices[0], origRegionIndices[1], hiCFile->chromosomeMap);
+    parsePositions((chr2loc), chr2, origRegionIndices[2], origRegionIndices[3], hiCFile->chromosomeMap);
+
+    if (hiCFile->chromosomeMap[chr1].index > hiCFile->chromosomeMap[chr2].index) {
+        MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr2, chr1, matrixType, norm, unit, binsize);
+        return mzd->getRecords(origRegionIndices[2], origRegionIndices[3], origRegionIndices[0], origRegionIndices[1]);
+    } else {
+        MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr1, chr2, matrixType, norm, unit, binsize);
+        return mzd->getRecords(origRegionIndices[0], origRegionIndices[1], origRegionIndices[2], origRegionIndices[3]);
+    }
+}
+
+vector<vector<float> > strawAsMatrix(const string &matrixType, const string &norm, const string &fileName, const string &chr1loc,
+                   const string &chr2loc, const string &unit, int32_t binsize) {
+    if (!(unit == "BP" || unit == "FRAG")) {
+        cerr << "Norm specified incorrectly, must be one of <BP/FRAG>" << endl;
+        cerr << "Usage: straw [observed/oe/expected] <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr1>[:x1:x2] <chr2>[:y1:y2] <BP/FRAG> <binsize>"
+             << endl;
+        vector<vector<float> > res = vector<vector<float> >(1, vector<float>(1, 0));
+        return res;
+    }
+
+    HiCFile *hiCFile = new HiCFile(fileName);
+    string chr1, chr2;
+    int64_t origRegionIndices[4] = {-100LL, -100LL, -100LL, -100LL};
+    parsePositions((chr1loc), chr1, origRegionIndices[0], origRegionIndices[1], hiCFile->chromosomeMap);
+    parsePositions((chr2loc), chr2, origRegionIndices[2], origRegionIndices[3], hiCFile->chromosomeMap);
+
+    if (hiCFile->chromosomeMap[chr1].index > hiCFile->chromosomeMap[chr2].index) {
+        MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr2, chr1, matrixType, norm, unit, binsize);
+        return mzd->getRecordsAsMatrix(origRegionIndices[2], origRegionIndices[3], origRegionIndices[0], origRegionIndices[1]);
+    } else {
+        MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr1, chr2, matrixType, norm, unit, binsize);
+        return mzd->getRecordsAsMatrix(origRegionIndices[0], origRegionIndices[1], origRegionIndices[2], origRegionIndices[3]);
+    }
+}
+
+int64_t getNumRecordsForFile(const string &fileName, int32_t binsize, bool interOnly) {
+    HiCFile *hiCFile = new HiCFile(fileName);
+    int64_t totalNumRecords = 0;
+
+    int32_t indexOffset = 0;
+    if (interOnly){
+        indexOffset = 1;
+    }
+
+    vector<chromosome> chromosomes = hiCFile->getChromosomes();
+    for(int32_t i = 0; i < chromosomes.size(); i++){
+        if(chromosomes[i].index <= 0) continue;
+        for(int32_t j = i + indexOffset; j < chromosomes.size(); j++){
+            if(chromosomes[j].index <= 0) continue;
+            MatrixZoomData *mzd;
+            if(chromosomes[i].index > chromosomes[j].index){
+                mzd = hiCFile->getMatrixZoomData(chromosomes[j].name, chromosomes[i].name, "observed", "NONE", "BP", binsize);
+            } else {
+                mzd = hiCFile->getMatrixZoomData(chromosomes[i].name, chromosomes[j].name, "observed", "NONE", "BP", binsize);
+            }
+            totalNumRecords += mzd->getNumberOfTotalRecords();
+        }
+    }
+
+    return totalNumRecords;
+}
+
+int64_t getNumRecordsForChromosomes(const string &fileName, int32_t binsize, bool interOnly) {
+    HiCFile *hiCFile = new HiCFile(fileName);
+    vector<chromosome> chromosomes = hiCFile->getChromosomes();
+    for(int32_t i = 0; i < chromosomes.size(); i++){
+        if(chromosomes[i].index <= 0) continue;
+        MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chromosomes[i].name, chromosomes[i].name, "observed", "NONE", "BP", binsize);
+        int64_t totalNumRecords = mzd->getNumberOfTotalRecords();
+        cout << chromosomes[i].name << " " << totalNumRecords << " ";
+        cout << totalNumRecords*12/1000/1000/1000 << " GB" << endl;
+    }
+    return 0;
+}
+
+void getHeaderFields(const string &filename, string &genome, vector<string> &chromNames, vector<int> &chromSizes,
+        vector<int> &bpResolutions, vector<int> &fragResolutions, vector<string> &attributes)
+/* Fill in the provided fields with information from the header of the hic file in the supplied filename.
+ * fragResolutions is left empty for now, as we're not making use of it. */
+{
+    HiCFile *hiCFile = new HiCFile(filename);
+
+    genome.assign(hiCFile->getGenomeID());
+
+    vector<chromosome> chromList = hiCFile->getChromosomes();
+    vector<string> myChromNames;
+    myChromNames.reserve(chromList.size());
+    vector<int> myChromSizes;
+    myChromSizes.reserve(chromList.size());
+    for(int32_t i = 0; i < chromList.size(); i++){
+        myChromNames.push_back(chromList[i].name);
+        myChromSizes.push_back(chromList[i].length);
+    }
+    chromNames = myChromNames;
+    chromSizes = myChromSizes;
+
+    vector<int> myBpResolutions;
+    myBpResolutions.reserve(hiCFile->resolutions.size());
+    for(int32_t i = 0; i < hiCFile->resolutions.size(); i++){
+        myBpResolutions.push_back(hiCFile->resolutions[i]);
+    }
+    bpResolutions = myBpResolutions;
+
+    attributes = hiCFile->attributes;
+
+    // Ignoring fragment resolutions for now, since they're never being used here
+}
+
+set<string> getNormOptions()
+/* Return the set of normalization options that have been encountered through footer parsing.
+ * The result will be empty unless at least one straw() request has been made.
+ */
+{
+    return globalNormOptions;
+}