910378643087254ec5d8fffe664b4bf92f5e7361
jcasper
  Thu Jul 25 11:59:44 2024 -0700
Final fixup for the new straw library, refs #33225

diff --git src/hg/lib/straw/straw.cpp src/hg/lib/straw/straw.cpp
index c7428df..a8ad55f 100644
--- src/hg/lib/straw/straw.cpp
+++ src/hg/lib/straw/straw.cpp
@@ -1,1755 +1,1755 @@
 /*
   The MIT License (MIT)
 
   Copyright (c) 2017-2021 Aiden Lab, Rice University, Baylor College of Medicine
 
  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:
 
  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.
 
  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.
 */
 #include <cstring>
 #include <iostream>
 #include <fstream>
 #include <sstream>
 #include <map>
 #include <cmath>
 #include <set>
 #include <utility>
 #include <vector>
 #include <streambuf>
 //#include <curl/curl.h>
 #include <iterator>
 #include <algorithm>
 #include "zlib.h"
 //#include "straw.h"
-#include "new_straw.h"
+#include "straw.h"
 extern "C" {
 #include "../../inc/fakeCurl.h"
 }
 
 using namespace std;
 
 
 // Added UCSC: Dirty hack, but it's hard to quickly extract a list of available normalization options.
 // This gets populated by readFooter and readFooterURL, so you have to make a quick dummy data request
 // with straw() to set it up (e.g. to the first listed chromosome for a 1 bp window).
 set<string> globalNormOptions;
 
 /*
   Straw: fast C++ implementation of dump. Not as fully featured as the
   Java version. Reads the .hic file, finds the appropriate matrix and slice
   of data, and outputs as text in sparse upper triangular format.
 
   Currently only supporting matrices.
 
   Usage: straw [observed/oe/expected] <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr1>[:x1:x2] <chr2>[:y1:y2] <BP/FRAG> <binsize>
  */
 
 // callback for libcurl. data written to this buffer
 static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) {
     size_t realsize = size * nmemb;
     struct MemoryStruct *mem;
     mem = (struct MemoryStruct *) userp;
 
     mem->memory = static_cast<char *>(realloc(mem->memory, mem->size + realsize + 1));
     if (mem->memory == nullptr) {
         /* out of memory! */
         printf("not enough memory (realloc returned NULL)\n");
         return 0;
     }
 
     std::memcpy(&(mem->memory[mem->size]), contents, realsize);
     mem->size += realsize;
     mem->memory[mem->size] = 0;
 
     return realsize;
 }
 
 // get a buffer that can be used as an input stream from the URL
 char *getData(CURL *curl, int64_t position, int64_t chunksize) {
     std::ostringstream oss;
     struct MemoryStruct chunk{};
     chunk.memory = static_cast<char *>(malloc(1));
     chunk.size = 0;    /* no data at this point */
     oss << position << "-" << position + chunksize;
     curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *) &chunk);
     curl_easy_setopt(curl, CURLOPT_RANGE, oss.str().c_str());
     // Setting FAILONERROR because otherwise this library has a bad habit of assuming data exists
     // and parsing what it gets back, regardless of whether it has run off the end of the file.
     curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
     CURLcode res = curl_easy_perform(curl);
     if (res != CURLE_OK) {
         throw strawException("curl_easy_perform() failed: " + string(curl_easy_strerror(res)));
         //fprintf(stderr, "curl_easy_perform() failed: %s\n",
         //        curl_easy_strerror(res));
     }
 
     return chunk.memory;
 }
 
 bool readMagicString(istream &fin) {
     string str;
     getline(fin, str, '\0');
     return str[0] == 'H' && str[1] == 'I' && str[2] == 'C';
 }
 
 char readCharFromFile(istream &fin) {
     char tempChar;
     fin.read(&tempChar, sizeof(char));
     return tempChar;
 }
 
 int16_t readInt16FromFile(istream &fin) {
     int16_t tempInt16;
     fin.read((char *) &tempInt16, sizeof(int16_t));
     return tempInt16;
 }
 
 int32_t readInt32FromFile(istream &fin) {
     int32_t tempInt32;
     fin.read((char *) &tempInt32, sizeof(int32_t));
     return tempInt32;
 }
 
 int64_t readInt64FromFile(istream &fin) {
     int64_t tempInt64;
     fin.read((char *) &tempInt64, sizeof(int64_t));
     return tempInt64;
 }
 
 float readFloatFromFile(istream &fin) {
     float tempFloat;
     fin.read((char *) &tempFloat, sizeof(float));
     return tempFloat;
 }
 
 double readDoubleFromFile(istream &fin) {
     double tempDouble;
     fin.read((char *) &tempDouble, sizeof(double));
     return tempDouble;
 }
 
 void convertGenomeToBinPos(const int64_t origRegionIndices[4], int64_t regionIndices[4], int32_t resolution) {
     for (uint16_t q = 0; q < 4; q++) {
         // used to find the blocks we need to access
         regionIndices[q] = origRegionIndices[q] / resolution;
     }
 }
 
 static CURL *initCURL(const char *url) {
     CURL *curl = curl_easy_init();
     if (curl) {
         curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
         curl_easy_setopt(curl, CURLOPT_URL, url);
         curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
         curl_easy_setopt(curl, CURLOPT_USERAGENT, "straw");
         curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
     } else {
         throw strawException("Unable to initialize curl");
         //cerr << "Unable to initialize curl " << endl;
         //exit(2);
     }
     return curl;
 }
 
 class HiCFileStream {
 public:
     string prefix = "http"; // HTTP code
     ifstream fin;
     CURL *curl;
     bool isHttp = false;
 
     explicit HiCFileStream(const string &fileName) {
         if (std::strncmp(fileName.c_str(), prefix.c_str(), prefix.size()) == 0) {
             isHttp = true;
             curl = initCURL(fileName.c_str());
             if (!curl) {
                 throw strawException("URL " + fileName + " cannot be opened for reading");
                 //cerr << "URL " << fileName << " cannot be opened for reading" << endl;
                 //exit(3);
             }
         } else {
             fin.open(fileName, fstream::in | fstream::binary);
             if (!fin) {
                 throw strawException("File " + fileName + " cannot be opened for reading");
                 //cerr << "File " << fileName << " cannot be opened for reading" << endl;
                 //exit(4);
             }
         }
     }
 
     void close() {
         if (isHttp) {
             curl_easy_cleanup(curl);
         } else {
             fin.close();
         }
     }
 
     char *readCompressedBytes(indexEntry idx) {
         if (isHttp) {
             return getData(curl, idx.position, idx.size);
         } else {
             char *buffer = new char[idx.size];
             fin.seekg(idx.position, ios::beg);
             fin.read(buffer, idx.size);
             return buffer;
         }
     }
 };
 
 char *readCompressedBytesFromFile(const string &fileName, indexEntry idx) {
     HiCFileStream *stream = new HiCFileStream(fileName);
     char *compressedBytes = stream->readCompressedBytes(idx);
     stream->close();
     delete stream;
     return compressedBytes;
 }
 
 // reads the header, storing the positions of the normalization vectors and returning the masterIndexPosition pointer
 map<string, chromosome> readHeader(istream &fin, int64_t &masterIndexPosition, string &genomeID,
                                    int32_t &numChromosomes, int32_t &version, int64_t &nviPosition,
                                    int64_t &nviLength, vector<string> &attributes) {
     map<string, chromosome> chromosomeMap;
     if (!readMagicString(fin)) {
         throw strawException("Hi-C magic string is missing, does not appear to be a hic file");
         // Not sure why this function was trying to continue after this failure (the next step is
         // reading resolutions from the file, which clearly isn't going to work).
         //cerr << "Hi-C magic string is missing, does not appear to be a hic file" << endl;
         masterIndexPosition = -1;
         return chromosomeMap;
     }
 
     version = readInt32FromFile(fin);
     if (version < 6) {
         throw strawException("Version " + to_string(version) + " no longer supported");
         // Not sure why this function was trying to continue after this failure (the next step is
         // reading resolutions from the file, which clearly isn't going to work).
         //cerr << "Version " << version << " no longer supported" << endl;
         masterIndexPosition = -1;
         return chromosomeMap;
     }
     masterIndexPosition = readInt64FromFile(fin);
     getline(fin, genomeID, '\0');
 
     if (version > 8) {
         nviPosition = readInt64FromFile(fin);
         nviLength = readInt64FromFile(fin);
     }
 
     int32_t nattributes = readInt32FromFile(fin);
 
     // reading attribute-value dictionary
     for (int i = 0; i < nattributes; i++) {
         string key, value;
         getline(fin, key, '\0');
         getline(fin, value, '\0');
         attributes.insert(attributes.end(), key);
         attributes.insert(attributes.end(), value);
     }
 
     numChromosomes = readInt32FromFile(fin);
     // chromosome map for finding matrixType
     for (int i = 0; i < numChromosomes; i++) {
         string name;
         int64_t length;
         getline(fin, name, '\0');
         if (version > 8) {
             length = readInt64FromFile(fin);
         } else {
             length = (int64_t) readInt32FromFile(fin);
         }
 
         chromosome chr;
         chr.index = i;
         chr.name = name;
         chr.length = length;
         chromosomeMap[name] = chr;
     }
     return chromosomeMap;
 }
 
 vector<int32_t> readResolutionsFromHeader(istream &fin) {
     int numBpResolutions = readInt32FromFile(fin);
     vector<int32_t> resolutions;
     for (int i = 0; i < numBpResolutions; i++) {
         int32_t res = readInt32FromFile(fin);
         resolutions.push_back(res);
     }
     return resolutions;
 }
 
 //https://www.techiedelight.com/get-slice-sub-vector-from-vector-cpp/
 vector<double> sliceVector(vector<double> &v, int64_t m, int64_t n) {
     vector<double> vec;
     copy(v.begin() + m, v.begin() + n + 1, back_inserter(vec));
     return vec;
 }
 
 // assume always an odd number for length of vector;
 // eve if even, this calculation should be close enough
 double getMedian(vector<double> &v) {
     size_t n = v.size() / 2;
     nth_element(v.begin(), v.begin() + n, v.end());
     return v[n];
 }
 
 void rollingMedian(vector<double> &initialValues, vector<double> &finalResult, int32_t window) {
     // window is actually a ~wing-span
     if (window < 1) {
         finalResult = initialValues;
         return;
     }
 
     /*
     finalResult.push_back(initialValues[0]);
     int64_t length = initialValues.size();
     for (int64_t index = 1; index < length; index++) {
         int64_t initialIndex;
         int64_t finalIndex;
         if (index < window) {
             initialIndex = 0;
             finalIndex = 2 * index;
         } else {
             initialIndex = index - window;
             finalIndex = index + window;
         }
 
         if (finalIndex > length - 1) {
             finalIndex = length - 1;
         }
 
         vector<double> subVector = sliceVector(initialValues, initialIndex, finalIndex);
         finalResult.push_back(getMedian(subVector));
     }
     */
     finalResult = initialValues;
 }
 
 void populateVectorWithFloats(istream &fin, vector<double> &vector, int64_t nValues) {
     for (int j = 0; j < nValues; j++) {
         double v = readFloatFromFile(fin);
         vector.push_back(v);
     }
 }
 
 void populateVectorWithDoubles(istream &fin, vector<double> &vector, int64_t nValues) {
     for (int j = 0; j < nValues; j++) {
         double v = readDoubleFromFile(fin);
         vector.push_back(v);
     }
 }
 
 int64_t readThroughExpectedVectorURL(CURL *curl, int64_t currentPointer, int32_t version, vector<double> &expectedValues, int64_t nValues,
                                bool store, int32_t resolution) {
     if (store) {
         int32_t bufferSize = nValues * sizeof(double) + 10000;
         if (version > 8) {
             bufferSize = nValues * sizeof(float) + 10000;
         }
         char *buffer = getData(curl, currentPointer, bufferSize);
         memstream fin(buffer, bufferSize);
 
         vector<double> initialExpectedValues;
         if (version > 8) {
             populateVectorWithFloats(fin, initialExpectedValues, nValues);
         } else {
             populateVectorWithDoubles(fin, initialExpectedValues, nValues);
         }
         int32_t window = 5000000 / resolution;
         rollingMedian(initialExpectedValues, expectedValues, window);
         delete buffer;
     }
 
     if (version > 8) {
         return nValues * sizeof(float);
     } else {
         return nValues * sizeof(double);
     }
 }
 
 void readThroughExpectedVector(int32_t version, istream &fin, vector<double> &expectedValues, int64_t nValues,
                                bool store, int32_t resolution) {
     if (store) {
         vector<double> initialExpectedValues;
         if (version > 8) {
             populateVectorWithFloats(fin, initialExpectedValues, nValues);
         } else {
             populateVectorWithDoubles(fin, initialExpectedValues, nValues);
         }
         int32_t window = 5000000 / resolution;
         rollingMedian(initialExpectedValues, expectedValues, window);
     } else if (nValues > 0) {
         if (version > 8) {
             fin.seekg(nValues * sizeof(float), ios_base::cur);
         } else {
             fin.seekg(nValues * sizeof(double), ios_base::cur);
         }
     }
 }
 
 int64_t readThroughNormalizationFactorsURL(CURL *curl, int64_t currentPointer, int32_t version, bool store, vector<double> &expectedValues,
                                      int32_t c1, int32_t nNormalizationFactors) {
 
     if (store) {
         int32_t bufferSize = nNormalizationFactors * (sizeof(int32_t) + sizeof(double)) + 10000;
         if (version > 8) {
             bufferSize = nNormalizationFactors * (sizeof(int32_t) + sizeof(float )) + 10000;
         }
         char *buffer = getData(curl, currentPointer, bufferSize);
         memstream fin(buffer, bufferSize);
 
         for (int j = 0; j < nNormalizationFactors; j++) {
             int32_t chrIdx = readInt32FromFile(fin);
             double v;
             if (version > 8) {
                 v = readFloatFromFile(fin);
             } else {
                 v = readDoubleFromFile(fin);
             }
             if (chrIdx == c1) {
                 for (double &expectedValue : expectedValues) {
                     expectedValue = expectedValue / v;
                 }
             }
         }
         delete buffer;
     }
 
     if (version > 8) {
         return nNormalizationFactors * (sizeof(int32_t) + sizeof(float));
     } else {
         return nNormalizationFactors * (sizeof(int32_t) + sizeof(double));
     }
 }
 
 void readThroughNormalizationFactors(istream &fin, int32_t version, bool store, vector<double> &expectedValues,
                                      int32_t c1) {
     int32_t nNormalizationFactors = readInt32FromFile(fin);
     if (store) {
         for (int j = 0; j < nNormalizationFactors; j++) {
             int32_t chrIdx = readInt32FromFile(fin);
             double v;
             if (version > 8) {
                 v = readFloatFromFile(fin);
             } else {
                 v = readDoubleFromFile(fin);
             }
             if (chrIdx == c1) {
                 for (double &expectedValue : expectedValues) {
                     expectedValue = expectedValue / v;
                 }
             }
         }
     } else if (nNormalizationFactors > 0) {
         if (version > 8) {
             fin.seekg(nNormalizationFactors * (sizeof(int32_t) + sizeof(float)), ios_base::cur);
         } else {
             fin.seekg(nNormalizationFactors * (sizeof(int32_t) + sizeof(double)), ios_base::cur);
         }
     }
 }
 
 int64_t readStringFromURL(istream &fin, string &basicString) {
     getline(fin, basicString, '\0');
     return (basicString.length() + 1);
 }
 
 // reads the footer from the master pointer location. takes in the chromosomes,
 // norm, unit (BP or FRAG) and resolution or binsize, and sets the file
 // position of the matrix and the normalization vectors for those chromosomes
 // at the given normalization and resolution
 bool readFooterURL(CURL *curl, int64_t master, int32_t version, int32_t c1, int32_t c2, const string &matrixType,
                 const string &norm, const string &unit, int32_t resolution, int64_t &myFilePos,
                 indexEntry &c1NormEntry, indexEntry &c2NormEntry, vector<double> &expectedValues) {
 
     int64_t currentPointer = master;
 
     char *buffer = getData(curl, currentPointer, 100);
     memstream fin1(buffer, 100);
 
     if (version > 8) {
         int64_t nBytes = readInt64FromFile(fin1);
         currentPointer += 8;
     } else {
         int32_t nBytes = readInt32FromFile(fin1);
         currentPointer += 4;
     }
 
     stringstream ss;
     ss << c1 << "_" << c2;
     string key = ss.str();
 
     int32_t nEntries = readInt32FromFile(fin1);
     currentPointer += 4;
     delete buffer;
 
     int32_t bufferSize0 = nEntries * 50;
     buffer = getData(curl, currentPointer, bufferSize0);
     memstream fin2(buffer, bufferSize0);
 
     bool found = false;
     for (int i = 0; i < nEntries; i++) {
         string keyStr;
         currentPointer += readStringFromURL(fin2, keyStr);
 
         int64_t fpos = readInt64FromFile(fin2);
         int32_t sizeinbytes = readInt32FromFile(fin2);
         currentPointer += 12;
         if (keyStr == key) {
             myFilePos = fpos;
             found = true;
         }
     }
     delete buffer;
     if (!found) {
         cerr << "Remote file doesn't have the given chr_chr map " << key << endl;
         return false;
     }
 
     if ((matrixType == "observed" && norm == "NONE") ||
         ((matrixType == "oe" || matrixType == "expected") && norm == "NONE" && c1 != c2))
         return true; // no need to read norm vector index
 
     // read in and ignore expected value maps; don't store; reading these to
     // get to norm vector index
     buffer = getData(curl, currentPointer, 100);
     memstream fin3(buffer, 100);
 
     int32_t nExpectedValues = readInt32FromFile(fin3);
     if (fin3.bad() || fin3.fail())
     {
         throw strawException("Unable to read expected values from file - it might be truncated early");
     }
     currentPointer += 4;
     delete buffer;
     for (int i = 0; i < nExpectedValues; i++) {
 
         buffer = getData(curl, currentPointer, 1000);
         memstream fin4(buffer, 1000);
 
         string unit0;
         currentPointer += readStringFromURL(fin4, unit0);
 
         int32_t binSize = readInt32FromFile(fin4);
         currentPointer += 4;
 
         int64_t nValues;
         if (version > 8) {
             nValues = readInt64FromFile(fin4);
             currentPointer += 8;
         } else {
             nValues = (int64_t) readInt32FromFile(fin4);
             currentPointer += 4;
         }
 
         delete buffer;
 
         bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE" && unit0 == unit &&
                      binSize == resolution;
 
         currentPointer += readThroughExpectedVectorURL(curl, currentPointer, version, expectedValues, nValues, store, resolution);
 
         buffer = getData(curl, currentPointer, 100);
         memstream fin5(buffer, 100);
         int32_t nNormalizationFactors = readInt32FromFile(fin5);
         currentPointer += 4;
         delete buffer;
 
         currentPointer += readThroughNormalizationFactorsURL(curl, currentPointer, version, store, expectedValues, c1, nNormalizationFactors);
     }
 
     if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE") {
         if (expectedValues.empty()) {
             cerr << "Remote file did not contain expected values vectors at " << resolution << " " << unit << endl;
             return false;
         }
         return true;
     }
 
     buffer = getData(curl, currentPointer, 100);
     memstream fin6(buffer, 100);
     nExpectedValues = readInt32FromFile(fin6);
     if (fin6.bad() || fin6.fail())
     {
         throw strawException("Unable to read normalized expected values from file - it might be truncated early");
     }
     currentPointer += 4;
     delete buffer;
     for (int i = 0; i < nExpectedValues; i++) {
         buffer = getData(curl, currentPointer, 1000);
         memstream fin7(buffer, 1000);
 
         string nType, unit0;
         currentPointer += readStringFromURL(fin7, nType);
         currentPointer += readStringFromURL(fin7, unit0);
 
         int32_t binSize = readInt32FromFile(fin7);
         currentPointer += 4;
 
         int64_t nValues;
         if (version > 8) {
             nValues = readInt64FromFile(fin7);
             currentPointer += 8;
         } else {
             nValues = (int64_t) readInt32FromFile(fin7);
             currentPointer += 4;
         }
         bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && nType == norm && unit0 == unit &&
                      binSize == resolution;
 
         delete buffer;
 
         currentPointer += readThroughExpectedVectorURL(curl, currentPointer, version, expectedValues, nValues, store, resolution);
 
         buffer = getData(curl, currentPointer, 100);
         memstream fin8(buffer, 100);
         int32_t nNormalizationFactors = readInt32FromFile(fin8);
         currentPointer += 4;
         delete buffer;
 
         currentPointer += readThroughNormalizationFactorsURL(curl, currentPointer, version, store, expectedValues, c1, nNormalizationFactors);
     }
 
     if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm != "NONE") {
         if (expectedValues.empty()) {
             cerr << "Remote file did not contain normalized expected values vectors at " << resolution << " " << unit << endl;
             return false;
         }
     }
 
     buffer = getData(curl, currentPointer, 100);
     memstream fin9(buffer, 100);
     nEntries = readInt32FromFile(fin9);
     if (fin9.bad() || fin9.fail())
     {
         throw strawException("Unable to read normalization vectors from file - it might be truncated early");
     }
     currentPointer += 4;
     delete buffer;
 
     bool found1 = false;
     bool found2 = false;
     int32_t bufferSize2 = nEntries*60;
     buffer = getData(curl, currentPointer, bufferSize2);
     memstream fin10(buffer, bufferSize2);
 
     // added UCSC
     globalNormOptions.clear();
 
     for (int i = 0; i < nEntries; i++) {
         string normtype;
         currentPointer += readStringFromURL(fin10, normtype);
 
         // added UCSC
         globalNormOptions.insert(normtype);
 
         int32_t chrIdx = readInt32FromFile(fin10);
         currentPointer += 4;
         string unit1;
         currentPointer += readStringFromURL(fin10, unit1);
 
         int32_t resolution1 = readInt32FromFile(fin10);
         int64_t filePosition = readInt64FromFile(fin10);
         currentPointer += 12;
 
         int64_t sizeInBytes;
         if (version > 8) {
             sizeInBytes = readInt64FromFile(fin10);
             currentPointer += 8;
         } else {
             sizeInBytes = (int64_t) readInt32FromFile(fin10);
             currentPointer += 4;
         }
 
         if (chrIdx == c1 && normtype == norm && unit1 == unit && resolution1 == resolution) {
             c1NormEntry.position = filePosition;
             c1NormEntry.size = sizeInBytes;
             found1 = true;
         }
         if (chrIdx == c2 && normtype == norm && unit1 == unit && resolution1 == resolution) {
             c2NormEntry.position = filePosition;
             c2NormEntry.size = sizeInBytes;
             found2 = true;
         }
     }
     delete buffer;
     if (!found1 || !found2) {
         cerr << "Remote file did not contain " << norm << " normalization vectors for one or both chromosomes at "
              << resolution << " " << unit << endl;
     }
     return true;
 }
 
 bool readFooter(istream &fin, int64_t master, int32_t version, int32_t c1, int32_t c2, const string &matrixType,
                 const string &norm, const string &unit, int32_t resolution, int64_t &myFilePos,
                 indexEntry &c1NormEntry, indexEntry &c2NormEntry, vector<double> &expectedValues) {
 
     if (version > 8) {
         int64_t nBytes = readInt64FromFile(fin);
     } else {
         int32_t nBytes = readInt32FromFile(fin);
     }
 
     stringstream ss;
     ss << c1 << "_" << c2;
     string key = ss.str();
 
     int32_t nEntries = readInt32FromFile(fin);
     bool found = false;
     for (int i = 0; i < nEntries; i++) {
         string keyStr;
         getline(fin, keyStr, '\0');
         int64_t fpos = readInt64FromFile(fin);
         int32_t sizeinbytes = readInt32FromFile(fin);
         if (keyStr == key) {
             myFilePos = fpos;
             found = true;
         }
     }
     if (!found) {
         cerr << "File doesn't have the given chr_chr map " << key << endl;
         return false;
     }
 
     if ((matrixType == "observed" && norm == "NONE") ||
         ((matrixType == "oe" || matrixType == "expected") && norm == "NONE" && c1 != c2))
         return true; // no need to read norm vector index
 
     // read in and ignore expected value maps; don't store; reading these to
     // get to norm vector index
     int32_t nExpectedValues = readInt32FromFile(fin);
     if (fin.bad() || fin.fail())
     {
         throw strawException("Unable to read expected values from file - it might be truncated early");
     }
     for (int i = 0; i < nExpectedValues; i++) {
         string unit0;
         getline(fin, unit0, '\0'); //unit
         int32_t binSize = readInt32FromFile(fin);
 
         int64_t nValues;
         if (version > 8) {
             nValues = readInt64FromFile(fin);
         } else {
             nValues = (int64_t) readInt32FromFile(fin);
         }
 
         bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE" && unit0 == unit &&
                      binSize == resolution;
         readThroughExpectedVector(version, fin, expectedValues, nValues, store, resolution);
         readThroughNormalizationFactors(fin, version, store, expectedValues, c1);
     }
 
     if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm == "NONE") {
         if (expectedValues.empty()) {
             cerr << "File did not contain expected values vectors at " << resolution << " " << unit << endl;
             return false;
         }
         return true;
     }
 
     nExpectedValues = readInt32FromFile(fin);
     if (fin.bad() || fin.fail())
     {
         throw strawException("Unable to read normalized expected values from file - it might be truncated early");
     }
 
     for (int i = 0; i < nExpectedValues; i++) {
         string nType, unit0;
         getline(fin, nType, '\0'); //typeString
         getline(fin, unit0, '\0'); //unit
         int32_t binSize = readInt32FromFile(fin);
         int64_t nValues;
         if (version > 8) {
             nValues = readInt64FromFile(fin);
         } else {
             nValues = (int64_t) readInt32FromFile(fin);
         }
         bool store = c1 == c2 && (matrixType == "oe" || matrixType == "expected") && nType == norm && unit0 == unit &&
                      binSize == resolution;
         readThroughExpectedVector(version, fin, expectedValues, nValues, store, resolution);
         readThroughNormalizationFactors(fin, version, store, expectedValues, c1);
     }
 
     if (c1 == c2 && (matrixType == "oe" || matrixType == "expected") && norm != "NONE") {
         if (expectedValues.empty()) {
             cerr << "File did not contain normalized expected values vectors at " << resolution << " " << unit << endl;
             return false;
         }
     }
 
     // Index of normalization vectors
     nEntries = readInt32FromFile(fin);
     bool found1 = false;
     bool found2 = false;
 
     // added UCSC
     globalNormOptions.clear();
     if (fin.bad() || fin.fail())
     {
         throw strawException("Unable to read normalization vectors from file - it might be truncated early");
     }
 
     for (int i = 0; i < nEntries; i++) {
         string normtype;
         getline(fin, normtype, '\0'); //normalization type
         int32_t chrIdx = readInt32FromFile(fin);
         string unit1;
         getline(fin, unit1, '\0'); //unit
         int32_t resolution1 = readInt32FromFile(fin);
         int64_t filePosition = readInt64FromFile(fin);
         int64_t sizeInBytes;
         if (version > 8) {
             sizeInBytes = readInt64FromFile(fin);
         } else {
             sizeInBytes = (int64_t) readInt32FromFile(fin);
         }
 
         // added UCSC
         globalNormOptions.insert(normtype);
 
         if (chrIdx == c1 && normtype == norm && unit1 == unit && resolution1 == resolution) {
             c1NormEntry.position = filePosition;
             c1NormEntry.size = sizeInBytes;
             found1 = true;
         }
         if (chrIdx == c2 && normtype == norm && unit1 == unit && resolution1 == resolution) {
             c2NormEntry.position = filePosition;
             c2NormEntry.size = sizeInBytes;
             found2 = true;
         }
     }
     if (!found1 || !found2) {
         cerr << "File did not contain " << norm << " normalization vectors for one or both chromosomes at "
              << resolution << " " << unit << endl;
     }
     return true;
 }
 
 indexEntry readIndexEntry(istream &fin) {
     int64_t filePosition = readInt64FromFile(fin);
     int32_t blockSizeInBytes = readInt32FromFile(fin);
     indexEntry entry = indexEntry();
     entry.size = (int64_t) blockSizeInBytes;
     entry.position = filePosition;
     return entry;
 }
 
 void setValuesForMZD(istream &fin, const string &myunit, float &mySumCounts, int32_t &mybinsize,
                      int32_t &myBlockBinCount, int32_t &myBlockColumnCount, bool &found) {
     string unit;
     getline(fin, unit, '\0'); // unit
     readInt32FromFile(fin); // Old "zoom" index -- not used
     float sumCounts = readFloatFromFile(fin); // sumCounts
     readFloatFromFile(fin); // occupiedCellCount
     readFloatFromFile(fin); // stdDev
     readFloatFromFile(fin); // percent95
     int32_t binSize = readInt32FromFile(fin);
     int32_t blockBinCount = readInt32FromFile(fin);
     int32_t blockColumnCount = readInt32FromFile(fin);
     found = false;
     if (myunit == unit && mybinsize == binSize) {
         mySumCounts = sumCounts;
         myBlockBinCount = blockBinCount;
         myBlockColumnCount = blockColumnCount;
         found = true;
     }
 }
 
 void populateBlockMap(istream &fin, int32_t nBlocks, map<int32_t, indexEntry> &blockMap) {
     for (int b = 0; b < nBlocks; b++) {
         int32_t blockNumber = readInt32FromFile(fin);
         blockMap[blockNumber] = readIndexEntry(fin);
     }
 }
 
 // reads the raw binned contact matrix at specified resolution, setting the block bin count and block column count
 map<int32_t, indexEntry> readMatrixZoomData(istream &fin, const string &myunit, int32_t mybinsize, float &mySumCounts,
                                             int32_t &myBlockBinCount, int32_t &myBlockColumnCount, bool &found) {
 
     map<int32_t, indexEntry> blockMap;
     setValuesForMZD(fin, myunit, mySumCounts, mybinsize, myBlockBinCount, myBlockColumnCount, found);
 
     int32_t nBlocks = readInt32FromFile(fin);
     if (found) {
         populateBlockMap(fin, nBlocks, blockMap);
     } else {
         fin.seekg(nBlocks * (sizeof(int32_t) + sizeof(int64_t) + sizeof(int32_t)), ios_base::cur);
     }
     return blockMap;
 }
 
 // reads the raw binned contact matrix at specified resolution, setting the block bin count and block column count
 map<int32_t, indexEntry> readMatrixZoomDataHttp(CURL *curl, int64_t &myFilePosition, const string &myunit,
                                                 int32_t mybinsize, float &mySumCounts, int32_t &myBlockBinCount,
                                                 int32_t &myBlockColumnCount, bool &found) {
     map<int32_t, indexEntry> blockMap;
     int32_t header_size = 5 * sizeof(int32_t) + 4 * sizeof(float);
     char *first = getData(curl, myFilePosition, 1);
     if (first[0] == 'B') {
         header_size += 3;
     } else if (first[0] == 'F') {
         header_size += 5;
     } else {
         cerr << "Unit not understood" << endl;
         return blockMap;
     }
     delete first;
     char *buffer = getData(curl, myFilePosition, header_size);
     memstream fin(buffer, header_size);
     setValuesForMZD(fin, myunit, mySumCounts, mybinsize, myBlockBinCount, myBlockColumnCount, found);
     int32_t nBlocks = readInt32FromFile(fin);
     delete buffer;
 
     if (found) {
         int32_t chunkSize = nBlocks * (sizeof(int32_t) + sizeof(int64_t) + sizeof(int32_t));
         buffer = getData(curl, myFilePosition + header_size, chunkSize);
         memstream fin2(buffer, chunkSize);
         populateBlockMap(fin2, nBlocks, blockMap);
         delete buffer;
     } else {
         myFilePosition = myFilePosition + header_size
                          + (nBlocks * (sizeof(int32_t) + sizeof(int64_t) + sizeof(int32_t)));
     }
     return blockMap;
 }
 
 // goes to the specified file pointer in http and finds the raw contact matrixType at specified resolution, calling readMatrixZoomData.
 // sets blockbincount and blockcolumncount
 map<int32_t, indexEntry> readMatrixHttp(CURL *curl, int64_t myFilePosition, const string &unit, int32_t resolution,
                                         float &mySumCounts, int32_t &myBlockBinCount, int32_t &myBlockColumnCount) {
     int32_t size = sizeof(int32_t) * 3;
     char *buffer = getData(curl, myFilePosition, size);
     memstream bufin(buffer, size);
 
     int32_t c1 = readInt32FromFile(bufin);
     int32_t c2 = readInt32FromFile(bufin);
     int32_t nRes = readInt32FromFile(bufin);
     int32_t i = 0;
     bool found = false;
     myFilePosition = myFilePosition + size;
     delete buffer;
     map<int32_t, indexEntry> blockMap;
 
     while (i < nRes && !found) {
         // myFilePosition gets updated within call
         blockMap = readMatrixZoomDataHttp(curl, myFilePosition, unit, resolution, mySumCounts, myBlockBinCount,
                                           myBlockColumnCount, found);
         i++;
     }
     if (!found) {
         cerr << "Error finding block data" << endl;
     }
     return blockMap;
 }
 
 // goes to the specified file pointer and finds the raw contact matrixType at specified resolution, calling readMatrixZoomData.
 // sets blockbincount and blockcolumncount
 map<int32_t, indexEntry> readMatrix(istream &fin, int64_t myFilePosition, const string &unit, int32_t resolution,
                                     float &mySumCounts, int32_t &myBlockBinCount, int32_t &myBlockColumnCount) {
     map<int32_t, indexEntry> blockMap;
 
     fin.seekg(myFilePosition, ios::beg);
     int32_t c1 = readInt32FromFile(fin);
     int32_t c2 = readInt32FromFile(fin);
     int32_t nRes = readInt32FromFile(fin);
     int32_t i = 0;
     bool found = false;
     while (i < nRes && !found) {
         blockMap = readMatrixZoomData(fin, unit, resolution, mySumCounts, myBlockBinCount, myBlockColumnCount, found);
         i++;
     }
     if (!found) {
         cerr << "Error finding block data" << endl;
     }
     return blockMap;
 }
 
 // gets the blocks that need to be read for this slice of the data.  needs blockbincount, blockcolumncount, and whether
 // or not this is intrachromosomal.
 set<int32_t> getBlockNumbersForRegionFromBinPosition(const int64_t *regionIndices, int32_t blockBinCount,
                                                      int32_t blockColumnCount, bool intra) {
     int32_t col1, col2, row1, row2;
     col1 = static_cast<int32_t>(regionIndices[0] / blockBinCount);
     col2 = static_cast<int32_t>((regionIndices[1] + 1) / blockBinCount);
     row1 = static_cast<int32_t>(regionIndices[2] / blockBinCount);
     row2 = static_cast<int32_t>((regionIndices[3] + 1) / blockBinCount);
 
     set<int32_t> blocksSet;
     // first check the upper triangular matrixType
     for (int r = row1; r <= row2; r++) {
         for (int c = col1; c <= col2; c++) {
             int32_t blockNumber = r * blockColumnCount + c;
             blocksSet.insert(blockNumber);
         }
     }
     // check region part that overlaps with lower left triangle but only if intrachromosomal
     if (intra) {
         for (int r = col1; r <= col2; r++) {
             for (int c = row1; c <= row2; c++) {
                 int32_t blockNumber = r * blockColumnCount + c;
                 blocksSet.insert(blockNumber);
             }
         }
     }
     return blocksSet;
 }
 
 set<int32_t> getBlockNumbersForRegionFromBinPositionV9Intra(int64_t *regionIndices, int32_t blockBinCount,
                                                             int32_t blockColumnCount) {
     // regionIndices is binX1 binX2 binY1 binY2
     set<int32_t> blocksSet;
     int32_t translatedLowerPAD, translatedHigherPAD, translatedNearerDepth, translatedFurtherDepth;
     translatedLowerPAD = static_cast<int32_t>((regionIndices[0] + regionIndices[2]) / 2 / blockBinCount);
     translatedHigherPAD = static_cast<int32_t>((regionIndices[1] + regionIndices[3]) / 2 / blockBinCount + 1);
     translatedNearerDepth = static_cast<int32_t>(log2(
             1 + abs(regionIndices[0] - regionIndices[3]) / sqrt(2) / blockBinCount));
     translatedFurtherDepth = static_cast<int32_t>(log2(
             1 + abs(regionIndices[1] - regionIndices[2]) / sqrt(2) / blockBinCount));
 
     // because code above assume above diagonal; but we could be below diagonal
     int32_t nearerDepth = min(translatedNearerDepth, translatedFurtherDepth);
     if ((regionIndices[0] > regionIndices[3] && regionIndices[1] < regionIndices[2]) ||
         (regionIndices[1] > regionIndices[2] && regionIndices[0] < regionIndices[3])) {
         nearerDepth = 0;
     }
     int32_t furtherDepth = max(translatedNearerDepth, translatedFurtherDepth) + 1; // +1; integer divide rounds down
 
     for (int depth = nearerDepth; depth <= furtherDepth; depth++) {
         for (int pad = translatedLowerPAD; pad <= translatedHigherPAD; pad++) {
             int32_t blockNumber = depth * blockColumnCount + pad;
             blocksSet.insert(blockNumber);
         }
     }
 
     return blocksSet;
 }
 
 void appendRecord(vector<contactRecord> &vector, int32_t index, int32_t binX, int32_t binY, float counts) {
     contactRecord record = contactRecord();
     record.binX = binX;
     record.binY = binY;
     record.counts = counts;
     vector[index] = record;
 }
 
 int32_t decompressBlock(indexEntry idx, char *compressedBytes, char *uncompressedBytes) {
     z_stream infstream;
     infstream.zalloc = Z_NULL;
     infstream.zfree = Z_NULL;
     infstream.opaque = Z_NULL;
     infstream.avail_in = static_cast<uInt>(idx.size); // size of input
     infstream.next_in = (Bytef *) compressedBytes; // input char array
     infstream.avail_out = static_cast<uInt>(idx.size * 10); // size of output
     infstream.next_out = (Bytef *) uncompressedBytes; // output char array
     // the actual decompression work.
     inflateInit(&infstream);
     inflate(&infstream, Z_NO_FLUSH);
     inflateEnd(&infstream);
     int32_t uncompressedSize = static_cast<int32_t>(infstream.total_out);
     return uncompressedSize;
 }
 
 long getNumRecordsInBlock(const string &fileName, indexEntry idx, int32_t version){
     if (idx.size <= 0) {
         return 0;
     }
     char *compressedBytes = readCompressedBytesFromFile(fileName, idx);
     char *uncompressedBytes = new char[idx.size * 10]; //biggest seen so far is 3
     int32_t uncompressedSize = decompressBlock(idx, compressedBytes, uncompressedBytes);
 
     // create stream from buffer for ease of use
     memstream bufferin(uncompressedBytes, uncompressedSize);
     uint64_t nRecords;
     nRecords = static_cast<uint64_t>(readInt32FromFile(bufferin));
     delete[] compressedBytes;
     delete[] uncompressedBytes; // don't forget to delete your heap arrays in C++!
     return nRecords;
 }
 
 // this is the meat of reading the data.  takes in the block number and returns the set of contact records corresponding to
 // that block.  the block data is compressed and must be decompressed using the zlib library functions
 vector<contactRecord> readBlock(const string &fileName, indexEntry idx, int32_t version) {
     if (idx.size <= 0) {
         vector<contactRecord> v;
         return v;
     }
     char *compressedBytes = readCompressedBytesFromFile(fileName, idx);
     char *uncompressedBytes = new char[idx.size * 10]; //biggest seen so far is 3
     int32_t uncompressedSize = decompressBlock(idx, compressedBytes, uncompressedBytes);
 
     // create stream from buffer for ease of use
     memstream bufferin(uncompressedBytes, uncompressedSize);
     uint64_t nRecords;
     nRecords = static_cast<uint64_t>(readInt32FromFile(bufferin));
     vector<contactRecord> v(nRecords);
     // different versions have different specific formats
     if (version < 7) {
         for (uInt i = 0; i < nRecords; i++) {
             int32_t binX = readInt32FromFile(bufferin);
             int32_t binY = readInt32FromFile(bufferin);
             float counts = readFloatFromFile(bufferin);
             appendRecord(v, i, binX, binY, counts);
         }
     } else {
         int32_t binXOffset = readInt32FromFile(bufferin);
         int32_t binYOffset = readInt32FromFile(bufferin);
         bool useShort = readCharFromFile(bufferin) == 0; // yes this is opposite of usual
 
         bool useShortBinX = true;
         bool useShortBinY = true;
         if (version > 8) {
             useShortBinX = readCharFromFile(bufferin) == 0;
             useShortBinY = readCharFromFile(bufferin) == 0;
         }
 
         char type = readCharFromFile(bufferin);
         int32_t index = 0;
         if (type == 1) {
             if (useShortBinX && useShortBinY) {
                 int16_t rowCount = readInt16FromFile(bufferin);
                 for (int i = 0; i < rowCount; i++) {
                     int32_t binY = binYOffset + readInt16FromFile(bufferin);
                     int16_t colCount = readInt16FromFile(bufferin);
                     for (int j = 0; j < colCount; j++) {
                         int32_t binX = binXOffset + readInt16FromFile(bufferin);
                         float counts;
                         if (useShort) {
                             counts = readInt16FromFile(bufferin);
                         } else {
                             counts = readFloatFromFile(bufferin);
                         }
                         appendRecord(v, index++, binX, binY, counts);
                     }
                 }
             } else if (useShortBinX && !useShortBinY) {
                 int32_t rowCount = readInt32FromFile(bufferin);
                 for (int i = 0; i < rowCount; i++) {
                     int32_t binY = binYOffset + readInt32FromFile(bufferin);
                     int16_t colCount = readInt16FromFile(bufferin);
                     for (int j = 0; j < colCount; j++) {
                         int32_t binX = binXOffset + readInt16FromFile(bufferin);
                         float counts;
                         if (useShort) {
                             counts = readInt16FromFile(bufferin);
                         } else {
                             counts = readFloatFromFile(bufferin);
                         }
                         appendRecord(v, index++, binX, binY, counts);
                     }
                 }
             } else if (!useShortBinX && useShortBinY) {
                 int16_t rowCount = readInt16FromFile(bufferin);
                 for (int i = 0; i < rowCount; i++) {
                     int32_t binY = binYOffset + readInt16FromFile(bufferin);
                     int32_t colCount = readInt32FromFile(bufferin);
                     for (int j = 0; j < colCount; j++) {
                         int32_t binX = binXOffset + readInt32FromFile(bufferin);
                         float counts;
                         if (useShort) {
                             counts = readInt16FromFile(bufferin);
                         } else {
                             counts = readFloatFromFile(bufferin);
                         }
                         appendRecord(v, index++, binX, binY, counts);
                     }
                 }
             } else {
                 int32_t rowCount = readInt32FromFile(bufferin);
                 for (int i = 0; i < rowCount; i++) {
                     int32_t binY = binYOffset + readInt32FromFile(bufferin);
                     int32_t colCount = readInt32FromFile(bufferin);
                     for (int j = 0; j < colCount; j++) {
                         int32_t binX = binXOffset + readInt32FromFile(bufferin);
                         float counts;
                         if (useShort) {
                             counts = readInt16FromFile(bufferin);
                         } else {
                             counts = readFloatFromFile(bufferin);
                         }
                         appendRecord(v, index++, binX, binY, counts);
                     }
                 }
             }
         } else if (type == 2) {
             int32_t nPts = readInt32FromFile(bufferin);
             int16_t w = readInt16FromFile(bufferin);
 
             for (int i = 0; i < nPts; i++) {
                 //int32_t idx = (p.y - binOffset2) * w + (p.x - binOffset1);
                 int32_t row = i / w;
                 int32_t col = i - row * w;
                 int32_t bin1 = binXOffset + col;
                 int32_t bin2 = binYOffset + row;
 
                 float counts;
                 if (useShort) {
                     int16_t c = readInt16FromFile(bufferin);
                     if (c != -32768) {
                         appendRecord(v, index++, bin1, bin2, c);
                     }
                 } else {
                     counts = readFloatFromFile(bufferin);
                     if (!isnan(counts)) {
                         appendRecord(v, index++, bin1, bin2, counts);
                     }
                 }
             }
         }
     }
     delete[] compressedBytes;
     delete[] uncompressedBytes; // don't forget to delete your heap arrays in C++!
     return v;
 }
 
 // reads the normalization vector from the file at the specified location
 vector<double> readNormalizationVector(istream &bufferin, int32_t version) {
     int64_t nValues;
     if (version > 8) {
         nValues = readInt64FromFile(bufferin);
     } else {
         nValues = (int64_t) readInt32FromFile(bufferin);
     }
 
     uint64_t numValues;
     numValues = static_cast<uint64_t>(nValues);
     vector<double> values(numValues);
 
     if (version > 8) {
         for (int i = 0; i < nValues; i++) {
             values[i] = (double) readFloatFromFile(bufferin);
         }
     } else {
         for (int i = 0; i < nValues; i++) {
             values[i] = readDoubleFromFile(bufferin);
         }
     }
 
     return values;
 }
 
 class MatrixZoomData {
 public:
     bool isIntra;
     string fileName;
     int64_t myFilePos = 0LL;
     vector<double> expectedValues;
     bool foundFooter = false;
     vector<double> c1Norm;
     vector<double> c2Norm;
     int32_t c1 = 0;
     int32_t c2 = 0;
     string matrixType;
     string norm;
     int32_t version = 0;
     int32_t resolution = 0;
     int32_t numBins1 = 0;
     int32_t numBins2 = 0;
     float sumCounts;
     int32_t blockBinCount, blockColumnCount;
     map<int32_t, indexEntry> blockMap;
     double avgCount;
 
     MatrixZoomData(const chromosome &chrom1, const chromosome &chrom2, const string &matrixType,
                    const string &norm, const string &unit, int32_t resolution,
                    int32_t &version, int64_t &master, int64_t &totalFileSize,
                    const string &fileName) {
         this->version = version;
         this->fileName = fileName;
         int32_t c01 = chrom1.index;
         int32_t c02 = chrom2.index;
         if (c01 <= c02) { // default is ok
             this->c1 = c01;
             this->c2 = c02;
             this->numBins1 = static_cast<int32_t>(chrom1.length / resolution);
             this->numBins2 = static_cast<int32_t>(chrom2.length / resolution);
         } else { // flip
             this->c1 = c02;
             this->c2 = c01;
             this->numBins1 = static_cast<int32_t>(chrom2.length / resolution);
             this->numBins2 = static_cast<int32_t>(chrom1.length / resolution);
         }
         isIntra = c1 == c2;
 
         this->matrixType = matrixType;
         this->norm = norm;
         this->resolution = resolution;
 
         HiCFileStream *stream = new HiCFileStream(fileName);
         indexEntry c1NormEntry{}, c2NormEntry{};
 
         if (stream->isHttp) {
             foundFooter = readFooterURL(stream->curl, master, version, c1, c2, matrixType, norm, unit,
                                      resolution,
                                      myFilePos,
                                      c1NormEntry, c2NormEntry, expectedValues);
         } else {
             stream->fin.seekg(master, ios::beg);
             foundFooter = readFooter(stream->fin, master, version, c1, c2, matrixType, norm,
                                      unit,
                                      resolution, myFilePos,
                                      c1NormEntry, c2NormEntry, expectedValues);
         }
 
         if (!foundFooter) {
             return;
         }
         stream->close();
 
         if (norm != "NONE") {
             c1Norm = readNormalizationVectorFromFooter(c1NormEntry, version, fileName);
             if (isIntra) {
                 c2Norm = c1Norm;
             } else {
                 c2Norm = readNormalizationVectorFromFooter(c2NormEntry, version, fileName);
             }
         }
 
         HiCFileStream *stream2 = new HiCFileStream((fileName));
         if (stream2->isHttp) {
             // readMatrix will assign blockBinCount and blockColumnCount
             blockMap = readMatrixHttp(stream2->curl, myFilePos, unit, resolution, sumCounts,
                                       blockBinCount,
                                       blockColumnCount);
         } else {
             // readMatrix will assign blockBinCount and blockColumnCount
             blockMap = readMatrix(stream2->fin, myFilePos, unit, resolution, sumCounts,
                                   blockBinCount,
                                   blockColumnCount);
         }
         stream2->close();
 
         if (!isIntra) {
             avgCount = (sumCounts / numBins1) / numBins2;   // <= trying to avoid overflows
         }
     }
 
     static vector<double> readNormalizationVectorFromFooter(indexEntry cNormEntry, int32_t &version,
                                                             const string &fileName) {
         if (cNormEntry.size == 0) {
             // no normalization entries in this file.  Bow out now and avoid an allocation error
             throw strawException("Trying to retrieve other normalization options, but none exist");
         }
         char *buffer = readCompressedBytesFromFile(fileName, cNormEntry);
         memstream bufferin(buffer, cNormEntry.size);
         vector<double> cNorm = readNormalizationVector(bufferin, version);
         delete buffer;
         return cNorm;
     }
 
     static bool isInRange(int32_t r, int32_t c, int32_t numRows, int32_t numCols) {
         return 0 <= r && r < numRows && 0 <= c && c < numCols;
     }
 
     set<int32_t> getBlockNumbers(int64_t *regionIndices) const {
         if (version > 8 && isIntra) {
             return getBlockNumbersForRegionFromBinPositionV9Intra(regionIndices, blockBinCount, blockColumnCount);
         } else {
             return getBlockNumbersForRegionFromBinPosition(regionIndices, blockBinCount, blockColumnCount, isIntra);
         }
     }
 
     vector<double> getNormVector(int32_t index) {
         if (index == c1) {
             return c1Norm;
         } else if (index == c2) {
             return c2Norm;
         }
         cerr << "Invalid index provided: " << index << endl;
         cerr << "Should be either " << c1 << " or " << c2 << endl;
         vector<double> v;
         return v;
     }
 
     vector<double> getExpectedValues() {
         return expectedValues;
     }
 
     vector<contactRecord> getRecords(int64_t gx0, int64_t gx1, int64_t gy0, int64_t gy1) {
         if (!foundFooter) {
             vector<contactRecord> v;
             return v;
         }
         int64_t origRegionIndices[] = {gx0, gx1, gy0, gy1};
         int64_t regionIndices[4];
         convertGenomeToBinPos(origRegionIndices, regionIndices, resolution);
 
         set<int32_t> blockNumbers = getBlockNumbers(regionIndices);
         vector<contactRecord> records;
         for (int32_t blockNumber : blockNumbers) {
             // get contacts in this block
             //cout << *it << " -- " << blockMap.size() << endl;
             //cout << blockMap[*it].size << " " <<  blockMap[*it].position << endl;
             vector<contactRecord> tmp_records = readBlock(fileName, blockMap[blockNumber], version);
             for (contactRecord rec : tmp_records) {
                 int64_t x = rec.binX * resolution;
                 int64_t y = rec.binY * resolution;
 
                 if ((x >= origRegionIndices[0] && x <= origRegionIndices[1] &&
                      y >= origRegionIndices[2] && y <= origRegionIndices[3]) ||
                     // or check regions that overlap with lower left
                     (isIntra && y >= origRegionIndices[0] && y <= origRegionIndices[1] && x >= origRegionIndices[2] &&
                      x <= origRegionIndices[3])) {
 
                     float c = rec.counts;
                     if (norm != "NONE") {
                         c = static_cast<float>(c / (c1Norm[rec.binX] * c2Norm[rec.binY]));
                     }
                     if (matrixType == "oe") {
                         if (isIntra) {
                             c = static_cast<float>(c / expectedValues[min(expectedValues.size() - 1,
                                                                           (size_t) floor(abs(y - x) /
                                                                                          resolution))]);
                         } else {
                             c = static_cast<float>(c / avgCount);
                         }
                     } else if (matrixType == "expected") {
                         if (isIntra) {
                             c = static_cast<float>(expectedValues[min(expectedValues.size() - 1,
                                                                       (size_t) floor(abs(y - x) /
                                                                                      resolution))]);
                         } else {
                             c = static_cast<float>(avgCount);
                         }
                     }
 
                     if (!isnan(c) && !isinf(c)){
                         contactRecord record = contactRecord();
                         record.binX = static_cast<int32_t>(x);
                         record.binY = static_cast<int32_t>(y);
                         record.counts = c;
                         records.push_back(record);
                     }
                 }
             }
         }
         return records;
     }
 
     vector<vector<float> > getRecordsAsMatrix(int64_t gx0, int64_t gx1, int64_t gy0, int64_t gy1) {
         vector<contactRecord> records = this->getRecords(gx0, gx1, gy0, gy1);
         if (records.empty()) {
             vector<vector<float> > res = vector<vector<float> >(1, vector<float>(1, 0));
             return res;
         }
 
         int64_t origRegionIndices[] = {gx0, gx1, gy0, gy1};
         int64_t regionIndices[4];
         convertGenomeToBinPos(origRegionIndices, regionIndices, resolution);
 
         int64_t originR = regionIndices[0];
         int64_t endR = regionIndices[1];
         int64_t originC = regionIndices[2];
         int64_t endC = regionIndices[3];
         int32_t numRows = endR - originR + 1;
         int32_t numCols = endC - originC + 1;
         vector<vector<float> > matrix;
         for (int32_t i = 0; i < numRows; i++) {
             matrix.emplace_back(numCols, 0);
         }
 
         for (contactRecord cr : records) {
             if (isnan(cr.counts) || isinf(cr.counts)) continue;
             int32_t r = cr.binX / resolution - originR;
             int32_t c = cr.binY / resolution - originC;
             if (isInRange(r, c, numRows, numCols)) {
                 matrix[r][c] = cr.counts;
             }
             if (isIntra) {
                 r = cr.binY / resolution - originR;
                 c = cr.binX / resolution - originC;
                 if (isInRange(r, c, numRows, numCols)) {
                     matrix[r][c] = cr.counts;
                 }
             }
         }
         return matrix;
     }
 
     int64_t getNumberOfTotalRecords() {
         if (!foundFooter) {
             return 0;
         }
         int64_t regionIndices[4] = {0, numBins1, 0, numBins2};
         set<int32_t> blockNumbers = getBlockNumbers(regionIndices);
         int64_t total = 0;
         for (int32_t blockNumber : blockNumbers) {
             total += getNumRecordsInBlock(fileName, blockMap[blockNumber], version);
         }
         return total;
     }
 };
 
 class HiCFile {
 public:
     string prefix = "http"; // HTTP code
     int64_t master = 0LL;
     map<string, chromosome> chromosomeMap;
     string genomeID;
     int32_t numChromosomes = 0;
     int32_t version = 0;
     int64_t nviPosition = 0LL;
     int64_t nviLength = 0LL;
     vector<int32_t> resolutions;
     static int64_t totalFileSize;
     string fileName;
     vector<string> attributes;
 
     static size_t hdf(char *b, size_t size, size_t nitems, void *userdata) {
         size_t numbytes = size * nitems;
         b[numbytes + 1] = '\0';
         string s(b);
         int32_t found;
         found = static_cast<int32_t>(s.find("content-range"));
         if ((size_t) found == string::npos) {
             found = static_cast<int32_t>(s.find("Content-Range"));
         }
         if ((size_t) found != string::npos) {
             int32_t found2;
             found2 = static_cast<int32_t>(s.find('/'));
             //content-range: bytes 0-100000/891471462
             if ((size_t) found2 != string::npos) {
                 string total = s.substr(found2 + 1);
                 totalFileSize = stol(total);
             }
         }
 
         return numbytes;
     }
 
     static CURL *oneTimeInitCURL(const char *url) {
         CURL *curl = initCURL(url);
         curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, hdf);
         curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
         return curl;
     }
 
     explicit HiCFile(const string &fileName) {
         this->fileName = fileName;
 
         // read header into buffer; 100K should be sufficient
         if (std::strncmp(fileName.c_str(), prefix.c_str(), prefix.size()) == 0) {
             CURL *curl;
             curl = oneTimeInitCURL(fileName.c_str());
             char *buffer = getData(curl, 0, 100000);
             memstream bufin(buffer, 100000);
             chromosomeMap = readHeader(bufin, master, genomeID, numChromosomes,
                                        version, nviPosition, nviLength, attributes);
             resolutions = readResolutionsFromHeader(bufin);
             curl_easy_cleanup(curl);
             delete buffer;
         } else {
             ifstream fin;
             fin.open(fileName, fstream::in | fstream::binary);
             if (!fin) {
                 throw strawException("File " + fileName + " cannot be opened for reading");
                 //cerr << "File " << fileName << " cannot be opened for reading" << endl;
                 //exit(6);
             }
             chromosomeMap = readHeader(fin, master, genomeID, numChromosomes,
                                        version, nviPosition, nviLength, attributes);
             resolutions = readResolutionsFromHeader(fin);
             fin.close();
         }
     }
 
     string getGenomeID() const {
         return genomeID;
     }
 
     vector<int32_t> getResolutions() const {
         return resolutions;
     }
 
     vector<chromosome> getChromosomes() {
         chromosome chromosomes[chromosomeMap.size()];
         map<string, chromosome>::iterator iter = chromosomeMap.begin();
         while (iter != chromosomeMap.end()) {
             chromosome chrom = static_cast<chromosome>(iter->second);
             chromosomes[chrom.index] = chrom;
             iter++;
         }
 
         vector<chromosome> final_chromosomes;
         final_chromosomes.reserve(chromosomeMap.size());
         for(int32_t i = 0; i < chromosomeMap.size(); i++){
             final_chromosomes.push_back(chromosomes[i]);
         }
         return final_chromosomes;
     }
 
     MatrixZoomData * getMatrixZoomData(const string &chr1, const string &chr2, const string &matrixType,
                                        const string &norm, const string &unit, int32_t resolution) {
         chromosome chrom1 = chromosomeMap[chr1];
         chromosome chrom2 = chromosomeMap[chr2];
         return new MatrixZoomData(chrom1, chrom2, (matrixType), (norm), (unit),
                                   resolution, version, master, totalFileSize, fileName);
     }
 };
 
 int64_t HiCFile::totalFileSize = 0LL;
 
 void parsePositions(const string &chrLoc, string &chrom, int64_t &pos1, int64_t &pos2, map<string, chromosome> map) {
     string x, y;
     stringstream ss(chrLoc);
     getline(ss, chrom, ':');
     if (map.count(chrom) == 0) {
         throw strawException("chromosome " + chrom + " not found in the file.");
         //cerr << "chromosome " << chrom << " not found in the file." << endl;
         //exit(7);
     }
 
     if (getline(ss, x, ':') && getline(ss, y, ':')) {
         pos1 = stol(x);
         pos2 = stol(y);
     } else {
         pos1 = 0LL;
         pos2 = map[chrom].length;
     }
 }
 
 vector<contactRecord> straw(const string &matrixType, const string &norm, const string &fileName, const string &chr1loc,
                             const string &chr2loc, const string &unit, int32_t binsize) {
     if (!(unit == "BP" || unit == "FRAG")) {
         cerr << "Norm specified incorrectly, must be one of <BP/FRAG>" << endl;
         cerr << "Usage: straw [observed/oe/expected] <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr1>[:x1:x2] <chr2>[:y1:y2] <BP/FRAG> <binsize>"
              << endl;
         vector<contactRecord> v;
         return v;
     }
 
     HiCFile *hiCFile = new HiCFile(fileName);
     string chr1, chr2;
     int64_t origRegionIndices[4] = {-100LL, -100LL, -100LL, -100LL};
     parsePositions((chr1loc), chr1, origRegionIndices[0], origRegionIndices[1], hiCFile->chromosomeMap);
     parsePositions((chr2loc), chr2, origRegionIndices[2], origRegionIndices[3], hiCFile->chromosomeMap);
 
     if (hiCFile->chromosomeMap[chr1].index > hiCFile->chromosomeMap[chr2].index) {
         MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr2, chr1, matrixType, norm, unit, binsize);
         return mzd->getRecords(origRegionIndices[2], origRegionIndices[3], origRegionIndices[0], origRegionIndices[1]);
     } else {
         MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr1, chr2, matrixType, norm, unit, binsize);
         return mzd->getRecords(origRegionIndices[0], origRegionIndices[1], origRegionIndices[2], origRegionIndices[3]);
     }
 }
 
 vector<vector<float> > strawAsMatrix(const string &matrixType, const string &norm, const string &fileName, const string &chr1loc,
                    const string &chr2loc, const string &unit, int32_t binsize) {
     if (!(unit == "BP" || unit == "FRAG")) {
         cerr << "Norm specified incorrectly, must be one of <BP/FRAG>" << endl;
         cerr << "Usage: straw [observed/oe/expected] <NONE/VC/VC_SQRT/KR> <hicFile(s)> <chr1>[:x1:x2] <chr2>[:y1:y2] <BP/FRAG> <binsize>"
              << endl;
         vector<vector<float> > res = vector<vector<float> >(1, vector<float>(1, 0));
         return res;
     }
 
     HiCFile *hiCFile = new HiCFile(fileName);
     string chr1, chr2;
     int64_t origRegionIndices[4] = {-100LL, -100LL, -100LL, -100LL};
     parsePositions((chr1loc), chr1, origRegionIndices[0], origRegionIndices[1], hiCFile->chromosomeMap);
     parsePositions((chr2loc), chr2, origRegionIndices[2], origRegionIndices[3], hiCFile->chromosomeMap);
 
     if (hiCFile->chromosomeMap[chr1].index > hiCFile->chromosomeMap[chr2].index) {
         MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr2, chr1, matrixType, norm, unit, binsize);
         return mzd->getRecordsAsMatrix(origRegionIndices[2], origRegionIndices[3], origRegionIndices[0], origRegionIndices[1]);
     } else {
         MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chr1, chr2, matrixType, norm, unit, binsize);
         return mzd->getRecordsAsMatrix(origRegionIndices[0], origRegionIndices[1], origRegionIndices[2], origRegionIndices[3]);
     }
 }
 
 int64_t getNumRecordsForFile(const string &fileName, int32_t binsize, bool interOnly) {
     HiCFile *hiCFile = new HiCFile(fileName);
     int64_t totalNumRecords = 0;
 
     int32_t indexOffset = 0;
     if (interOnly){
         indexOffset = 1;
     }
 
     vector<chromosome> chromosomes = hiCFile->getChromosomes();
     for(int32_t i = 0; i < chromosomes.size(); i++){
         if(chromosomes[i].index <= 0) continue;
         for(int32_t j = i + indexOffset; j < chromosomes.size(); j++){
             if(chromosomes[j].index <= 0) continue;
             MatrixZoomData *mzd;
             if(chromosomes[i].index > chromosomes[j].index){
                 mzd = hiCFile->getMatrixZoomData(chromosomes[j].name, chromosomes[i].name, "observed", "NONE", "BP", binsize);
             } else {
                 mzd = hiCFile->getMatrixZoomData(chromosomes[i].name, chromosomes[j].name, "observed", "NONE", "BP", binsize);
             }
             totalNumRecords += mzd->getNumberOfTotalRecords();
         }
     }
 
     return totalNumRecords;
 }
 
 int64_t getNumRecordsForChromosomes(const string &fileName, int32_t binsize, bool interOnly) {
     HiCFile *hiCFile = new HiCFile(fileName);
     vector<chromosome> chromosomes = hiCFile->getChromosomes();
     for(int32_t i = 0; i < chromosomes.size(); i++){
         if(chromosomes[i].index <= 0) continue;
         MatrixZoomData *mzd = hiCFile->getMatrixZoomData(chromosomes[i].name, chromosomes[i].name, "observed", "NONE", "BP", binsize);
         int64_t totalNumRecords = mzd->getNumberOfTotalRecords();
         cout << chromosomes[i].name << " " << totalNumRecords << " ";
         cout << totalNumRecords*12/1000/1000/1000 << " GB" << endl;
     }
     return 0;
 }
 
 void getHeaderFields(const string &filename, string &genome, vector<string> &chromNames, vector<int> &chromSizes,
         vector<int> &bpResolutions, vector<int> &fragResolutions, vector<string> &attributes)
 /* Fill in the provided fields with information from the header of the hic file in the supplied filename.
  * fragResolutions is left empty for now, as we're not making use of it. */
 {
     HiCFile *hiCFile = new HiCFile(filename);
 
     genome.assign(hiCFile->getGenomeID());
 
     vector<chromosome> chromList = hiCFile->getChromosomes();
     vector<string> myChromNames;
     myChromNames.reserve(chromList.size());
     vector<int> myChromSizes;
     myChromSizes.reserve(chromList.size());
     for(int32_t i = 0; i < chromList.size(); i++){
         myChromNames.push_back(chromList[i].name);
         myChromSizes.push_back(chromList[i].length);
     }
     chromNames = myChromNames;
     chromSizes = myChromSizes;
 
     vector<int> myBpResolutions;
     myBpResolutions.reserve(hiCFile->resolutions.size());
     for(int32_t i = 0; i < hiCFile->resolutions.size(); i++){
         myBpResolutions.push_back(hiCFile->resolutions[i]);
     }
     bpResolutions = myBpResolutions;
 
     attributes = hiCFile->attributes;
 
     // Ignoring fragment resolutions for now, since they're never being used here
 }
 
 set<string> getNormOptions()
 /* Return the set of normalization options that have been encountered through footer parsing.
  * The result will be empty unless at least one straw() request has been made.
  */
 {
     return globalNormOptions;
 }