// Provenance: commit ae1ef6aa226d4436d4dc9e2b9b28120d802ab8a5, jcasper,
// Thu Jul 25 11:57:53 2024 -0700 -- "Dropping new straw library files into place, refs #33225".
// Added as new file src/hg/lib/straw/straw.h (mode 100644, index 0000000..feb1228, hunk -0,0 +1,132).
/*
  The MIT License (MIT)

  Copyright (c) 2011-2016 Broad Institute, Aiden Lab

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in
  all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  THE SOFTWARE.
*/
#ifndef STRAW_H
#define STRAW_H

// NOTE(review): the header names of the four original #include lines were not
// present in the text under review.  The list below covers every standard
// library name this header uses, so the header is self-contained.
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <istream>
#include <map>
#include <set>
#include <stdexcept>
#include <streambuf>
#include <string>
#include <vector>

// pointer structure for reading blocks or matrices, holds the size and position
struct indexEntry {
    int64_t size;
    int64_t position;
};

// sparse matrixType entry
struct contactRecord {
    int32_t binX;
    int32_t binY;
    float counts;
};

// chromosome
struct chromosome {
    std::string name;
    int32_t index;
    int64_t length;
};

// this is for creating a stream from a byte array for ease of use
// see https://stackoverflow.com/questions/41141175/how-to-implement-seekg-seekpos-on-an-in-memory-buffer
//
// membuf points the streambuf get area at the l bytes starting at begin.
// Only pointers into the caller's array are stored; nothing here copies or
// frees it, so the array must outlive the membuf.
struct membuf : std::streambuf {
    membuf(char *begin, int32_t l) {
        // A negative length would put the end pointer before the start of the
        // buffer; treat it as an empty get area instead.
        char *const end = begin + (l > 0 ? l : 0);
        setg(begin, begin, end);
    }
};

// Seekable input stream over a caller-supplied byte array.
// membuf is a virtual base listed first, so it is constructed before
// std::istream is handed the pointer to it.
struct memstream : virtual membuf, std::istream {
    memstream(char *begin, int32_t l) :
            membuf(begin, l),
            std::istream(static_cast<std::streambuf *>(this)) {
    }

    // Absolute seek: forwards to seekoff() with sp expressed as an offset from
    // the beginning of the buffer.
    std::istream::pos_type seekpos(std::istream::pos_type sp, std::ios_base::openmode which) override {
        const std::istream::off_type fromStart = sp - std::istream::pos_type(std::istream::off_type(0));
        return seekoff(fromStart, std::ios_base::beg, which);
    }

    // Relative seek within the get area.
    //   off   - signed displacement
    //   dir   - std::ios_base::beg, cur or end: the origin off is applied to
    //   which - accepted for interface compatibility and otherwise ignored
    // Returns the new position measured from the start of the buffer, or
    // pos_type(off_type(-1)) when the target would fall outside [0, size] or
    // dir is not one of the three origins.  On failure the read position is
    // left unchanged.
    std::istream::pos_type seekoff(std::istream::off_type off,
                                   std::ios_base::seekdir dir,
                                   std::ios_base::openmode which = std::ios_base::in) override {
        (void) which;
        const std::istream::pos_type failure(std::istream::off_type(-1));
        const std::istream::off_type size = egptr() - eback();

        std::istream::off_type origin = 0;
        if (dir == std::ios_base::cur)
            origin = gptr() - eback();
        else if (dir == std::ios_base::end)
            origin = size;
        else if (dir != std::ios_base::beg)
            return failure;

        // origin lies in [0, size], so both bounds below are computed without
        // overflow.  The check replaces an unchecked gbump(int)/setg(), which
        // truncated 64-bit offsets and could move the pointer out of the buffer.
        if (off < -origin || off > size - origin)
            return failure;

        const std::istream::off_type target = origin + off;
        setg(eback(), eback() + target, egptr());
        return std::istream::pos_type(target);
    }
};

// for holding data from URL call
struct MemoryStruct {
    char *memory;
    size_t size;
};

// NOTE(review): the container element types in the declarations below
// (std::map<...>, std::vector<...>, std::set<...>) were not present in the
// text under review and have been reconstructed.  Confirm each one against
// the corresponding definition in straw.cpp before merging.

std::map<int32_t, indexEntry>
readMatrixZoomData(std::istream &fin, const std::string &myunit, int32_t mybinsize, float &mySumCounts,
                   int32_t &myBlockBinCount,
                   int32_t &myBlockColumnCount, bool &found);

std::map<int32_t, indexEntry>
readMatrix(std::istream &fin, int32_t myFilePosition, std::string unit, int32_t resolution, float &mySumCounts,
           int32_t &myBlockBinCount, int32_t &myBlockColumnCount);

std::vector<double> readNormalizationVector(std::istream &fin, indexEntry entry);

std::vector<contactRecord>
straw(const std::string& matrixType, const std::string& norm, const std::string& fname, const std::string& chr1loc,
      const std::string& chr2loc, const std::string &unit, int32_t binsize);

std::vector<std::vector<float>>
strawAsMatrix(const std::string &matrixType, const std::string &norm, const std::string &fileName,
              const std::string &chr1loc, const std::string &chr2loc, const std::string &unit, int32_t binsize);

int64_t getNumRecordsForFile(const std::string& filename, int32_t binsize, bool interOnly);

int64_t getNumRecordsForChromosomes(const std::string& filename, int32_t binsize, bool interOnly);

/* Added at UCSC */
void getHeaderFields(const std::string &filename, std::string &genome, std::vector<std::string> &chromNames,
        std::vector<int> &chromSizes, std::vector<int> &bpResolutions, std::vector<int> &fragResolutions,
        std::vector<std::string> &attributes);
/* Fill in the provided fields with information from the header of the hic file in the supplied filename.
 * fragResolutions is left empty for now, as we're not making use of it. */

class strawException : public std::runtime_error {
/* Simple exception wrapper class */
  public:
    strawException(const std::string& error):
      std::runtime_error(error) {
    }
};

std::set<std::string> getNormOptions();
/* Return the set of normalization options that have been encountered through footer parsing.
 * The result will be empty unless at least one straw() request has been made.
 */

#endif