b2acdf83569bcb30013ed09d885d3f48b19c1d4e jcasper Wed Sep 11 16:03:26 2019 -0700 Better support for hic composite tracks, and hic trackUi pages now include metadata from the file, refs #22316 diff --git src/hg/lib/straw/straw.cpp src/hg/lib/straw/straw.cpp index 9054889..e9602a9 100644 --- src/hg/lib/straw/straw.cpp +++ src/hg/lib/straw/straw.cpp @@ -790,112 +790,113 @@ if ((x >= origRegionIndices[0] && x <= origRegionIndices[1] && y >= origRegionIndices[2] && y <= origRegionIndices[3]) || // or check regions that overlap with lower left ((c1==c2) && y >= origRegionIndices[0] && y <= origRegionIndices[1] && x >= origRegionIndices[2] && x <= origRegionIndices[3])) { xActual.push_back(x); yActual.push_back(y); counts.push_back(c); //printf("%d\t%d\t%.14g\n", x, y, c); } } } } -void parseHeaderFields(istream& fin, string &genome, vector<string> &chromNames, vector<int> &bpResolutions, vector<int> &fragResolutions) +void parseHeaderFields(istream& fin, string &genome, vector<string> &chromNames, vector<int> &chromSizes, vector<int> &bpResolutions, vector<int> &fragResolutions, vector<string> &attributes) /* Parse out header fields from the supplied data buffer, assumed to come from the beginning of * a .hic file. */ { if (!readMagicString(fin)) { throw strawException("Hi-C magic string is missing, does not appear to be a hic file"); } fin.read((char*)&version, sizeof(int)); if (version < 6) { throw strawException("Version " + to_string(version) + " no longer supported"); } long master; fin.read((char*)&master, sizeof(long)); getline(fin, genome, '\0' ); int nattributes; fin.read((char*)&nattributes, sizeof(int)); - // reading and ignoring attribute-value dictionary - // Should expand this to save these to another structure + // read in attribute-value dictionary for (int i=0; i<nattributes; i++) { string key, value; getline(fin, key, '\0'); getline(fin, value, '\0'); + attributes.insert(attributes.end(), key); + attributes.insert(attributes.end(), value); } int nChrs; fin.read((char*)&nChrs, sizeof(int)); // chromosome map for finding matrix for (int i=0; i<nChrs; i++) { string name; getline(fin, name, '\0'); chromNames.insert(chromNames.end(), name); - // ignoring chromosome sizes int length; fin.read((char*)&length, sizeof(int)); + chromSizes.insert(chromSizes.end(), length); } int nBpResolutions; fin.read((char*)&nBpResolutions, sizeof(int)); for (int i=0; i<nBpResolutions; i++) { int res; fin.read((char*)&res, sizeof(int)); bpResolutions.insert(bpResolutions.end(), res); } int nFragResolutions; fin.read((char*)&nFragResolutions, sizeof(int)); for (int i=0; i<nFragResolutions; i++) { int res; fin.read((char*)&res, sizeof(int)); fragResolutions.insert(fragResolutions.end(), res); } } -void getHeaderFields(string fname, string &genome, vector<string> &chromNames, vector<int> &bpResolutions, vector<int> &fragResolutions) +void getHeaderFields(string fname, string &genome, vector<string> &chromNames, vector<int> &chromSizes, vector<int> &bpResolutions, vector<int> &fragResolutions, vector<string> &attributes) /* Retrieve .hic header fields from the supplied filename and return them in the supplied variables. */ { // HTTP code string prefix="http"; bool isHttp = false; // read header into buffer; 100K should be sufficient char * buffer = NULL; char *url = NULL; if (std::strncmp(fname.c_str(), prefix.c_str(), prefix.size()) == 0) { isHttp = true; url = (char*) fname.c_str(); buffer = getHttpData(url, 0, 100000); membuf sbuf(buffer, buffer + 100000); istream bufin(&sbuf); - parseHeaderFields(bufin, genome, chromNames, bpResolutions, fragResolutions); + parseHeaderFields(bufin, genome, chromNames, chromSizes, bpResolutions, fragResolutions, attributes); delete buffer; } else { fstream fin; fin.open(fname, fstream::in); if (!fin) { throw strawException("File " + fname + " cannot be opened for reading"); } - parseHeaderFields(fin, genome, chromNames, bpResolutions, fragResolutions); + parseHeaderFields(fin, genome, chromNames, chromSizes, bpResolutions, fragResolutions, attributes); fin.close(); } } extern "C" char *Cstraw (char *norm, char *fname, int binsize, char *chr1loc, char *chr2loc, char *unit, int **xActual, int **yActual, double **counts, int *numRecords) /* Wrapper function to retrieve a data chunk from a .hic file, for use by C libraries. * norm is one of NONE/VC/VC_SQRT/KR. * binsize is one of the supported bin sizes as determined by CstrawHeader. * chr1loc and chr2loc are the two positions to retrieve interaction data for, specified as chr:start-end. * unit is one of BP/FRAG. * Values are returned in newly allocated arrays in xActual, yActual, and counts, with the number of * returned records in numRecords. * The function returns NULL unless an error was encountered, in which case the return value points @@ -918,80 +919,103 @@ } *numRecords = thisx.size(); *xActual = (int*) calloc((size_t) *numRecords, sizeof(int)); *yActual = (int*) calloc((size_t) *numRecords, sizeof(int)); *counts = (double*) calloc((size_t) *numRecords, sizeof(double)); for (int i=0; i<*numRecords; i++) { (*xActual)[i] = thisx[i]; (*yActual)[i] = thisy[i]; (*counts)[i] = (double) thiscounts[i]; } return NULL; } -extern "C" char *CstrawHeader (char *filename, char **genome, char ***chromNames, int *nChroms, char ***bpResolutions, int *nBpRes, char ***fragResolutions, int *nFragRes) +extern "C" char *CstrawHeader (char *filename, char **genome, char ***chromNames, int **chromSizes, int *nChroms, char ***bpResolutions, int *nBpRes, char ***fragResolutions, int *nFragRes, char ***attributes, int *nAttributes) /* Wrapper function to retrieve header fields from a .hic file, for use by C libraries. * This retrieves the assembly name, list of chromosome names, list of available binsize resolutions, * and list of available fragment resolutions in the specific .hic file. * The function returns NULL unless an error was encountered, in which case the return value points * to a character string explaining the error. */ { string filenameString(filename); string genomeString; vector<string> chromNameVector; + vector<int> chromSizeVector; vector<int> bpResolutionVector; vector<int> fragResolutionVector; + vector<string> attributeVector; try { - getHeaderFields(filenameString, genomeString, chromNameVector, bpResolutionVector, fragResolutionVector); + getHeaderFields(filenameString, genomeString, chromNameVector, chromSizeVector, bpResolutionVector, fragResolutionVector, attributeVector); } catch (strawException& err) { char *errMsg = (char*) calloc((size_t) strlen(err.what())+1, sizeof(char)); strcpy(errMsg, err.what()); return errMsg; } if (genome != NULL) { *genome = (char*) malloc((genomeString.length()+1)*sizeof(char)); strcpy(*genome, genomeString.c_str()); } if (nChroms != NULL) { *nChroms = chromNameVector.size(); } if (chromNames != NULL) { *chromNames = (char**) calloc((size_t) chromNameVector.size(), sizeof(char*)); for (int i=0; i<chromNameVector.size(); i++) { (*chromNames)[i] = (char*) malloc((chromNameVector[i].length()+1)*sizeof(char)); strcpy((*chromNames)[i], chromNameVector[i].c_str()); } } + if (chromSizes != NULL) + { + *chromSizes = (int*) calloc((size_t) chromSizeVector.size(), sizeof(int)); + for (int i=0; i<chromSizeVector.size(); i++) + { + (*chromSizes)[i] = chromSizeVector[i]; + } + } if (nBpRes != NULL) { *nBpRes = bpResolutionVector.size(); } if (bpResolutions != NULL) { *bpResolutions = (char**) calloc((size_t) bpResolutionVector.size(), sizeof(char*)); for (int i=0; i<bpResolutionVector.size(); i++) { (*bpResolutions)[i] = (char*) malloc((to_string(bpResolutionVector[i]).length()+1)*sizeof(char)); strcpy((*bpResolutions)[i], to_string(bpResolutionVector[i]).c_str()); } } if (nFragRes != NULL) { *nFragRes = fragResolutionVector.size(); } if (fragResolutions != NULL) { *fragResolutions = (char**) calloc((size_t) fragResolutionVector.size(), sizeof(char*)); for (int i=0; i<fragResolutionVector.size(); i++) { (*fragResolutions)[i] = (char*) malloc((to_string(fragResolutionVector[i]).length()+1)*sizeof(char)); strcpy((*fragResolutions)[i], to_string(fragResolutionVector[i]).c_str()); } } + if (nAttributes != NULL) + { + *nAttributes = attributeVector.size(); + } + if (attributes != NULL) + { + *attributes = (char**) calloc((size_t) attributeVector.size(), sizeof(char*)); + for (int i=0; i<attributeVector.size(); i++) + { + (*attributes)[i] = (char*) malloc((attributeVector[i].length()+1)*sizeof(char)); + strcpy((*attributes)[i], attributeVector[i].c_str()); + } + } return NULL; }