b2acdf83569bcb30013ed09d885d3f48b19c1d4e
jcasper
  Wed Sep 11 16:03:26 2019 -0700
Better support for hic composite tracks, and hic trackUi pages now include
metadata from the file, refs #22316

diff --git src/hg/lib/straw/straw.cpp src/hg/lib/straw/straw.cpp
index 9054889..e9602a9 100644
--- src/hg/lib/straw/straw.cpp
+++ src/hg/lib/straw/straw.cpp
@@ -790,112 +790,113 @@
 
       if ((x >= origRegionIndices[0] && x <= origRegionIndices[1] &&
 	   y >= origRegionIndices[2] && y <= origRegionIndices[3]) ||
 	  // or check regions that overlap with lower left
 	  ((c1==c2) && y >= origRegionIndices[0] && y <= origRegionIndices[1] && x >= origRegionIndices[2] && x <= origRegionIndices[3])) {
 	xActual.push_back(x);
 	yActual.push_back(y);
 	counts.push_back(c);
 	//printf("%d\t%d\t%.14g\n", x, y, c);
       }
     }
   }
 }
 
 
-void parseHeaderFields(istream& fin, string &genome, vector<string> &chromNames, vector<int> &bpResolutions, vector<int> &fragResolutions)
+void parseHeaderFields(istream& fin, string &genome, vector<string> &chromNames, vector<int> &chromSizes, vector<int> &bpResolutions, vector<int> &fragResolutions, vector<string> &attributes)
 /* Parse out header fields from the supplied data buffer, assumed to come from the beginning of
  * a .hic file. */
 {
   if (!readMagicString(fin)) {
     throw strawException("Hi-C magic string is missing, does not appear to be a hic file");
   }
 
   fin.read((char*)&version, sizeof(int));
   if (version < 6) {
     throw strawException("Version " + to_string(version) + " no longer supported");
   }
   long master;
   fin.read((char*)&master, sizeof(long));
   getline(fin, genome, '\0' );
   int nattributes;
   fin.read((char*)&nattributes, sizeof(int));
 
-  // reading and ignoring attribute-value dictionary
-  // Should expand this to save these to another structure
+  // read in attribute-value dictionary
   for (int i=0; i<nattributes; i++) {
     string key, value;
     getline(fin, key, '\0');
     getline(fin, value, '\0');
+    attributes.insert(attributes.end(), key);
+    attributes.insert(attributes.end(), value);
   }
   int nChrs;
   fin.read((char*)&nChrs, sizeof(int));
   // chromosome map for finding matrix
   for (int i=0; i<nChrs; i++) {
     string name;
     getline(fin, name, '\0');
     chromNames.insert(chromNames.end(), name);
-    // ignoring chromosome sizes
     int length;
     fin.read((char*)&length, sizeof(int));
+    chromSizes.insert(chromSizes.end(), length);
   }
 
   int nBpResolutions;
   fin.read((char*)&nBpResolutions, sizeof(int));
   for (int i=0; i<nBpResolutions; i++) {
     int res;
     fin.read((char*)&res, sizeof(int));
     bpResolutions.insert(bpResolutions.end(), res);
   }
 
   int nFragResolutions;
   fin.read((char*)&nFragResolutions, sizeof(int));
   for (int i=0; i<nFragResolutions; i++) {
     int res;
     fin.read((char*)&res, sizeof(int));
     fragResolutions.insert(fragResolutions.end(), res);
   }
 }
 
 
-void getHeaderFields(string fname, string &genome, vector<string> &chromNames, vector<int> &bpResolutions, vector<int> &fragResolutions)
+void getHeaderFields(string fname, string &genome, vector<string> &chromNames, vector<int> &chromSizes, vector<int> &bpResolutions, vector<int> &fragResolutions, vector<string> &attributes)
 /* Retrieve .hic header fields from the supplied filename and return them in the supplied variables. */
 {
   // HTTP code
   string prefix="http";
   bool isHttp = false;
 
   // read header into buffer; 100K should be sufficient
   char * buffer = NULL;
   char *url = NULL;
 
   if (std::strncmp(fname.c_str(), prefix.c_str(), prefix.size()) == 0) {
     isHttp = true;
     url = (char*) fname.c_str();
     buffer = getHttpData(url, 0, 100000);
     membuf sbuf(buffer, buffer + 100000);
     istream bufin(&sbuf);  
-    parseHeaderFields(bufin, genome, chromNames, bpResolutions, fragResolutions);
+    parseHeaderFields(bufin, genome, chromNames, chromSizes, bpResolutions, fragResolutions, attributes);
     delete buffer;
   }
   else {
     fstream fin;
     fin.open(fname, fstream::in);
     if (!fin) {
       throw strawException("File " + fname + " cannot be opened for reading");
     }
-    parseHeaderFields(fin, genome, chromNames, bpResolutions, fragResolutions);
+    parseHeaderFields(fin, genome, chromNames, chromSizes, bpResolutions, fragResolutions, attributes);
     fin.close();
   }
 }
 
 
 
 extern "C" char *Cstraw (char *norm, char *fname, int binsize, char *chr1loc, char *chr2loc, char *unit, int **xActual, int **yActual, double **counts, int *numRecords)
 /* Wrapper function to retrieve a data chunk from a .hic file, for use by C libraries.
  * norm is one of NONE/VC/VC_SQRT/KR.
  * binsize is one of the supported bin sizes as determined by CstrawHeader.
  * chr1loc and chr2loc are the two positions to retrieve interaction data for, specified as chr:start-end.
  * unit is one of BP/FRAG.
  * Values are returned in newly allocated arrays in xActual, yActual, and counts, with the number of
  * returned records in numRecords.
  * The function returns NULL unless an error was encountered, in which case the return value points
@@ -918,80 +919,103 @@
     }
     *numRecords = thisx.size();
     *xActual = (int*) calloc((size_t) *numRecords, sizeof(int));
     *yActual = (int*) calloc((size_t) *numRecords, sizeof(int));
     *counts = (double*) calloc((size_t) *numRecords, sizeof(double));
     for (int i=0; i<*numRecords; i++)
         {
         (*xActual)[i] = thisx[i];
         (*yActual)[i] = thisy[i];
         (*counts)[i] = (double) thiscounts[i];
         }
     return NULL;
 }
 
 
-extern "C" char *CstrawHeader (char *filename, char **genome, char ***chromNames, int *nChroms, char ***bpResolutions, int *nBpRes, char ***fragResolutions, int *nFragRes)
+extern "C" char *CstrawHeader (char *filename, char **genome, char ***chromNames, int **chromSizes, int *nChroms, char ***bpResolutions, int *nBpRes, char ***fragResolutions, int *nFragRes, char ***attributes, int *nAttributes)
 /* Wrapper function to retrieve header fields from a .hic file, for use by C libraries.
  * This retrieves the assembly name, list of chromosome names, list of available binsize resolutions,
  * and list of available fragment resolutions in the specific .hic file.
  * The function returns NULL unless an error was encountered, in which case the return value points
  * to a character string explaining the error. */
 {
   string filenameString(filename);
   string genomeString;
   vector<string> chromNameVector;
+  vector<int> chromSizeVector;
   vector<int> bpResolutionVector;
   vector<int> fragResolutionVector;
+  vector<string> attributeVector;
   try {
-    getHeaderFields(filenameString, genomeString, chromNameVector, bpResolutionVector, fragResolutionVector);
+    getHeaderFields(filenameString, genomeString, chromNameVector, chromSizeVector, bpResolutionVector, fragResolutionVector, attributeVector);
   } catch (strawException& err) {
     char *errMsg = (char*) calloc((size_t) strlen(err.what())+1, sizeof(char));
     strcpy(errMsg, err.what());
     return errMsg;
   }
   if (genome != NULL)
   {
     *genome = (char*) malloc((genomeString.length()+1)*sizeof(char));
     strcpy(*genome, genomeString.c_str());
   }
   if (nChroms != NULL)
   {
     *nChroms = chromNameVector.size();
   }
   if (chromNames != NULL)
   {
     *chromNames = (char**) calloc((size_t) chromNameVector.size(), sizeof(char*));
     for (int i=0; i<chromNameVector.size(); i++)
     {
       (*chromNames)[i] = (char*) malloc((chromNameVector[i].length()+1)*sizeof(char));
       strcpy((*chromNames)[i], chromNameVector[i].c_str());
     }
   }
+  if (chromSizes != NULL)
+  {
+    *chromSizes = (int*) calloc((size_t) chromSizeVector.size(), sizeof(int));
+    for (int i=0; i<chromSizeVector.size(); i++)
+    {
+      (*chromSizes)[i] = chromSizeVector[i];
+    }
+  }
   if (nBpRes != NULL)
   {
     *nBpRes = bpResolutionVector.size();
   }
   if (bpResolutions != NULL)
   {
     *bpResolutions = (char**) calloc((size_t) bpResolutionVector.size(), sizeof(char*));
     for (int i=0; i<bpResolutionVector.size(); i++)
     {
       (*bpResolutions)[i] = (char*) malloc((to_string(bpResolutionVector[i]).length()+1)*sizeof(char));
       strcpy((*bpResolutions)[i], to_string(bpResolutionVector[i]).c_str());
     }
   }
   if (nFragRes != NULL)
   {
     *nFragRes = fragResolutionVector.size();
   }
   if (fragResolutions != NULL)
   {
     *fragResolutions = (char**) calloc((size_t) fragResolutionVector.size(), sizeof(char*));
     for (int i=0; i<fragResolutionVector.size(); i++)
     {
       (*fragResolutions)[i] = (char*) malloc((to_string(fragResolutionVector[i]).length()+1)*sizeof(char));
       strcpy((*fragResolutions)[i], to_string(fragResolutionVector[i]).c_str());
     }
   }
+  if (nAttributes != NULL)
+  {
+    *nAttributes = attributeVector.size();
+  }
+  if (attributes != NULL)
+  {
+    *attributes = (char**) calloc((size_t) attributeVector.size(), sizeof(char*));
+    for (int i=0; i<attributeVector.size(); i++)
+    {
+      (*attributes)[i] = (char*) malloc((attributeVector[i].length()+1)*sizeof(char));
+      strcpy((*attributes)[i], attributeVector[i].c_str());
+    }
+  }
   return NULL;
 }