60aca91bcce6d4fa555e6c7c91d8ff8aa9e7bd2b
jcasper
  Fri Jun 11 15:17:21 2021 -0700
Updating hic support for files with large headers (over 100kb) and improving
multi-region performance, refs #18842, #27593

diff --git src/hg/lib/hic.c src/hg/lib/hic.c
index bfbc6d5..d1c03c1 100644
--- src/hg/lib/hic.c
+++ src/hg/lib/hic.c
@@ -1,61 +1,63 @@
 /* hic.c contains a few helpful wrapper functions for managing Hi-C data. */
 
 #include "common.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "hic.h"
 #include "hdb.h"
 #include "trackHub.h"
-#include "Cstraw.h"
+#include "cStraw.h"
 #include "hash.h"
 #include "chromAlias.h"
 #include "interact.h"
 
 #ifdef USE_HIC
 
 void mangleName(char *ucscName, char mangledUcscName[], int size)
 /* Fill mangledUcscName (a caller-supplied buffer of 'size' bytes) with a version
  * of ucscName that matches the chromosome-name mangling performed by the Juicer
  * .hic creation tool: strip any initial "chr" and capitalize the rest. */
 {
     int offset = 0;  // how many leading characters of workingName to skip
     char workingName[size];  // scratch copy; ucscName itself is never written to
     safef(workingName, sizeof(workingName), "%s", ucscName);
     touppers(workingName);  // capitalize before the prefix test so one "CHR" comparison suffices
     if (startsWith("CHR", workingName))
         offset = 3;  // length of the "CHR" prefix being dropped
     safencpy(mangledUcscName, size, workingName+offset, strlen(workingName+offset));
 }
 
 
 char *hicLoadHeader(char *filename, struct hicMeta **header, char *ucscAssembly)
 /* Create a hicMeta structure for the supplied Hi-C file.  If
  * the return value is non-NULL, it points to a string containing
  * an error message that explains why the retrieval failed. */
 {
 char *genome;
 char **chromosomes, **bpResolutions, **attributes;
 int *chromSizes, nChroms, nBpRes, nAttributes;
 
-char *errMsg = CstrawHeader(filename, &genome, &chromosomes, &chromSizes, &nChroms, &bpResolutions, &nBpRes, NULL, NULL, &attributes, &nAttributes);
+Straw *newStraw = cStrawOpen(filename);
+char *errMsg = cStrawHeader(newStraw, &genome, &chromosomes, &chromSizes, &nChroms, &bpResolutions, &nBpRes, NULL, NULL, &attributes, &nAttributes);
 if (errMsg != NULL)
     return errMsg;
 
 struct hicMeta *newMeta = NULL;
 AllocVar(newMeta);
+newMeta->strawObj = newStraw;
 newMeta->fileAssembly = genome;
 newMeta->nRes = nBpRes;
 newMeta->resolutions = bpResolutions;
 newMeta->nChroms = nChroms;
 newMeta->chromNames = chromosomes;
 newMeta->chromSizes = chromSizes;
 newMeta->ucscToAlias = NULL;
 newMeta->ucscAssembly = cloneString(ucscAssembly);
 newMeta->filename = cloneString(filename);
 newMeta->attributes = attributes;
 newMeta->nAttributes = nAttributes;
 
 *header = newMeta;
 
 struct slName *ucscNameList = NULL, *ucscName = NULL;
@@ -163,31 +165,31 @@
 
 char *leftChromName = chrom1;
 char *rightChromName = chrom2;
 if (fileInfo->ucscToAlias != NULL)
     {
     leftChromName = (char*) hashFindVal(fileInfo->ucscToAlias, leftChromName);
     if (leftChromName == NULL)
         leftChromName = chrom1;
     rightChromName = (char*) hashFindVal(fileInfo->ucscToAlias, rightChromName);
     if (rightChromName == NULL)
         rightChromName = chrom2;
     }
 dyStringPrintf(leftWindowPos, "%s:%d:%d", leftChromName, start1, end1);
 dyStringPrintf(rightWindowPos, "%s:%d:%d", rightChromName, start2, end2);
 
-char *networkErrMsg = Cstraw(normalization, fileInfo->filename, resolution, dyStringContents(leftWindowPos),
+char *networkErrMsg = cStraw(fileInfo->strawObj, normalization, resolution, dyStringContents(leftWindowPos),
          dyStringContents(rightWindowPos), "BP", &x, &y, &counts, &numRecords);
 
 int i=0;
 for (i=0; i<numRecords; i++)
     {
     if (isnan(counts[i]))
         {
         // Yes, apparently NAN is possible with normalized values in some methods.  Ignore those.
         continue;
         }
 
     struct interact *new = interactFromHic(chrom1, x[i], chrom2, y[i], resolution, counts[i]);
     slAddHead(resultPtr, new);
 
     if (differentWord(chrom1, chrom2))