60079bc1898e003d9f1397176baaee00e431b011
jcasper
  Tue Mar 10 14:36:41 2020 -0700
Changes to support better chr mappings in .hic files depended on a non-NULL assembly name,
which was not previously assumed.  Fixing what I broke, refs #24988

diff --git src/hg/lib/hic.c src/hg/lib/hic.c
index b7ea44f..1552570 100644
--- src/hg/lib/hic.c
+++ src/hg/lib/hic.c
@@ -1,197 +1,199 @@
 /* hic.c contains a few helpful wrapper functions for managing Hi-C data. */
 
 #include "common.h"
 #include "linefile.h"
 #include "dystring.h"
 #include "jksql.h"
 #include "hic.h"
 #include "hdb.h"
 #include "trackHub.h"
 #include "Cstraw.h"
 #include "hash.h"
 #include "chromAlias.h"
 #include "interact.h"
 
 void mangleName(char *ucscName, char mangledUcscName[], int size)
 /* Generate a version of an assembly's chromosome name that matches
  * the mangling performed by the Juicer .hic creation tool (strip any initial
  * "chr" and capitalize the rest). */
 {
     int offset = 0;
     char workingName[size];
     safef(workingName, sizeof(workingName), "%s", ucscName);
     touppers(workingName);
     if (startsWith("CHR", workingName))
         offset = 3;
-    strncpy(mangledUcscName, workingName+offset, size-offset);
+    safencpy(mangledUcscName, size, workingName+offset, strlen(workingName+offset));
 }
 
 
 char *hicLoadHeader(char *filename, struct hicMeta **header, char *ucscAssembly)
 /* Create a hicMeta structure for the supplied Hi-C file.  If
  * the return value is non-NULL, it points to a string containing
  * an error message that explains why the retrieval failed. */
 {
 char *genome;
 char **chromosomes, **bpResolutions, **attributes;
 int *chromSizes, nChroms, nBpRes, nAttributes;
 
 char *errMsg = CstrawHeader(filename, &genome, &chromosomes, &chromSizes, &nChroms, &bpResolutions, &nBpRes, NULL, NULL, &attributes, &nAttributes);
 if (errMsg != NULL)
     return errMsg;
 
 struct hicMeta *newMeta = NULL;
 AllocVar(newMeta);
 newMeta->fileAssembly = genome;
 newMeta->nRes = nBpRes;
 newMeta->resolutions = bpResolutions;
 newMeta->nChroms = nChroms;
 newMeta->chromNames = chromosomes;
 newMeta->chromSizes = chromSizes;
 newMeta->ucscToAlias = NULL;
 newMeta->ucscAssembly = cloneString(ucscAssembly);
 newMeta->filename = cloneString(filename);
 newMeta->attributes = attributes;
 newMeta->nAttributes = nAttributes;
 
 *header = newMeta;
 
-struct slName *ucscNameList = hAllChromNames(newMeta->ucscAssembly), *ucscName = NULL;
+struct slName *ucscNameList = NULL, *ucscName = NULL;
+if (newMeta->ucscAssembly != NULL)
+    ucscNameList = hAllChromNames(newMeta->ucscAssembly);
 struct hash *ucscToChromAlias = NULL;
 if ((newMeta->ucscAssembly != NULL) && !trackHubDatabase(ucscAssembly))
     ucscToChromAlias = chromAliasMakeReverseLookupTable(newMeta->ucscAssembly);
 
 struct slName *hicChromNames = slNameListFromStringArray(chromosomes, nChroms);
 struct hash *hicChromHash = hashSetFromSlNameList(hicChromNames);
 struct hash *ucscToHicName = newHash(0);
 
 // For each UCSC chrom name, try to find a .hic file chromosome to fetch annotation from.
 for (ucscName = ucscNameList; ucscName != NULL; ucscName = ucscName->next)
     {
     char mangledName[2048];
     mangleName(ucscName->name, mangledName, sizeof(mangledName));
     if (hashLookup(hicChromHash, ucscName->name))
         hashAdd(ucscToHicName, ucscName->name, cloneString(ucscName->name));
     else if (hashLookup(hicChromHash, mangledName))
         hashAdd(ucscToHicName, ucscName->name, cloneString(mangledName));
     else if (ucscToChromAlias != NULL)
         {
         // No hits on the primary chromosome name; time to start going through aliases.
         struct hashEl *thisEl = hashLookup(ucscToChromAlias, ucscName->name);
         while (thisEl != NULL)
             {
             struct chromAlias *cA = (struct chromAlias*) thisEl->val;
             if (hashLookup(hicChromHash, cA->alias))
                 {
                 hashAdd(ucscToHicName, ucscName->name, cloneString(cA->alias));
                 break;
                 }
             mangleName(cA->alias, mangledName, sizeof(mangledName));
             if (hashLookup(hicChromHash, mangledName))
                 {
                 hashAdd(ucscToHicName, ucscName->name, cloneString(mangledName));
                 break;
                 }
             thisEl = hashLookupNext(thisEl);
             }
         }
     }
 newMeta->ucscToAlias = ucscToHicName;
 
 if (ucscToChromAlias != NULL)
     hashFreeWithVals(&ucscToChromAlias, chromAliasFree);
 hashFree(&hicChromHash);
 slNameFreeList(&hicChromNames);
 slNameFreeList(&ucscNameList);
 
 return NULL;
 }
 
 
 struct interact *interactFromHic(char *chrom1, int start1, char *chrom2, int start2, int size, double value)
 /* Given some data values from an interaction in a hic file, build a corresponding interact structure. */
 {
 struct interact *new = NULL;
 AllocVar(new);
 
 new->chrom = cloneString(chrom1);
 // start1 is always before start2 on same-chromosome records, so start1 is always the start.
 // On records that link between chromosomes, just use the coordinates for this chromosome.
 new->chromStart = start1;
 if (sameWord(chrom1, chrom2))
     new->chromEnd = start2+size;
 else
     new->chromEnd = start1+size;
 new->name = cloneString("");
 new->score = 0; // ignored
 new->value = value;
 new->exp = cloneString(".");
 new->color = 0;
 new->sourceChrom = cloneString(chrom1);
 new->sourceStart = start1;
 new->sourceEnd = start1+size;
 new->sourceName = cloneString("");
 new->sourceStrand = cloneString(".");
 new->targetChrom = cloneString(chrom2);
 new->targetStart = start2;
 new->targetEnd = start2+size;
 new->targetName = cloneString("");
 new->targetStrand = cloneString(".");
 
 return new;
 }
 
 char *hicLoadData(struct hicMeta *fileInfo, int resolution, char *normalization, char *chrom1, int start1,
          int end1, char *chrom2, int start2, int end2, struct interact **resultPtr)
 /* Fetch heatmap data from a hic file.  The hic file info must be provided in fileInfo, which should be
  * populated by hicLoadHeader.  The result is a linked list of interact structures in *resultPtr,
  * and the return value (if non-NULL) is the text of any error message encountered by the underlying
  * Straw library. */
 {
 int *x, *y, numRecords=0;
 double *counts;
 
 if (!fileInfo)
     errAbort("Attempting to load hic data from a NULL hicMeta pointer");
 
 struct dyString *leftWindowPos = dyStringNew(0);
 struct dyString *rightWindowPos = dyStringNew(0);
 
 char *leftChromName = chrom1;
 char *rightChromName = chrom2;
 if (fileInfo->ucscToAlias != NULL)
     {
     leftChromName = (char*) hashFindVal(fileInfo->ucscToAlias, leftChromName);
     if (leftChromName == NULL)
         leftChromName = chrom1;
     rightChromName = (char*) hashFindVal(fileInfo->ucscToAlias, rightChromName);
     if (rightChromName == NULL)
         rightChromName = chrom2;
     }
 dyStringPrintf(leftWindowPos, "%s:%d:%d", leftChromName, start1, end1);
 dyStringPrintf(rightWindowPos, "%s:%d:%d", rightChromName, start2, end2);
 
 char *networkErrMsg = Cstraw(normalization, fileInfo->filename, resolution, dyStringContents(leftWindowPos),
          dyStringContents(rightWindowPos), "BP", &x, &y, &counts, &numRecords);
 
 int i=0;
 for (i=0; i<numRecords; i++)
     {
     if (isnan(counts[i]))
         {
         // Yes, apparently NAN is possible with normalized values in some methods.  Ignore those.
         continue;
         }
 
     struct interact *new = interactFromHic(chrom1, x[i], chrom2, y[i], resolution, counts[i]);
     slAddHead(resultPtr, new);
 
     if (differentWord(chrom1, chrom2))
         {
         // a second interact structure must be created on the other chromosome
         new = interactFromHic(chrom2, y[i], chrom1, x[i], resolution, counts[i]);
         slAddHead(resultPtr, new);
         }
     }
 return networkErrMsg;
 }