src/utils/clusterMatrixToBarchartBed/clusterMatrixToBarchartBed.c f0c88ba99715e71d8868232507ddf166270e1e33

f0c88ba99715e71d8868232507ddf166270e1e33
kent
  Fri Dec 18 18:19:57 2020 -0800
Allowing a gene name to be multiply mapped.

diff --git src/utils/clusterMatrixToBarchartBed/clusterMatrixToBarchartBed.c src/utils/clusterMatrixToBarchartBed/clusterMatrixToBarchartBed.c
index b757bdb..ea28755 100644
--- src/utils/clusterMatrixToBarchartBed/clusterMatrixToBarchartBed.c
+++ src/utils/clusterMatrixToBarchartBed/clusterMatrixToBarchartBed.c
@@ -47,31 +47,31 @@
 while (lineFileNextReal(lf, &line))
     {
     if (colCount == 0)
         {
 	*retColCount = colCount = chopByChar(line, '\t', NULL, 0);
 	verbose(2, "Got %d columns in first real line\n", colCount);
 	colAlloc = colCount + 1;  // +1 so we can detect unexpected input and complain 
 	lmAllocArray(hash->lm, row, colAlloc);
 	}
     int count = chopByChar(line, '\t', row, colAlloc);
     if (count != colCount)
         {
 	errAbort("Expecting %d words, got more than that line %d of %s", 
 	    colCount, lf->lineIx, lf->fileName);
 	}
-    hashAddUnique(hash, row[keyColIx], lmCloneRow(hash->lm, row, colCount) );
+    hashAdd(hash, row[keyColIx], lmCloneRow(hash->lm, row, colCount) );
     }
 lineFileClose(&lf);
 return hash;
 }
 
 void hashSamplesAndClusters(char *tsvFile, 
     struct hash **retSampleHash, struct hash **retClusterHash)
 /* Read two column tsv file into a hash keyed by first column */
 {
 struct hash *sampleHash = hashNew(0);
 struct hash *clusterHash = hashNew(0);
 char *row[2];
 struct lineFile *lf = lineFileOpen(tsvFile, TRUE);
 while (lineFileNextRowTab(lf, row, ArraySize(row)) )
     {
@@ -192,40 +192,45 @@
      * start with # character. */
     int lineLength = 0;
     char *line;
     if (!lineFileNext(lf, &line, &lineLength))
         break;
     char *s = skipLeadingSpaces(line);
     char c = s[0];
     if (c == 0 || c == '#')
         continue;
 
     /* Chop it into tabs */
     int rowSize = chopByChar(line, '\t', matrixRow, colAlloc);
     lineFileExpectWords(lf, colCount, rowSize);
 
     char *geneName = matrixRow[0];
-    char **geneBedVal = hashFindVal(geneHash, geneName);
-    if (geneBedVal == NULL)
+    struct hashEl *onePos = hashLookup(geneHash, geneName);
+    if (onePos == NULL)
 	{
 	warn("Can't find gene %s in %s", geneName, geneBed);
 	++missCount;
 	continue;
 	}
     else
+	{
 	++hitCount;
+	}
 
+    for (; onePos != NULL; onePos = hashLookupNext(onePos))
+        {
+	char **geneBedVal = onePos->val;
 
 	/* Zero out cluster histogram */
 	int i;
 	for (i=0; i<clusterCount; ++i)
 	    {
 	    clusterTotal[i] = 0.0;
 	    clusterElements[i] = 0;
 	    }
 
 	zeroBytes(&clusterTotal, sizeof(clusterTotal));
 	zeroBytes(&clusterElements, sizeof(clusterElements));
 
 	/* Loop through rest of row filling in histogram */
 	for (i=1; i<colCount; ++i)
 	    {
@@ -247,30 +252,33 @@
 	    {
 	    if (i != 0)
 	       fprintf(f, ",");
 	    if (clMean)
 		fprintf(f, "%g",  clusterTotal[i]/clusterElements[i]);
 	    else
 		fprintf(f, "%g", doubleMedian(clusterElements[i], clusterSamples[i]));
 	    }
 	
 	/* Data file offset info */
 	if (clDataOffset)
 	    fprintf(f, "\t%lld\t%lld",  (long long)lineFileTell(lf), (long long)lineLength);
 
 	fprintf(f, "\n");
 	}
+
+
+    }
 verbose(1, "%d genes found, %d missed\n", hitCount, missCount);
 carefulClose(&f);
 }
 
 int main(int argc, char *argv[])
 /* Program entry point.  Hands the command line to optionInit, insists that
  * argc is exactly 5 afterwards (program name plus four arguments), records
  * the dataOffset and mean flags in the cl* globals, and then runs the
  * conversion on the four arguments. */
 {
 optionInit(&argc, argv, options);

 /* Anything other than four arguments gets the usage message. */
 int argCountOk = (argc == 5);
 if (!argCountOk)
     usage();

 /* The data offset flag is accepted under two spellings: _dataOffset and dataOffset. */
 if (optionExists("_dataOffset") || optionExists("dataOffset"))
     clDataOffset = 1;
 else
     clDataOffset = 0;
 clMean = optionExists("mean");

 /* Arguments are passed through in command-line order. */
 char **args = argv + 1;
 clusterMatrixToBarchartBed(args[0], args[1], args[2], args[3]);
 return 0;
 }