56531279ae19b09424550f6af2c9c8326ecafc29
ceisenhart
  Wed Mar 2 09:44:36 2016 -0800
Cleaning up the code, removing commented-out code lines as recommended by Braney, refs #16216

diff --git src/hg/expMatrixToJson/expMatrixToJson.c src/hg/expMatrixToJson/expMatrixToJson.c
index 19b3938..9058b02 100644
--- src/hg/expMatrixToJson/expMatrixToJson.c
+++ src/hg/expMatrixToJson/expMatrixToJson.c
@@ -43,63 +43,64 @@
     );
 }
 
 /* Command line validation table. */
 static struct optionSpec options[] = {
     {"multiThreads", OPTION_BOOLEAN},
     {"CSV", OPTION_BOOLEAN},
     {"threads", OPTION_INT},
     {"memLim", OPTION_INT},
     {"descFile", OPTION_STRING},
     {"attributeTable", OPTION_STRING},
     {NULL, 0},
     };
 
 struct slDoubleInt
+/* A list node pairing a double value with an int index; used to keep track of the top ten contributing genes. */
     {
     struct slDoubleInt *next; 
     double val; 
     int index; 
     };
 
 struct bioExpVector
 /* Contains expression information for a biosample on many genes. */
     {
     struct bioExpVector *next;
     char *name;	    // name of biosample.
     char *desc;	    // description of biosample. 
     int count;	    // Number of genes we have data for.
     double *vector;   //  An array allocated dynamically.
     struct rgbColor color;  // Color for this one
     int children;   // Number of bioExpVectors used to build the current 
     struct slDoubleInt *topGeneIndeces; // The indeces for the top 10 genes that drove the clustering up to this point 
     int contGenes; //The number of contributing genes
     };
 
 
 struct slDoubleInt *slDoubleIntNew(double x, int y)
-/* Return a new double. */
+/* Return a new slDoubleInt with val set to x and index set to y. */
 {
 struct slDoubleInt *a;
 AllocVar(a);
 a->val = x;
 a->index = y; 
 return a;
 }
 
 int slDoubleIntCmp(const void *va, const void *vb)
-/* Compare two slDoubles. */
+/* Compare two slDoubleInts by val; returns -1, 0, or 1. */
 {
 const struct slDoubleInt *a = *((struct slDoubleInt **)va);
 const struct slDoubleInt *b = *((struct slDoubleInt **)vb);
 double diff = a->val - b->val;
 if (diff < 0)
     return -1;
 else if (diff > 0)
     return 1;
 else
     return 0;
 }
 
 double stringToDouble(char *s)
 /* Convert string to a double.  Assumes all of string is number
  * and aborts on an error. Errors on 'nan'*/
@@ -281,32 +282,30 @@
     for (i = 0;  i < level;  i++)
 	fputc(' ', f);
     fputs("}", f);
     }
 
 void printHierarchicalJson(FILE *f, struct hacTree *tree, char *geneNamesFile)
 /* Prints out the binary tree into .json format intended for d3
  * hierarchical layouts */
     {
     if (tree == NULL)
 	{
 	fputs("Empty tree.\n", f);
 	return;
 	}
     double distance = 0;
-    
-    
     struct lineFile *lf = lineFileOpen(geneNamesFile, TRUE);
     char *line;
     struct slName *geneNames;
     AllocVar(geneNames);
     while (lineFileNextReal(lf, &line))
 	{
 	struct slName *geneName = newSlName(cloneString(line));
 	slAddTail(&geneNames, geneName);
 	}
     lineFileClose(&lf);
     rPrintHierarchicalJson(f, tree, 0, distance, geneNames);
     fputc('\n', f);
     }
 
 
@@ -315,92 +314,86 @@
  * to create the current node.  Designed for HAC tree use*/
     {
     verbose(3,"Calculating Distance...\n");
     const struct bioExpVector *kid1 = (const struct bioExpVector *)item1;
     const struct bioExpVector *kid2 = (const struct bioExpVector *)item2;
     int j;
     double diff = 0, sum = 0;
     for (j = 0; j < kid1->count; ++j)
 	{
 	diff = kid1->vector[j] - kid2->vector[j]; 
 	sum += (diff * diff);
 	}
     return sqrt(sum);
     }
 
-
 struct slList *slBioExpVectorMerge(const struct slList *item1, const struct slList *item2,
 				void *unusedExtraData)
 /* Make a new slPair where the name is the children names concattenated and the 
  * value is the average of kids' values.
  * Designed for HAC tree use*/
     {
     verbose(3,"Merging...\n");
     const struct bioExpVector *kid1 = (const struct bioExpVector *)item1;
     const struct bioExpVector *kid2 = (const struct bioExpVector *)item2;
-    float kid1Weight = kid1->children / (float)(kid1->children + kid2->children);
+    float kid1Weight = kid1->children / (float)(kid1->children + kid2->children); //Weight based on number of children.
     float kid2Weight = kid2->children / (float)(kid1->children + kid2->children);
     struct bioExpVector *el;
     AllocVar(el);
     AllocArray(el->vector, kid1->count);
     assert(kid1->count == kid2->count);
     el->count = kid1->count; 
     el->name = catTwoStrings(kid1->name, kid2->name);
     int i;
     int gCount = 0;
     for (i = 0; i < el->count; ++i)
 	{
-	if (kid1->vector[i] == kid2->vector[i]) // Were doing thousands of merges, lets cut out useless compute where we can. 
+	if (kid1->vector[i] == kid2->vector[i]) // We are doing thousands of merges, lets cut out useless compute where we can. 
 	    {
 	    el->vector[i] = kid1->vector[i];  
 	    continue;    
 	    }
-	el->vector[i] = (kid1Weight*kid1->vector[i] + kid2Weight*kid2->vector[i]); // Weight based on number of children. 
+	el->vector[i] = (kid1Weight*kid1->vector[i] + kid2Weight*kid2->vector[i]);  
 	float diff; 
-	//if (((float)(kid1Weight*kid1->vector[i])) > ((float)(kid2Weight*kid2->vector[i]))) 
-	//    {diff = ((float)(kid1Weight*kid1->vector[i])) - ((float)(kid2Weight*kid2->vector[i]));}
-	//else {diff = ((float)(kid2Weight*kid2->vector[i])) - ((float)(kid1Weight*kid1->vector[i]));} 
-	if (((float)(kid1->vector[i])) > ((float)(kid2->vector[i]))) 
-	    {diff = ((float)(kid1->vector[i])) - ((float)(kid2->vector[i]));}
-	else {diff = ((float)(kid2->vector[i])) - ((float)(kid1->vector[i]));} 
+	if (((float)(kid1->vector[i])) > ((float)(kid2->vector[i]))) diff = ((float)(kid1->vector[i])) - ((float)(kid2->vector[i]));
+	else diff = ((float)(kid2->vector[i])) - ((float)(kid1->vector[i]));
 	++el->contGenes; 
 	++gCount;
 	int index = i + 1; 
 	if (gCount <= 10){
 	    struct slDoubleInt *newGene = slDoubleIntNew(diff, index); 
 	    slAddHead(&el->topGeneIndeces, newGene); 
 	    slSort(&el->topGeneIndeces, slDoubleIntCmp); 
 	    }
 	else{
 	    if (el->vector[i] > el->topGeneIndeces->val){
 		slPopHead(el->topGeneIndeces); 
 		struct slDoubleInt *newGene = slDoubleIntNew(diff, index); 
 		slAddHead(&el->topGeneIndeces, newGene); 
 		slSort(&el->topGeneIndeces, slDoubleIntCmp); 
 		}
 	    }
 	}
     slReverse(&el->topGeneIndeces); 
     el->children = kid1->children + kid2->children;
     return (struct slList *)(el);
     }
 
 void colorLeaves(struct slRef *leafList)
 /* Assign colors of rainbow to leaves. */
     {
     float total = 0.0;
-    //double purplePos = 0.80;
     struct slRef *el, *nextEl;
 
     /* Loop through list once to figure out total, since we need to normalize */
     for (el = leafList; el != NULL; el = nextEl)
 	{
 	nextEl = el->next;
 	if (nextEl == NULL)
 	    break;
 	struct bioExpVector *bio1 = el->val;
 	struct bioExpVector *bio2 = nextEl->val;
 	double distance = slBioExpVectorDistance((struct slList *)bio1, (struct slList *)bio2, NULL);
 	if (distance != distance ) distance = 0;
 	total += distance;
 	}
 
@@ -410,42 +403,37 @@
     bool firstLine = TRUE; 
     for (el = leafList; el != NULL; el = nextEl)
 	{
 	nextEl = el->next;
 	if (nextEl == NULL)
 	    break;
 	struct bioExpVector *bio1 = el->val;
 	struct bioExpVector *bio2 = nextEl->val;
 	double distance = slBioExpVectorDistance((struct slList *)bio1, (struct slList *)bio2, NULL);
 	if (firstLine) 
 	    {
 	    double normalized = distance/total; 
 	    bio1->color = whiteToBlackRainbowAtPos(normalized); 
 	    firstLine = FALSE;
 	    }
-	//if (distance != distance ) distance = 0 ;
-	//soFar += distance;
-	//double normalized = soFar/total;
 	double normalized = distance/total; 
 	if (normalized * 100 >= .95) bio2->color = whiteToBlackRainbowAtPos(.95);
 	else bio2->color = whiteToBlackRainbowAtPos(normalized*100); 
-	//bio2->color = saturatedRainbowAtPos(distance);
 	soFar += normalized;     
 	}
     /* Set first color to correspond to 0, since not set in above loop */
     struct bioExpVector *bio = leafList->val;
-    //bio->color = saturatedRainbowAtPos(0);
     bio->color = whiteToBlackRainbowAtPos(.95);
     }
 
 void convertInput(char *expMatrix, char *descFile, bool csv)
 /* Takes in a expression matrix and makes the inputs that this program will use. 
  * Namely a transposed table with the first column removed.  Makes use of system calls
  * to use cut, sed, kent utility rowsToCols, and paste (for descFile option). */
     {
     char cmd[1024],cmd1[1024], cmd2[1024];
     if (csv)
 	/* A sed one liner will convert comma separated values into a tab separated values*/ 
 	{
 	char cmd3[1024]; 
 	safef(cmd3, 1024, "sed -i 's/,/\\t/g' %s ",expMatrix);  
 	verbose(2,"%s\n", cmd3);
@@ -473,39 +461,38 @@
 	}
     else
 	{
 	safef(cmd2, 1024, "rowsToCols %s stdout | cut -f1 | sed \'1d\' > %s.cellNames", expMatrix, expMatrix);  
 	verbose(2,"%s\n", cmd2); 
 	mustSystem(cmd2);
 	}
     }
 
 void generateHtml(FILE *outputFile, int nameSize, char* jsonFile)
 // Generates a new .html file for the dendrogram. Will do some size calculations as well. 
     {
     char *pageName = cloneString(jsonFile);
     chopSuffix(pageName);
     int textSize = 12 - log(nodeCount);  
-    //int radius = 540 + 270*log10(nodeCount);  
     int width = 10 * nodeCount; 
     int height = 10 * nodeCount; 
     int labelLength = 10+nameSize*(15-textSize);
     if (labelLength > 100) labelLength = 100;
 
     fprintf(outputFile,"<!DOCTYPE html>\n"); 
     fprintf(outputFile,"<head>\n"); 
-    fprintf(outputFile,"<title>New dendrogram tests</title>\n"); 
+    fprintf(outputFile,"<title>%s</title>\n", pageName); 
     fprintf(outputFile,"<link rel=\"stylesheet\" href=\"http://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css\">\n"); 
     fprintf(outputFile,"<script src=\"https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js\"></script>\n"); 
     fprintf(outputFile,"<script src=\"http://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js\"></script>\n"); 
     fprintf(outputFile,"<script src=\"http://d3js.org/d3.v3.min.js\" type=\"text/javascript\"></script>\n"); 
     fprintf(outputFile,"<script src=\"/js/d3.dendrograms.js\" type=\"text/javascript\"></script>\n"); 
     fprintf(outputFile,"<div class = \"dropdown\">\n"); 
     fprintf(outputFile,"	<div id = dropdown>\n");  
     fprintf(outputFile,"</div>\n");
     fprintf(outputFile,"<script>\n"); 
     fprintf(outputFile,"	function load() {\n"); 
     fprintf(outputFile,"	var data;\n\n"); 
     fprintf(outputFile,"	d3.json(\"%s\", function(error,json){\n", jsonFile); 
     fprintf(outputFile,"		if (error) return console.warn(error);\n"); 
     fprintf(outputFile,"		data = json;\n"); 
     fprintf(outputFile,"			d3.dendrogram.makeRadialDendrogram('#dendrogram', data,{\n");