56faab879b052fc2cdf3265cd34b36064faf6fe9
ceisenhart
  Tue Mar 8 12:33:32 2016 -0800
Changing the program name as suggested by Jim in #16789, refs #16182

diff --git src/hg/expMatrixToJson/expMatrixToJson.c src/hg/expMatrixToJson/expMatrixToJson.c
deleted file mode 100644
index 8bea130..0000000
--- src/hg/expMatrixToJson/expMatrixToJson.c
+++ /dev/null
@@ -1,636 +0,0 @@
-/* expData -  Takes in an expression matrix and clusters it using a hierarchical agglomerative clustering algorithm. The output defaults to a hierarchichal .json format, with two additional options. */
-#include "common.h"
-#include "linefile.h"
-#include "hash.h"
-#include "options.h"
-#include "obscure.h"
-#include "memalloc.h"
-#include "jksql.h"
-#include "expData.h"
-#include "jsonWrite.h"
-#include "sqlList.h"
-#include "hacTree.h"
-#include "rainbow.h" 
-
-boolean clCSV = FALSE; // Converts the comma separated matrix into a tab based file. 
-int clThreads = 10; // The number of threads to run with the multiThreads option
-int clMemLim = 4; // The amount of memeory the program can use, read in Gigabytes. 
-float clLongest = 0;  // Used to normalize link distances in rlinkJson.
-char* clDescFile = NULL; // The user can provide a description file 
-char* clAttributeTable = NULL; // The user can provide an attributes table... this may get removed soon.  
-int nodeCount; //The number of nodes. 
-int internalNodes = 0; 
-void usage()
-/* Explain usage and exit. */
-{
-errAbort(
-    "expMatrixToJson -  Takes in an expression matrix and outputs a binary tree clustering the data.\n"
-    "			The tree is output as a .json file,  a .html file is generated to view the \n"
-    "			tree.  The files are named using the output argument (ex output.json, output.html).\n"
-    "usage:\n"
-    "   expMatrixToJson [options] matrix output\n"
-    "options:\n"
-    "    -CSV               The input matrix is in .csv format. \n"
-    "    -threads=int       Sets the thread count for the multiThreads option, default is 10 \n"
-    "    -memLim=int        Sets the amount of memeory the program can use before aborting. The default is 4G. \n"
-    "    -verbose=2         Show basic run stats. \n"
-    "    -verbose=3         Show all run stats. Very ugly, avoid at all costs. \n" 
-    "    -descFile=string   The user is providing a description file. The description file must provide a \n"
-    "                       description for each cell line in the expression matrix. There should be one description per \n" 
-    "                       line, starting on the left side of the expression matrix. The description will appear over a \n" 
-    "                       leaf node when hovered over.\n"
-    );
-}
-
-/* Command line validation table. */
-static struct optionSpec options[] = {
-    {"CSV", OPTION_BOOLEAN},
-    {"threads", OPTION_INT},
-    {"memLim", OPTION_INT},
-    {"descFile", OPTION_STRING},
-    {"attributeTable", OPTION_STRING},
-    {NULL, 0},
-    };
-
-struct slDoubleInt
-/* Used to keep track of the top ten genes contributed */
-    {
-    struct slDoubleInt *next; 
-    double val; 
-    int index; 
-    };
-
-struct bioExpVector
-/* Contains expression information for a biosample on many genes. */
-    {
-    struct bioExpVector *next;
-    char *name;	    // name of biosample.
-    char *desc;	    // description of biosample. 
-    int count;	    // Number of genes we have data for.
-    double *vector;   //  An array allocated dynamically.
-    struct rgbColor color;  // Color for this one
-    int children;   // Number of bioExpVectors used to build the current 
-    struct slDoubleInt *topGeneIndeces; // The indeces for the top 10 genes that drove the clustering up to this point 
-    int contGenes; //The number of contributing genes
-    int mergeCount; //Number in the merge chain. 
-    };
-
-
-struct slDoubleInt *slDoubleIntNew(double x, int y)
-/* Return a new doubleInt */
-    {
-    struct slDoubleInt *a;
-    AllocVar(a);
-    a->val = x;
-    a->index = y; 
-    return a;
-    }
-
-int slDoubleIntCmp(const void *va, const void *vb)
-/* Compare two doubleInts */
-{
-    const struct slDoubleInt *a = *((struct slDoubleInt **)va);
-    const struct slDoubleInt *b = *((struct slDoubleInt **)vb);
-    double diff = a->val - b->val;
-    if (diff < 0)
-	return -1;
-    else if (diff > 0)
-	return 1;
-    else
-	return 0;
-    }
-
-double stringToDouble(char *s)
-/* Convert string to a double.  Assumes all of string is number
- * and aborts on an error. Errors on 'nan'*/
-    {
-    char* end;
-    double val = strtod(s, &end);
-
-    if (val != val) errAbort("A value of %f was encountered. Please change this value then re run the program.", val); 
-    if ((end == s) || (*end != '\0'))
-	errAbort("invalid double: %s", s);
-    return val;
-    }
-
-struct bioExpVector *bioExpVectorListFromFile(char *matrixFile)
-/* Read a tab-delimited file and return list of bioExpVectors */
-    {
-    int vectorSize = 0;
-    struct lineFile *lf = lineFileOpen(matrixFile, TRUE);
-    char *line, **row = NULL;
-    struct bioExpVector *list = NULL, *el;
-    while (lineFileNextReal(lf, &line))
-	{
-	++nodeCount; 
-	if (vectorSize == 0)
-	    {
-	    // Detect first row.
-	    vectorSize = chopByWhite(line, NULL, 0);  // counting up
-	    AllocArray(row, vectorSize);
-	    continue;
-	    }
-	AllocVar(el);
-	AllocArray(el->vector, vectorSize);
-	el->count = chopByWhite(line, row, vectorSize);
-	assert(el->count == vectorSize);
-	int i;
-	for (i = 0; i < el->count; ++i)
-	    el->vector[i] = stringToDouble(row[i]);
-	el->children = 1;
-	el->contGenes = 0;
-	slAddHead(&list, el);
-	}
-    lineFileClose(&lf);
-    slReverse(&list);
-    return list;
-    }
-
-int fillInNames(struct bioExpVector *list, char *nameFile)
-/* Fill in name field from file. */
-    {
-    struct lineFile *lf = lineFileOpen(nameFile, TRUE);
-    char *line;
-    struct bioExpVector *el = list;
-    int maxSize = 0 ; 
-
-    while (lineFileNextReal(lf, &line))
-	{
-	if (el == NULL)
-	    {
-	    warn("More names than items in list");
-	    break;
-	    }
-	char *fields[2];
-	if (strlen(line) > maxSize) maxSize = strlen(line); 
-	int fieldCount = chopTabs(line, fields);
-	if (fieldCount >= 1)
-	   {
-	   el->name = cloneString(fields[0]);
-	   if (fieldCount >= 2)
-	       el->desc = cloneString(fields[1]);
-	   else
-	       el->desc = cloneString("0");
-	   }
-	el = el->next;
-	}
-    if (el != NULL)
-	errAbort("More items in matrix file than %s", nameFile);
-    lineFileClose(&lf);
-    return maxSize; 
-    }
-
-static void rAddLeaf(struct hacTree *tree, struct slRef **pList)
-/* Recursively add leaf to list */
-    {
-    if (tree->left == NULL && tree->right == NULL)
-	refAdd(pList, tree->itemOrCluster);
-    else
-	{
-	rAddLeaf(tree->left, pList);
-	rAddLeaf(tree->right, pList);
-	}
-    }
-
-struct slRef *getOrderedLeafList(struct hacTree *tree)
-/* Return list of references to bioExpVectors from leaf nodes
- * ordered by position in hacTree */
-    {
-    struct slRef *leafList = NULL;
-    rAddLeaf(tree, &leafList);
-    slReverse(&leafList);
-    return leafList;
-    }
-
-
-static void printJsonNode(FILE *f, struct hacTree *tree, int level, double distance, struct slName *geneNames) 
-    {
-    int i; 
-    struct bioExpVector *bio = (struct bioExpVector *)tree->itemOrCluster;
-    struct rgbColor colors = bio->color;
-    for (i = 0;  i < level + 1;  i++)
-	fputc(' ', f);
-    distance = tree->childDistance/clLongest;
-    double length = 0; 
-    if (tree->parent != NULL)
-	{
-	length = tree->parent->childDistance;
-	}
-    ++internalNodes; 
-    fprintf(f, "{\"name\":\" \", \"number\":\"%i\", \"kids\":\"%i\", \"tpmDistance\":"
-			"\"%f\", \"length\": \"%f\",  \"colorGroup\": \"rgb(%i,""%i,%i)\","
-			"\"contGenes\":\"%i\",\"geneList\": {",internalNodes,  bio->children,
-			tree->childDistance, length, colors.r,colors.g,colors.b, bio->contGenes);
-    
-    struct slDoubleInt *j;
-    for (j = bio->topGeneIndeces; j != NULL; j = j->next)
-	{
-	struct slName *geneName = slElementFromIx(geneNames, j->index); 
-	fprintf(f,"\"%s\":\"%f\"", geneName->name, j->val);  
-	if (j->next != NULL) fprintf(f, ", "); 
-	
-	}
-    fprintf(f , "},"); 
-    if (distance != distance) distance = 0;
-    struct rgbColor wTB; 
-    struct rgbColor wTBsqrt; 
-    struct rgbColor wTBquad; 
-    if (distance == 0) {
-	wTB = whiteToBlackRainbowAtPos(0);
-	wTBsqrt = whiteToBlackRainbowAtPos(0);
-	wTBquad = whiteToBlackRainbowAtPos(0);
-	}
-    else  {
-	wTB = whiteToBlackRainbowAtPos(distance*.95);
-	wTBsqrt = whiteToBlackRainbowAtPos(sqrt(distance*.95));
-	wTBquad = whiteToBlackRainbowAtPos(sqrt(sqrt(distance*.95)));
-	}
-    fprintf(f, "\"normalizedDistance\": \"%f\", \"whiteToBlack\":\"rgb(%i,%i,%i)\", \"whiteTo", 
-			distance, wTB.r, wTB.g, wTB.b); 
-    fprintf(f, "BlackSqrt\":\"rgb(%i,%i,%i)\", \"whiteToBlackQuad\":\"rgb(%i,%i,%i)\",\n",
-			wTBsqrt.r, wTBsqrt.g, wTBsqrt.b, wTBquad.r, wTBquad.g, wTBquad.b);
-    for (i = 0;  i < level + 1;  i++)
-	fputc(' ', f);
-    fprintf(f, "\"children\":[\n");
-    }
-
-static void rPrintHierarchicalJson(FILE *f, struct hacTree *tree, int level, double distance, struct slName *geneNames)
-/* Recursively prints out the elements of the hierarchical .json file. */
-    {
-    struct bioExpVector *bio = (struct bioExpVector *)tree->itemOrCluster;
-    char *tissue = bio->name;
-    struct rgbColor colors = bio->color;
-    if (tree->childDistance > clLongest)
-	/* In practice the first distance will be the longest, and is used for normalization. */ 
-	clLongest = tree->childDistance;
-    
-    int i;
-    for (i = 0;  i < level;  i++)
-	fputc(' ', f); // correct spacing for .json format
-    
-    if (tree->left == NULL && tree->right == NULL) // Check if the current node is a leaf
-	{
-	fprintf(f, "{\"name\":\"%s\",\"kids\":\"0\",\"length\":\"%f\",\"colorGroup\":\"rgb(%i,%i,%i)\"}",
-			    tissue, tree->parent->childDistance, colors.r, colors.g, colors.b); 
-	return; 
-	}
-    // Sanity check leaf nodes, they should not have any kids, definitely should not have a single kid. 
-    else if (tree->left == NULL || tree->right == NULL)
-	errAbort("\nHow did we get a node with one NULL kid??");
-    
-    printJsonNode(f, tree, level,  distance, geneNames); 
-    
-    rPrintHierarchicalJson(f, tree->left, level+1, distance, geneNames);
-    fputs(",\n", f);
-    rPrintHierarchicalJson(f, tree->right, level+1, distance, geneNames);
-    fputc('\n', f);
-    // Closes the children block for node objects
-    for (i=0;  i < level + 1;  i++)
-	fputc(' ', f);
-    fputs("]\n", f);
-    for (i = 0;  i < level;  i++)
-	fputc(' ', f);
-    fputs("}", f);
-    }
-
-void printHierarchicalJson(FILE *f, struct hacTree *tree, char *geneNamesFile)
-/* Prints out the binary tree into .json format intended for d3
- * hierarchical layouts */
-    {
-    if (tree == NULL)
-	{
-	fputs("Empty tree.\n", f);
-	return;
-	}
-    double distance = 0;
-    struct lineFile *lf = lineFileOpen(geneNamesFile, TRUE);
-    char *line;
-    struct slName *geneNames;
-    AllocVar(geneNames);
-    while (lineFileNextReal(lf, &line))
-	{
-	struct slName *geneName = newSlName(cloneString(line));
-	slAddTail(&geneNames, geneName);
-	}
-    lineFileClose(&lf);
-    rPrintHierarchicalJson(f, tree, 0, distance, geneNames);
-    }
-
-double slBioExpVectorDistance(const struct slList *item1, const struct slList *item2, void *extraData)
-/* Return the absolute difference between the two kids' values. Weight based on how many nodes have been merged
- * to create the current node.  Designed for HAC tree use*/
-    {
-    verbose(3,"Calculating Distance...\n");
-    const struct bioExpVector *kid1 = (const struct bioExpVector *)item1;
-    const struct bioExpVector *kid2 = (const struct bioExpVector *)item2;
-    int j;
-    double diff = 0, sum = 0;
-    for (j = 0; j < kid1->count; ++j)
-	{
-	diff = kid1->vector[j] - kid2->vector[j]; 
-	sum += (diff * diff);
-	}
-    return sqrt(sum);
-    }
-
-int mergeCount = 0; 
-
-struct slList *slBioExpVectorMerge(const struct slList *item1, const struct slList *item2,
-				void *unusedExtraData)
-/* Make a new slPair where the name is the children names concattenated and the 
- * value is the average of kids' values.
- * Designed for HAC tree use*/
-    {
-    verbose(3,"Merging...\n");
-    const struct bioExpVector *kid1 = (const struct bioExpVector *)item1;
-    const struct bioExpVector *kid2 = (const struct bioExpVector *)item2;
-    float kid1Weight = kid1->children / (float)(kid1->children + kid2->children); //Weight based on number of children.
-    float kid2Weight = kid2->children / (float)(kid1->children + kid2->children);
-    struct bioExpVector *el;
-    AllocVar(el);
-    AllocArray(el->vector, kid1->count);
-    assert(kid1->count == kid2->count);
-    el->count = kid1->count; 
-    el->name = catTwoStrings(kid1->name, kid2->name);
-    int i;
-    mergeCount++; 
-    int gCount = 0;
-    for (i = 0; i < el->count; ++i)
-	{
-	if (kid1->vector[i] == kid2->vector[i]) // We are doing thousands of merges, lets cut out useless compute where we can. 
-	    {
-	    el->vector[i] = kid1->vector[i];  
-	    continue;    
-	    }
-	el->vector[i] = (kid1Weight*kid1->vector[i] + kid2Weight*kid2->vector[i]);  
-	float diff; 
-	if (((float)(kid1->vector[i])) > ((float)(kid2->vector[i]))) diff = ((float)(kid1->vector[i])) - ((float)(kid2->vector[i]));
-	else diff = ((float)(kid2->vector[i])) - ((float)(kid1->vector[i]));
-	if (diff != 0.0) // Only consider diffs that are non zero
-	    {
-	    ++el->contGenes; // This gene is non zero so it is contributing 
-	    ++gCount;
-	    int index = i + 1; 
-	    if (gCount <= 10){
-		struct slDoubleInt *newGene = slDoubleIntNew(diff, index); 
-		slAddHead(&el->topGeneIndeces, newGene); 
-		slSort(&el->topGeneIndeces, slDoubleIntCmp); 
-		}
-	    else{
-		if (el->vector[i] > el->topGeneIndeces->val){
-		    slPopHead(el->topGeneIndeces); 
-		    struct slDoubleInt *newGene = slDoubleIntNew(diff, index); 
-		    slAddHead(&el->topGeneIndeces, newGene); 
-		    slSort(&el->topGeneIndeces, slDoubleIntCmp); 
-		    }
-		}
-	    }
-	}
-    slReverse(&el->topGeneIndeces); 
-    el->mergeCount = mergeCount; 
-    el->children = kid1->children + kid2->children;
-    return (struct slList *)(el);
-    }
-
-const struct slList *lastLeaf = NULL; 
-
-bool slBioExpVectorCmp(const struct slList *item1, const struct slList *item2,
-				void *unusedExtraData)
-/* Compare two bioExpVectors to decide which one becomes the left child and which becomes the right. 
- * Return TRUE if item 1 is left child and item 2 is right child.  */ 
-    {
-    const struct bioExpVector *kid1 = (const struct bioExpVector *)item1;
-    const struct bioExpVector *kid2 = (const struct bioExpVector *)item2;
-    if (kid1->children !=1 && kid2->children ==1) lastLeaf = item2; 
-    if (kid1->children ==1 && kid2->children !=1) lastLeaf = item1;
-    
-    if (kid1->children > kid2->children)
-	{
-	return TRUE; // Whoever has more kids gets to be first born 
-	}
-    else if (kid1->children < kid2->children)return FALSE;
-    else{
-	if (kid1->mergeCount < kid2->mergeCount) return TRUE;
-	else if (kid1->mergeCount > kid2->mergeCount)  return FALSE;
-	else{
-	    // These are two leaf siblings, things get a bit tricky here... 
-	    // First we find the closest neighbor (who should be a firstborn)
-	    if (lastLeaf == NULL) 
-		{
-		// This is the very first merge, not certain how to handle it so lets just return True for now
-		lastLeaf = item1; 
-		return TRUE; 
-		}
-	    else{
-		double score1 = slBioExpVectorDistance(item1, lastLeaf, NULL);
-		double score2 = slBioExpVectorDistance(item2, lastLeaf, NULL);
-		if (score1 < score2) 
-		    {
-		    lastLeaf = item2; 
-		    return TRUE; 
-		    }
-		else{
-		    lastLeaf = item1; 
-		    return FALSE;
-		    }
-		}
-	    }
-	}
-    }
-
-void colorLeaves(struct slRef *leafList)
-/* Assign colors to leaves. I am very unhappy with the inconsistencies here.  Namely the when you have two leaf siblings
- * they are essentially assigned at random, which makes this metric fairly useless?  Im less than stoked*/
-    {
-    float total = 0.0;
-    struct slRef *el, *nextEl;
-
-    /* Loop through list once to figure out the longest distance since we need to normalize */
-    for (el = leafList; el != NULL; el = nextEl)
-	{
-	nextEl = el->next;
-	if (nextEl == NULL)
-	    break;
-	struct bioExpVector *bio1 = el->val;
-	struct bioExpVector *bio2 = nextEl->val;
-	double distance = slBioExpVectorDistance((struct slList *)bio1, (struct slList *)bio2, NULL);
-	if (distance > total) total = distance; 
-	}
-
-    /* Loop through list a second time to generate actual colors. */
-    bool firstLine = TRUE; 
-    for (el = leafList; el != NULL; el = nextEl)
-	{
-	nextEl = el->next;
-	if (nextEl == NULL)
-	    break;
-	struct bioExpVector *bio1 = el->val;
-	struct bioExpVector *bio2 = nextEl->val;
-	double distance = slBioExpVectorDistance((struct slList *)bio1, (struct slList *)bio2, NULL);
-	if (firstLine) // Handle the first two nodes, color them both the same 
-	    {
-	    double normalized = distance/total; 
-	    struct rgbColor col = whiteToBlackRainbowAtPos(normalized); 
-	    bio1->color = col; 
-	    bio2->color = col; 
-	    firstLine = FALSE;
-	    continue;
-	    }
-	double normalized = distance/total; 
-	// Color the next node based on distance from previous leaf node
-	if (normalized >= .95) bio2->color = whiteToBlackRainbowAtPos(.95);
-	else bio2->color = whiteToBlackRainbowAtPos(normalized); 
-	}
-    }
-
-void convertInput(char *expMatrix, char *descFile, bool csv)
-/* Takes in a expression matrix and makes the inputs that this program will use. 
- * Namely a transposed table with the first column removed.  Makes use of system calls
- * to use cut, sed, kent utility rowsToCols, and paste (for descFile option). */
-    {
-    char cmd[1024],cmd1[1024], cmd2[1024];
-    if (csv)
-	/* A sed one liner will convert comma separated values into a tab separated values*/ 
-	{
-	char cmd3[1024]; 
-	safef(cmd3, 1024, "sed -i 's/,/\\t/g' %s ",expMatrix);  
-	verbose(2,"%s\n", cmd3);
-	mustSystem(cmd3); 
-	}
-    safef(cmd, 1024, "cut -f 1 %s | sed \'1d\' > %s.genes", expMatrix, expMatrix); 
-    mustSystem(cmd); 
-    safef(cmd1, 1024, "cat %s | sed '1d' | rowsToCols stdin %s.transposedMatrix", expMatrix, expMatrix); 
-    /* Exp matrices are X axis of cell lines and Y axis of transcripts. This causes long Y axis and short
-     * X axis, which are not handled well in C.  The matrix is transposed to get around this issue. */ 
-    verbose(2,"%s\n", cmd1);
-    mustSystem(cmd1);
-    /* Pull out the cell names, and store them in a separate file. This allows the actual data matrix to 
-     * have the first row identify the transcript, then all following rows contain only expression values.
-     * By removing the name before hand the computation was made faster and easier. */ 
-    if (descFile) 
-	{
-	char cmd3[1024]; 
-	safef(cmd2, 1024, "rowsToCols %s stdout | cut -f1 | sed \'1d\' > %s.cellNamesTemp", expMatrix, expMatrix); 
-	safef(cmd3, 1024, "paste %s.cellNamesTemp %s > %s.cellNames", expMatrix, descFile, expMatrix);
-	verbose(2,"%s\n", cmd2); 
-	mustSystem(cmd2);
-	verbose(2,"%s\n", cmd3); 
-	mustSystem(cmd3);
-	}
-    else
-	{
-	safef(cmd2, 1024, "rowsToCols %s stdout | cut -f1 | sed \'1d\' > %s.cellNames", expMatrix, expMatrix);  
-	verbose(2,"%s\n", cmd2); 
-	mustSystem(cmd2);
-	}
-    }
-
-void generateHtml(FILE *outputFile, int nameSize, char* jsonFile)
-// Generates a new .html file for the dendrogram. Will do some size calculations as well. 
-    {
-    char *pageName = cloneString(jsonFile);
-    chopSuffix(pageName);
-    int textSize = 12 - log(nodeCount);  
-    int labelLength = 10+nameSize*(15-textSize);
-    if (labelLength > 100) labelLength = 100;
-
-    fprintf(outputFile,"<!DOCTYPE html>\n"); 
-    fprintf(outputFile,"<head>\n"); 
-    fprintf(outputFile,"<title>%s</title>\n", pageName); 
-    fprintf(outputFile,"<link rel=\"stylesheet\" href=\"http://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css\">\n"); 
-    fprintf(outputFile,"<script src=\"https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js\"></script>\n"); 
-    fprintf(outputFile,"<script src=\"http://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js\"></script>\n"); 
-    fprintf(outputFile,"<script src=\"http://d3js.org/d3.v3.min.js\" type=\"text/javascript\"></script>\n"); 
-    fprintf(outputFile,"<script src=\"/js/d3.dendrograms.js\" type=\"text/javascript\"></script>\n"); 
-    fprintf(outputFile,"<div class = \"dropdown\">\n"); 
-    fprintf(outputFile,"	<div id = dropdown>\n");  
-    fprintf(outputFile,"</div>\n");
-    fprintf(outputFile,"<script>\n"); 
-    fprintf(outputFile,"	function load() {\n"); 
-    fprintf(outputFile,"	var data;\n\n"); 
-    fprintf(outputFile,"	d3.json(\"%s\", function(error,json){\n", jsonFile); 
-    fprintf(outputFile,"		if (error) return console.warn(error);\n"); 
-    fprintf(outputFile,"		data = json;\n"); 
-    fprintf(outputFile,"			d3.dendrogram.makeRadialDendrogram('#dendrogram', data,{\n"); 
-    fprintf(outputFile,"			});\n"); 
-    fprintf(outputFile,"		});\n"); 
-    fprintf(outputFile,"  	}\n"); 
-    fprintf(outputFile,"</script>\n"); 
-    fprintf(outputFile,"<style type=\"text/css\" media=\"screen\">\n"); 
-    fprintf(outputFile,"  body { font-family: \"Helvetica Neue\", Helvetica, sans-serif; }\n"); 
-    fprintf(outputFile,"  td { vertical-align: top; }\n"); 
-    fprintf(outputFile,"</style>\n"); 
-    fprintf(outputFile,"</head>\n"); 
-    fprintf(outputFile,"<body onload=\"load()\">\n"); 
-    fprintf(outputFile,"<table>\n"); 
-    fprintf(outputFile,"  <tr>\n"); 
-    fprintf(outputFile,"	<td>\n"); 
-    fprintf(outputFile,"	  <h1>Dendrogram</h1>\n"); 
-    fprintf(outputFile,"	  <div id='dendrogram'></div>\n"); 
-    fprintf(outputFile,"	</td>\n"); 
-    fprintf(outputFile,"  </tr>\n"); 
-    fprintf(outputFile,"</table>\n"); 
-    fprintf(outputFile,"</body>\n");
-    fprintf(outputFile,"</html>\n"); 
-
-    carefulClose(&outputFile); 
-    }
-
-
-
-void expData(char *matrixFile, char *outDir, char *descFile)
-/* Read matrix and names into a list of bioExpVectors, run hacTree to
- * associate them, and write output. */
-    {
-    verbose(2,"Start binary clustering of the expression matrix by euclidean distance (expMatrixToJson).\n");
-    clock_t begin, end; 
-    begin = clock(); 
-
-    convertInput(matrixFile, descFile, clCSV); 
-    struct bioExpVector *list = bioExpVectorListFromFile(catTwoStrings(matrixFile,".transposedMatrix"));
-    verbose(2,"%lld allocated after bioExpVectorListFromFile\n", (long long)carefulTotalAllocated());
-    FILE *f = mustOpen(catTwoStrings(outDir,".json"),"w");
-    struct lm *localMem = lmInit(0);
-    int size = fillInNames(list, catTwoStrings(matrixFile,".cellNames"));
-    /* Allocate new string that is a concatenation of two strings. */
-    struct hacTree *clusters = NULL;
-
-    verbose(2,"Using %i threads. \n", clThreads);  
-    clusters = hacTreeMultiThread(clThreads, (struct slList *)list, localMem, slBioExpVectorDistance,
-				slBioExpVectorMerge, slBioExpVectorCmp,  NULL, NULL);
-
-    struct slRef *orderedList = getOrderedLeafList(clusters);
-    colorLeaves(orderedList);
-    printHierarchicalJson(f, clusters, catTwoStrings(matrixFile, ".genes"));
-    FILE *htmlF = mustOpen(catTwoStrings(outDir,".html"),"w");
-    generateHtml(htmlF,size,catTwoStrings(outDir,".json")); 
-
-    // Remove temporary files
-    // These temporary files are awful sloppy... Long term goal to get rid of them. 
-    char cleanup[1024], cleanup2[1024], cleanup3[1024];
-    safef(cleanup, 1024, "rm %s.cellNames", matrixFile); 
-    safef(cleanup2, 1024, "rm %s.transposedMatrix", matrixFile); 
-    safef(cleanup3, 1024, "rm %s.genes", matrixFile); 
-    mustSystem(cleanup);
-    mustSystem(cleanup2);
-    mustSystem(cleanup3);
-
-    end = clock(); 
-    verbose(2,"%lld allocated at end. The program took %f seconds to complete.\n", (long long)carefulTotalAllocated(), (double)(end-begin)/CLOCKS_PER_SEC);
-    verbose(2,"Completed binary clustering of the expression matrix by euclidean distance (expMatrixToJson).\n");
-    }
-
-int main(int argc, char *argv[])
-/* Process command line. */
-    {
-    optionInit(&argc, argv, options);
-    clCSV = optionExists("CSV");
-    clThreads = optionInt("threads", clThreads);
-    clMemLim = optionInt("memLim", clMemLim); 
-    clDescFile = optionVal("descFile", clDescFile);
-    if (argc != 3)
-	usage();
-    pushCarefulMemHandler(1L*1024*1024*1024*clMemLim);
-    expData(argv[1], argv[2], clDescFile);
-    return 0;
-    }