3d163f073f89dd5a9d3da02d12db1ec46c9f3d51 ceisenhart Mon Mar 7 13:03:23 2016 -0800 Modified the clustering algorithm, added in a sibling comparison function that decides the first born child. Refs #16216 diff --git src/hg/expMatrixToJson/expMatrixToJson.c src/hg/expMatrixToJson/expMatrixToJson.c index 9058b02..f5aae6e 100644 --- src/hg/expMatrixToJson/expMatrixToJson.c +++ src/hg/expMatrixToJson/expMatrixToJson.c @@ -1,596 +1,637 @@ /* expData - Takes in an expression matrix and clusters it using a hierarchical agglomerative clustering algorithm. The output defaults to a hierarchichal .json format, with two additional options. */ #include "common.h" #include "linefile.h" #include "hash.h" #include "options.h" #include "obscure.h" #include "memalloc.h" #include "jksql.h" #include "expData.h" +#include "jsonWrite.h" #include "sqlList.h" #include "hacTree.h" #include "rainbow.h" boolean clCSV = FALSE; // Converts the comma separated matrix into a tab based file. -boolean clMultiThreads = FALSE; // Allows the user to run the program with multiple threads, default is off. int clThreads = 10; // The number of threads to run with the multiThreads option int clMemLim = 4; // The amount of memeory the program can use, read in Gigabytes. float clLongest = 0; // Used to normalize link distances in rlinkJson. char* clDescFile = NULL; // The user can provide a description file char* clAttributeTable = NULL; // The user can provide an attributes table... this may get removed soon. int nodeCount; //The number of nodes. int internalNodes = 0; void usage() /* Explain usage and exit. */ { errAbort( "expMatrixToJson - Takes in an expression matrix and outputs a binary tree clustering the data.\n" " The tree is output as a .json file, a .html file is generated to view the \n" " tree. The files are named using the output argument (ex output.json, output.html).\n" "usage:\n" " expMatrixToJson [options] matrix output\n" "options:\n" - " -multiThreads The program will run on multiple threads. \n" " -CSV The input matrix is in .csv format. \n" " -threads=int Sets the thread count for the multiThreads option, default is 10 \n" " -memLim=int Sets the amount of memeory the program can use before aborting. The default is 4G. \n" " -verbose=2 Show basic run stats. \n" " -verbose=3 Show all run stats. Very ugly, avoid at all costs. \n" " -descFile=string The user is providing a description file. The description file must provide a \n" " description for each cell line in the expression matrix. There should be one description per \n" " line, starting on the left side of the expression matrix. The description will appear over a \n" " leaf node when hovered over.\n" ); } /* Command line validation table. */ static struct optionSpec options[] = { - {"multiThreads", OPTION_BOOLEAN}, {"CSV", OPTION_BOOLEAN}, {"threads", OPTION_INT}, {"memLim", OPTION_INT}, {"descFile", OPTION_STRING}, {"attributeTable", OPTION_STRING}, {NULL, 0}, }; struct slDoubleInt /* Used to keep track of the top ten genes contributed */ { struct slDoubleInt *next; double val; int index; }; struct bioExpVector /* Contains expression information for a biosample on many genes. */ { struct bioExpVector *next; char *name; // name of biosample. char *desc; // description of biosample. int count; // Number of genes we have data for. double *vector; // An array allocated dynamically. struct rgbColor color; // Color for this one int children; // Number of bioExpVectors used to build the current struct slDoubleInt *topGeneIndeces; // The indeces for the top 10 genes that drove the clustering up to this point int contGenes; //The number of contributing genes + int mergeCount; //Number in the merge chain. }; struct slDoubleInt *slDoubleIntNew(double x, int y) /* Return a new doubleInt */ { struct slDoubleInt *a; AllocVar(a); a->val = x; a->index = y; return a; } int slDoubleIntCmp(const void *va, const void *vb) /* Compare two doubleInts */ { const struct slDoubleInt *a = *((struct slDoubleInt **)va); const struct slDoubleInt *b = *((struct slDoubleInt **)vb); double diff = a->val - b->val; if (diff < 0) return -1; else if (diff > 0) return 1; else return 0; } double stringToDouble(char *s) /* Convert string to a double. Assumes all of string is number * and aborts on an error. Errors on 'nan'*/ { char* end; double val = strtod(s, &end); if (val != val) errAbort("A value of %f was encountered. Please change this value then re run the program.", val); if ((end == s) || (*end != '\0')) errAbort("invalid double: %s", s); return val; } struct bioExpVector *bioExpVectorListFromFile(char *matrixFile) /* Read a tab-delimited file and return list of bioExpVectors */ { int vectorSize = 0; struct lineFile *lf = lineFileOpen(matrixFile, TRUE); char *line, **row = NULL; struct bioExpVector *list = NULL, *el; while (lineFileNextReal(lf, &line)) { ++nodeCount; if (vectorSize == 0) { // Detect first row. vectorSize = chopByWhite(line, NULL, 0); // counting up AllocArray(row, vectorSize); continue; } AllocVar(el); AllocArray(el->vector, vectorSize); el->count = chopByWhite(line, row, vectorSize); assert(el->count == vectorSize); int i; for (i = 0; i < el->count; ++i) el->vector[i] = stringToDouble(row[i]); el->children = 1; el->contGenes = 0; slAddHead(&list, el); } lineFileClose(&lf); slReverse(&list); return list; } int fillInNames(struct bioExpVector *list, char *nameFile) /* Fill in name field from file. */ { struct lineFile *lf = lineFileOpen(nameFile, TRUE); char *line; struct bioExpVector *el = list; int maxSize = 0 ; while (lineFileNextReal(lf, &line)) { if (el == NULL) { warn("More names than items in list"); break; } char *fields[2]; if (strlen(line) > maxSize) maxSize = strlen(line); int fieldCount = chopTabs(line, fields); if (fieldCount >= 1) { el->name = cloneString(fields[0]); if (fieldCount >= 2) el->desc = cloneString(fields[1]); else el->desc = cloneString("0"); } el = el->next; } if (el != NULL) errAbort("More items in matrix file than %s", nameFile); lineFileClose(&lf); return maxSize; } static void rAddLeaf(struct hacTree *tree, struct slRef **pList) /* Recursively add leaf to list */ { if (tree->left == NULL && tree->right == NULL) refAdd(pList, tree->itemOrCluster); else { rAddLeaf(tree->left, pList); rAddLeaf(tree->right, pList); } } struct slRef *getOrderedLeafList(struct hacTree *tree) /* Return list of references to bioExpVectors from leaf nodes * ordered by position in hacTree */ { struct slRef *leafList = NULL; rAddLeaf(tree, &leafList); slReverse(&leafList); return leafList; } -static void rPrintHierarchicalJson(FILE *f, struct hacTree *tree, int level, double distance, struct slName *geneNames) -/* Recursively prints out the elements of the hierarchical .json file. */ + +static void printJsonNode(FILE *f, struct hacTree *tree, int level, double distance, struct slName *geneNames) { + int i; struct bioExpVector *bio = (struct bioExpVector *)tree->itemOrCluster; - char *tissue = bio->name; struct rgbColor colors = bio->color; - if (tree->childDistance > clLongest) - /* In practice the first distance will be the longest, and is used for normalization. */ - clLongest = tree->childDistance; - int i; - for (i = 0; i < level; i++) - fputc(' ', f); // correct spacing for .json format - - // *****LEAVES***** - if (tree->left == NULL && tree->right == NULL) - // Print the leaf nodes - { - fprintf(f, "{\"name\":\"%s\",\"kids\":\"0\",\"length\":\"%f\",\"colorGroup\":\"rgb(%i,%i,%i)\"}", - tissue, tree->parent->childDistance, colors.r, colors.g, colors.b); - return; - } - else if (tree->left == NULL || tree->right == NULL) - errAbort("\nHow did we get a node with one NULL kid??"); - - // NOTE: There are no leaves past this point for (i = 0; i < level + 1; i++) fputc(' ', f); distance = tree->childDistance/clLongest; double length = 0; if (tree->parent != NULL) { length = tree->parent->childDistance; } - // *****NODES***** ++internalNodes; - fprintf(f, "{\"name\":\" \", \"number\":\"%i\", \"kids\":\"%i\", \"tpmDistance\": \"%f\", \"length\": \"%f\", \"colorGroup\": \"rgb(%i," - "%i,%i)\",\"contGenes\":\"%i\",\"geneList\": {",internalNodes, bio->children , tree->childDistance, length, colors.r,colors.g,colors.b, bio->contGenes); + fprintf(f, "{\"name\":\" \", \"number\":\"%i\", \"kids\":\"%i\", \"tpmDistance\":" + "\"%f\", \"length\": \"%f\", \"colorGroup\": \"rgb(%i,""%i,%i)\"," + "\"contGenes\":\"%i\",\"geneList\": {",internalNodes, bio->children, + tree->childDistance, length, colors.r,colors.g,colors.b, bio->contGenes); struct slDoubleInt *j; for (j = bio->topGeneIndeces; j != NULL; j = j->next) { struct slName *geneName = slElementFromIx(geneNames, j->index); fprintf(f,"\"%s\":\"%f\"", geneName->name, j->val); if (j->next != NULL) fprintf(f, ", "); } fprintf(f , "},"); if (distance != distance) distance = 0; struct rgbColor wTB; struct rgbColor wTBsqrt; struct rgbColor wTBquad; if (distance == 0) { wTB = whiteToBlackRainbowAtPos(0); wTBsqrt = whiteToBlackRainbowAtPos(0); wTBquad = whiteToBlackRainbowAtPos(0); } else { wTB = whiteToBlackRainbowAtPos(distance*.95); wTBsqrt = whiteToBlackRainbowAtPos(sqrt(distance*.95)); wTBquad = whiteToBlackRainbowAtPos(sqrt(sqrt(distance*.95))); } fprintf(f, "\"normalizedDistance\": \"%f\", \"whiteToBlack\":\"rgb(%i,%i,%i)\", \"whiteTo", distance, wTB.r, wTB.g, wTB.b); fprintf(f, "BlackSqrt\":\"rgb(%i,%i,%i)\", \"whiteToBlackQuad\":\"rgb(%i,%i,%i)\",\n", wTBsqrt.r, wTBsqrt.g, wTBsqrt.b, wTBquad.r, wTBquad.g, wTBquad.b); for (i = 0; i < level + 1; i++) fputc(' ', f); fprintf(f, "\"children\":[\n"); + } + +static void rPrintHierarchicalJson(FILE *f, struct hacTree *tree, int level, double distance, struct slName *geneNames) +/* Recursively prints out the elements of the hierarchical .json file. */ + { + struct bioExpVector *bio = (struct bioExpVector *)tree->itemOrCluster; + char *tissue = bio->name; + struct rgbColor colors = bio->color; + if (tree->childDistance > clLongest) + /* In practice the first distance will be the longest, and is used for normalization. */ + clLongest = tree->childDistance; + + int i; + for (i = 0; i < level; i++) + fputc(' ', f); // correct spacing for .json format + + if (tree->left == NULL && tree->right == NULL) // Check if the current node is a leaf + { + fprintf(f, "{\"name\":\"%s\",\"kids\":\"0\",\"length\":\"%f\",\"colorGroup\":\"rgb(%i,%i,%i)\"}", + tissue, tree->parent->childDistance, colors.r, colors.g, colors.b); + return; + } + // Sanity check leaf nodes, they should not have any kids, definitely should not have a single kid. + else if (tree->left == NULL || tree->right == NULL) + errAbort("\nHow did we get a node with one NULL kid??"); + + printJsonNode(f, tree, level, distance, geneNames); + rPrintHierarchicalJson(f, tree->left, level+1, distance, geneNames); fputs(",\n", f); rPrintHierarchicalJson(f, tree->right, level+1, distance, geneNames); fputc('\n', f); // Closes the children block for node objects for (i=0; i < level + 1; i++) fputc(' ', f); fputs("]\n", f); for (i = 0; i < level; i++) fputc(' ', f); fputs("}", f); } void printHierarchicalJson(FILE *f, struct hacTree *tree, char *geneNamesFile) /* Prints out the binary tree into .json format intended for d3 * hierarchical layouts */ { if (tree == NULL) { fputs("Empty tree.\n", f); return; } double distance = 0; struct lineFile *lf = lineFileOpen(geneNamesFile, TRUE); char *line; struct slName *geneNames; AllocVar(geneNames); while (lineFileNextReal(lf, &line)) { struct slName *geneName = newSlName(cloneString(line)); slAddTail(&geneNames, geneName); } lineFileClose(&lf); rPrintHierarchicalJson(f, tree, 0, distance, geneNames); - fputc('\n', f); } - double slBioExpVectorDistance(const struct slList *item1, const struct slList *item2, void *extraData) /* Return the absolute difference between the two kids' values. Weight based on how many nodes have been merged * to create the current node. Designed for HAC tree use*/ { verbose(3,"Calculating Distance...\n"); const struct bioExpVector *kid1 = (const struct bioExpVector *)item1; const struct bioExpVector *kid2 = (const struct bioExpVector *)item2; int j; double diff = 0, sum = 0; for (j = 0; j < kid1->count; ++j) { diff = kid1->vector[j] - kid2->vector[j]; sum += (diff * diff); } return sqrt(sum); } +int mergeCount = 0; + struct slList *slBioExpVectorMerge(const struct slList *item1, const struct slList *item2, void *unusedExtraData) /* Make a new slPair where the name is the children names concattenated and the * value is the average of kids' values. * Designed for HAC tree use*/ { verbose(3,"Merging...\n"); const struct bioExpVector *kid1 = (const struct bioExpVector *)item1; const struct bioExpVector *kid2 = (const struct bioExpVector *)item2; float kid1Weight = kid1->children / (float)(kid1->children + kid2->children); //Weight based on number of children. float kid2Weight = kid2->children / (float)(kid1->children + kid2->children); struct bioExpVector *el; AllocVar(el); AllocArray(el->vector, kid1->count); assert(kid1->count == kid2->count); el->count = kid1->count; el->name = catTwoStrings(kid1->name, kid2->name); int i; + mergeCount++; int gCount = 0; for (i = 0; i < el->count; ++i) { if (kid1->vector[i] == kid2->vector[i]) // We are doing thousands of merges, lets cut out useless compute where we can. { el->vector[i] = kid1->vector[i]; continue; } el->vector[i] = (kid1Weight*kid1->vector[i] + kid2Weight*kid2->vector[i]); float diff; if (((float)(kid1->vector[i])) > ((float)(kid2->vector[i]))) diff = ((float)(kid1->vector[i])) - ((float)(kid2->vector[i])); else diff = ((float)(kid2->vector[i])) - ((float)(kid1->vector[i])); - ++el->contGenes; + if (diff != 0.0) // Only consider diffs that are non zero + { + ++el->contGenes; // This gene is non zero so it is contributing ++gCount; int index = i + 1; if (gCount <= 10){ struct slDoubleInt *newGene = slDoubleIntNew(diff, index); slAddHead(&el->topGeneIndeces, newGene); slSort(&el->topGeneIndeces, slDoubleIntCmp); } else{ if (el->vector[i] > el->topGeneIndeces->val){ slPopHead(el->topGeneIndeces); struct slDoubleInt *newGene = slDoubleIntNew(diff, index); slAddHead(&el->topGeneIndeces, newGene); slSort(&el->topGeneIndeces, slDoubleIntCmp); } } } + } slReverse(&el->topGeneIndeces); + el->mergeCount = mergeCount; el->children = kid1->children + kid2->children; return (struct slList *)(el); } +int tempC = 0; +const struct slList *temp = NULL; + +bool slBioExpVectorCmp(const struct slList *item1, const struct slList *item2, + void *unusedExtraData) +/* Compare two bioExpVectors to decide which one becomes the left child and which becomes the right. + * Return TRUE if item 1 is left child and item 2 is right child. */ + { + const struct bioExpVector *kid1 = (const struct bioExpVector *)item1; + const struct bioExpVector *kid2 = (const struct bioExpVector *)item2; + if (kid1->children !=1 && kid2->children ==1) temp = item2; + if (kid1->children ==1 && kid2->children !=1) temp = item1; + + if (kid1->children > kid2->children) + { + return TRUE; // Whoever has more kids gets to be first born + } + else if (kid1->children < kid2->children)return FALSE; + else{ + if (kid1->mergeCount < kid2->mergeCount) return TRUE; + else if (kid1->mergeCount > kid2->mergeCount) return FALSE; + else{ + // These are two leaf siblings, things get a bit tricky here... + // First we find the closest neighbor (who should be a firstborn) + if (temp == NULL) + { + // This is the very first merge, not certain how to handle it so lets just return True for now + temp = item1; + return TRUE; + } + else{ + double score1 = slBioExpVectorDistance(item1, temp, NULL); + double score2 = slBioExpVectorDistance(item2, temp, NULL); + if (score1 < score2) + { + temp = item2; + return TRUE; + } + else{ + temp = item1; + return FALSE; + } + } + } + } + } + void colorLeaves(struct slRef *leafList) -/* Assign colors of rainbow to leaves. */ +/* Assign colors to leaves. I am very unhappy with the inconsistencies here. Namely the when you have two leaf siblings + * they are essentially assigned at random, which makes this metric fairly useless? Im less than stoked*/ { float total = 0.0; struct slRef *el, *nextEl; - /* Loop through list once to figure out total, since we need to normalize */ + /* Loop through list once to figure out the longest distance since we need to normalize */ for (el = leafList; el != NULL; el = nextEl) { nextEl = el->next; if (nextEl == NULL) break; struct bioExpVector *bio1 = el->val; struct bioExpVector *bio2 = nextEl->val; double distance = slBioExpVectorDistance((struct slList *)bio1, (struct slList *)bio2, NULL); - if (distance != distance ) distance = 0; - total += distance; + if (distance > total) total = distance; } - if (total == 0) errAbort("There doesn't seem to be any difference between these matrix columns. Aborting."); - double soFar = 0; /* Loop through list a second time to generate actual colors. */ bool firstLine = TRUE; for (el = leafList; el != NULL; el = nextEl) { nextEl = el->next; if (nextEl == NULL) break; struct bioExpVector *bio1 = el->val; struct bioExpVector *bio2 = nextEl->val; double distance = slBioExpVectorDistance((struct slList *)bio1, (struct slList *)bio2, NULL); - if (firstLine) + if (firstLine) // Handle the first two nodes, color them both the same { double normalized = distance/total; - bio1->color = whiteToBlackRainbowAtPos(normalized); + struct rgbColor col = whiteToBlackRainbowAtPos(normalized); + bio1->color = col; + bio2->color = col; firstLine = FALSE; + continue; } double normalized = distance/total; - if (normalized * 100 >= .95) bio2->color = whiteToBlackRainbowAtPos(.95); - else bio2->color = whiteToBlackRainbowAtPos(normalized*100); - soFar += normalized; + // Color the next node based on distance from previous leaf node + if (normalized >= .95) bio2->color = whiteToBlackRainbowAtPos(.95); + else bio2->color = whiteToBlackRainbowAtPos(normalized); } - /* Set first color to correspond to 0, since not set in above loop */ - struct bioExpVector *bio = leafList->val; - bio->color = whiteToBlackRainbowAtPos(.95); } void convertInput(char *expMatrix, char *descFile, bool csv) /* Takes in a expression matrix and makes the inputs that this program will use. * Namely a transposed table with the first column removed. Makes use of system calls * to use cut, sed, kent utility rowsToCols, and paste (for descFile option). */ { char cmd[1024],cmd1[1024], cmd2[1024]; if (csv) /* A sed one liner will convert comma separated values into a tab separated values*/ { char cmd3[1024]; safef(cmd3, 1024, "sed -i 's/,/\\t/g' %s ",expMatrix); verbose(2,"%s\n", cmd3); mustSystem(cmd3); } safef(cmd, 1024, "cut -f 1 %s | sed \'1d\' > %s.genes", expMatrix, expMatrix); mustSystem(cmd); safef(cmd1, 1024, "cat %s | sed '1d' | rowsToCols stdin %s.transposedMatrix", expMatrix, expMatrix); /* Exp matrices are X axis of cell lines and Y axis of transcripts. This causes long Y axis and short * X axis, which are not handled well in C. The matrix is transposed to get around this issue. */ verbose(2,"%s\n", cmd1); mustSystem(cmd1); /* Pull out the cell names, and store them in a separate file. This allows the actual data matrix to * have the first row identify the transcript, then all following rows contain only expression values. * By removing the name before hand the computation was made faster and easier. */ if (descFile) { char cmd3[1024]; safef(cmd2, 1024, "rowsToCols %s stdout | cut -f1 | sed \'1d\' > %s.cellNamesTemp", expMatrix, expMatrix); safef(cmd3, 1024, "paste %s.cellNamesTemp %s > %s.cellNames", expMatrix, descFile, expMatrix); verbose(2,"%s\n", cmd2); mustSystem(cmd2); verbose(2,"%s\n", cmd3); mustSystem(cmd3); } else { safef(cmd2, 1024, "rowsToCols %s stdout | cut -f1 | sed \'1d\' > %s.cellNames", expMatrix, expMatrix); verbose(2,"%s\n", cmd2); mustSystem(cmd2); } } void generateHtml(FILE *outputFile, int nameSize, char* jsonFile) // Generates a new .html file for the dendrogram. Will do some size calculations as well. { char *pageName = cloneString(jsonFile); chopSuffix(pageName); int textSize = 12 - log(nodeCount); - int width = 10 * nodeCount; - int height = 10 * nodeCount; int labelLength = 10+nameSize*(15-textSize); if (labelLength > 100) labelLength = 100; fprintf(outputFile,"\n"); fprintf(outputFile,"\n"); fprintf(outputFile,"%s\n", pageName); fprintf(outputFile,"\n"); fprintf(outputFile,"\n"); fprintf(outputFile,"\n"); fprintf(outputFile,"\n"); fprintf(outputFile,"\n"); fprintf(outputFile,"
\n"); fprintf(outputFile," \n"); fprintf(outputFile,"\n"); fprintf(outputFile,"\n"); fprintf(outputFile,"\n"); fprintf(outputFile,"\n"); fprintf(outputFile,"\n"); fprintf(outputFile," \n"); fprintf(outputFile," \n"); - fprintf(outputFile," \n"); fprintf(outputFile," \n"); fprintf(outputFile,"
\n"); fprintf(outputFile,"

Dendrogram

\n"); fprintf(outputFile,"
\n"); fprintf(outputFile,"
\n"); - fprintf(outputFile,"

Phylogram

\n"); - fprintf(outputFile,"
\n"); - fprintf(outputFile,"
\n"); fprintf(outputFile,"\n"); fprintf(outputFile,"\n"); carefulClose(&outputFile); } void expData(char *matrixFile, char *outDir, char *descFile) /* Read matrix and names into a list of bioExpVectors, run hacTree to * associate them, and write output. */ { verbose(2,"Start binary clustering of the expression matrix by euclidean distance (expMatrixToJson).\n"); clock_t begin, end; begin = clock(); convertInput(matrixFile, descFile, clCSV); struct bioExpVector *list = bioExpVectorListFromFile(catTwoStrings(matrixFile,".transposedMatrix")); verbose(2,"%lld allocated after bioExpVectorListFromFile\n", (long long)carefulTotalAllocated()); FILE *f = mustOpen(catTwoStrings(outDir,".json"),"w"); struct lm *localMem = lmInit(0); int size = fillInNames(list, catTwoStrings(matrixFile,".cellNames")); /* Allocate new string that is a concatenation of two strings. */ struct hacTree *clusters = NULL; - if (clMultiThreads) - { verbose(2,"Using %i threads. \n", clThreads); - clusters = hacTreeMultiThread(clThreads, (struct slList *)list, localMem, - slBioExpVectorDistance, slBioExpVectorMerge, NULL, NULL); - } - else - { - verbose(2,"Using 1 threads. \n"); - clusters = hacTreeFromItems((struct slList *)list, localMem, - slBioExpVectorDistance, slBioExpVectorMerge, NULL, NULL); - } + clusters = hacTreeMultiThread(clThreads, (struct slList *)list, localMem, slBioExpVectorDistance, + slBioExpVectorMerge, slBioExpVectorCmp, NULL, NULL); struct slRef *orderedList = getOrderedLeafList(clusters); colorLeaves(orderedList); printHierarchicalJson(f, clusters, catTwoStrings(matrixFile, ".genes")); FILE *htmlF = mustOpen(catTwoStrings(outDir,".html"),"w"); generateHtml(htmlF,size,catTwoStrings(outDir,".json")); // Remove temporary files + // These temporary files are awful sloppy... Long term goal to get rid of them. char cleanup[1024], cleanup2[1024], cleanup3[1024]; safef(cleanup, 1024, "rm %s.cellNames", matrixFile); safef(cleanup2, 1024, "rm %s.transposedMatrix", matrixFile); safef(cleanup3, 1024, "rm %s.genes", matrixFile); mustSystem(cleanup); mustSystem(cleanup2); mustSystem(cleanup3); + end = clock(); verbose(2,"%lld allocated at end. The program took %f seconds to complete.\n", (long long)carefulTotalAllocated(), (double)(end-begin)/CLOCKS_PER_SEC); verbose(2,"Completed binary clustering of the expression matrix by euclidean distance (expMatrixToJson).\n"); } int main(int argc, char *argv[]) /* Process command line. */ { optionInit(&argc, argv, options); clCSV = optionExists("CSV"); - clMultiThreads = optionExists("multiThreads"); clThreads = optionInt("threads", clThreads); clMemLim = optionInt("memLim", clMemLim); clDescFile = optionVal("descFile", clDescFile); if (argc != 3) usage(); pushCarefulMemHandler(1L*1024*1024*1024*clMemLim); expData(argv[1], argv[2], clDescFile); return 0; }