56531279ae19b09424550f6af2c9c8326ecafc29 ceisenhart Wed Mar 2 09:44:36 2016 -0800 Cleaning up the code, removing commented out code lines as recommended by Braney, refs #16216 diff --git src/hg/expMatrixToJson/expMatrixToJson.c src/hg/expMatrixToJson/expMatrixToJson.c index 19b3938..9058b02 100644 --- src/hg/expMatrixToJson/expMatrixToJson.c +++ src/hg/expMatrixToJson/expMatrixToJson.c @@ -43,63 +43,64 @@ ); } /* Command line validation table. */ static struct optionSpec options[] = { {"multiThreads", OPTION_BOOLEAN}, {"CSV", OPTION_BOOLEAN}, {"threads", OPTION_INT}, {"memLim", OPTION_INT}, {"descFile", OPTION_STRING}, {"attributeTable", OPTION_STRING}, {NULL, 0}, }; struct slDoubleInt +/* Used to keep track of the top ten genes contributed */ { struct slDoubleInt *next; double val; int index; }; struct bioExpVector /* Contains expression information for a biosample on many genes. */ { struct bioExpVector *next; char *name; // name of biosample. char *desc; // description of biosample. int count; // Number of genes we have data for. double *vector; // An array allocated dynamically. struct rgbColor color; // Color for this one int children; // Number of bioExpVectors used to build the current struct slDoubleInt *topGeneIndeces; // The indeces for the top 10 genes that drove the clustering up to this point int contGenes; //The number of contributing genes }; struct slDoubleInt *slDoubleIntNew(double x, int y) -/* Return a new double. */ +/* Return a new doubleInt */ { struct slDoubleInt *a; AllocVar(a); a->val = x; a->index = y; return a; } int slDoubleIntCmp(const void *va, const void *vb) -/* Compare two slDoubles. */ +/* Compare two doubleInts */ { const struct slDoubleInt *a = *((struct slDoubleInt **)va); const struct slDoubleInt *b = *((struct slDoubleInt **)vb); double diff = a->val - b->val; if (diff < 0) return -1; else if (diff > 0) return 1; else return 0; } double stringToDouble(char *s) /* Convert string to a double. 
Assumes all of string is number * and aborts on an error. Errors on 'nan'*/ @@ -281,32 +282,30 @@ for (i = 0; i < level; i++) fputc(' ', f); fputs("}", f); } void printHierarchicalJson(FILE *f, struct hacTree *tree, char *geneNamesFile) /* Prints out the binary tree into .json format intended for d3 * hierarchical layouts */ { if (tree == NULL) { fputs("Empty tree.\n", f); return; } double distance = 0; - - struct lineFile *lf = lineFileOpen(geneNamesFile, TRUE); char *line; struct slName *geneNames; AllocVar(geneNames); while (lineFileNextReal(lf, &line)) { struct slName *geneName = newSlName(cloneString(line)); slAddTail(&geneNames, geneName); } lineFileClose(&lf); rPrintHierarchicalJson(f, tree, 0, distance, geneNames); fputc('\n', f); } @@ -315,92 +314,86 @@ * to create the current node. Designed for HAC tree use*/ { verbose(3,"Calculating Distance...\n"); const struct bioExpVector *kid1 = (const struct bioExpVector *)item1; const struct bioExpVector *kid2 = (const struct bioExpVector *)item2; int j; double diff = 0, sum = 0; for (j = 0; j < kid1->count; ++j) { diff = kid1->vector[j] - kid2->vector[j]; sum += (diff * diff); } return sqrt(sum); } - struct slList *slBioExpVectorMerge(const struct slList *item1, const struct slList *item2, void *unusedExtraData) /* Make a new slPair where the name is the children names concattenated and the * value is the average of kids' values. * Designed for HAC tree use*/ { verbose(3,"Merging...\n"); const struct bioExpVector *kid1 = (const struct bioExpVector *)item1; const struct bioExpVector *kid2 = (const struct bioExpVector *)item2; - float kid1Weight = kid1->children / (float)(kid1->children + kid2->children); + float kid1Weight = kid1->children / (float)(kid1->children + kid2->children); //Weight based on number of children. 
float kid2Weight = kid2->children / (float)(kid1->children + kid2->children); struct bioExpVector *el; AllocVar(el); AllocArray(el->vector, kid1->count); assert(kid1->count == kid2->count); el->count = kid1->count; el->name = catTwoStrings(kid1->name, kid2->name); int i; int gCount = 0; for (i = 0; i < el->count; ++i) { - if (kid1->vector[i] == kid2->vector[i]) // Were doing thousands of merges, lets cut out useless compute where we can. + if (kid1->vector[i] == kid2->vector[i]) // We are doing thousands of merges, lets cut out useless compute where we can. { el->vector[i] = kid1->vector[i]; continue; } - el->vector[i] = (kid1Weight*kid1->vector[i] + kid2Weight*kid2->vector[i]); // Weight based on number of children. + el->vector[i] = (kid1Weight*kid1->vector[i] + kid2Weight*kid2->vector[i]); float diff; - //if (((float)(kid1Weight*kid1->vector[i])) > ((float)(kid2Weight*kid2->vector[i]))) - // {diff = ((float)(kid1Weight*kid1->vector[i])) - ((float)(kid2Weight*kid2->vector[i]));} - //else {diff = ((float)(kid2Weight*kid2->vector[i])) - ((float)(kid1Weight*kid1->vector[i]));} - if (((float)(kid1->vector[i])) > ((float)(kid2->vector[i]))) - {diff = ((float)(kid1->vector[i])) - ((float)(kid2->vector[i]));} - else {diff = ((float)(kid2->vector[i])) - ((float)(kid1->vector[i]));} + if (((float)(kid1->vector[i])) > ((float)(kid2->vector[i]))) diff = ((float)(kid1->vector[i])) - ((float)(kid2->vector[i])); + else diff = ((float)(kid2->vector[i])) - ((float)(kid1->vector[i])); ++el->contGenes; ++gCount; int index = i + 1; if (gCount <= 10){ struct slDoubleInt *newGene = slDoubleIntNew(diff, index); slAddHead(&el->topGeneIndeces, newGene); slSort(&el->topGeneIndeces, slDoubleIntCmp); } else{ if (el->vector[i] > el->topGeneIndeces->val){ slPopHead(el->topGeneIndeces); struct slDoubleInt *newGene = slDoubleIntNew(diff, index); slAddHead(&el->topGeneIndeces, newGene); slSort(&el->topGeneIndeces, slDoubleIntCmp); } } } slReverse(&el->topGeneIndeces); el->children = 
kid1->children + kid2->children; return (struct slList *)(el); } void colorLeaves(struct slRef *leafList) /* Assign colors of rainbow to leaves. */ { float total = 0.0; - //double purplePos = 0.80; struct slRef *el, *nextEl; /* Loop through list once to figure out total, since we need to normalize */ for (el = leafList; el != NULL; el = nextEl) { nextEl = el->next; if (nextEl == NULL) break; struct bioExpVector *bio1 = el->val; struct bioExpVector *bio2 = nextEl->val; double distance = slBioExpVectorDistance((struct slList *)bio1, (struct slList *)bio2, NULL); if (distance != distance ) distance = 0; total += distance; } @@ -410,42 +403,37 @@ bool firstLine = TRUE; for (el = leafList; el != NULL; el = nextEl) { nextEl = el->next; if (nextEl == NULL) break; struct bioExpVector *bio1 = el->val; struct bioExpVector *bio2 = nextEl->val; double distance = slBioExpVectorDistance((struct slList *)bio1, (struct slList *)bio2, NULL); if (firstLine) { double normalized = distance/total; bio1->color = whiteToBlackRainbowAtPos(normalized); firstLine = FALSE; } - //if (distance != distance ) distance = 0 ; - //soFar += distance; - //double normalized = soFar/total; double normalized = distance/total; if (normalized * 100 >= .95) bio2->color = whiteToBlackRainbowAtPos(.95); else bio2->color = whiteToBlackRainbowAtPos(normalized*100); - //bio2->color = saturatedRainbowAtPos(distance); soFar += normalized; } /* Set first color to correspond to 0, since not set in above loop */ struct bioExpVector *bio = leafList->val; - //bio->color = saturatedRainbowAtPos(0); bio->color = whiteToBlackRainbowAtPos(.95); } void convertInput(char *expMatrix, char *descFile, bool csv) /* Takes in a expression matrix and makes the inputs that this program will use. * Namely a transposed table with the first column removed. Makes use of system calls * to use cut, sed, kent utility rowsToCols, and paste (for descFile option). 
*/ { char cmd[1024],cmd1[1024], cmd2[1024]; if (csv) /* A sed one liner will convert comma separated values into a tab separated values*/ { char cmd3[1024]; safef(cmd3, 1024, "sed -i 's/,/\\t/g' %s ",expMatrix); verbose(2,"%s\n", cmd3); @@ -473,39 +461,38 @@ } else { safef(cmd2, 1024, "rowsToCols %s stdout | cut -f1 | sed \'1d\' > %s.cellNames", expMatrix, expMatrix); verbose(2,"%s\n", cmd2); mustSystem(cmd2); } } void generateHtml(FILE *outputFile, int nameSize, char* jsonFile) // Generates a new .html file for the dendrogram. Will do some size calculations as well. { char *pageName = cloneString(jsonFile); chopSuffix(pageName); int textSize = 12 - log(nodeCount); - //int radius = 540 + 270*log10(nodeCount); int width = 10 * nodeCount; int height = 10 * nodeCount; int labelLength = 10+nameSize*(15-textSize); if (labelLength > 100) labelLength = 100; fprintf(outputFile,"<!DOCTYPE html>\n"); fprintf(outputFile,"<head>\n"); - fprintf(outputFile,"<title>New dendrogram tests</title>\n"); + fprintf(outputFile,"<title>%s</title>\n", pageName); fprintf(outputFile,"<link rel=\"stylesheet\" href=\"http://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/css/bootstrap.min.css\">\n"); fprintf(outputFile,"<script src=\"https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js\"></script>\n"); fprintf(outputFile,"<script src=\"http://maxcdn.bootstrapcdn.com/bootstrap/3.3.5/js/bootstrap.min.js\"></script>\n"); fprintf(outputFile,"<script src=\"http://d3js.org/d3.v3.min.js\" type=\"text/javascript\"></script>\n"); fprintf(outputFile,"<script src=\"/js/d3.dendrograms.js\" type=\"text/javascript\"></script>\n"); fprintf(outputFile,"<div class = \"dropdown\">\n"); fprintf(outputFile," <div id = dropdown>\n"); fprintf(outputFile,"</div>\n"); fprintf(outputFile,"<script>\n"); fprintf(outputFile," function load() {\n"); fprintf(outputFile," var data;\n\n"); fprintf(outputFile," d3.json(\"%s\", function(error,json){\n", jsonFile); fprintf(outputFile," if (error) return 
console.warn(error);\n"); fprintf(outputFile," data = json;\n"); fprintf(outputFile," d3.dendrogram.makeRadialDendrogram('#dendrogram', data,{\n");