4f43c7ab8cc963765bd20fa4d01f6e28d3416414 ceisenhart Wed May 20 11:10:30 2015 -0700 The binary tree is now a weighted binary tree diff --git src/hg/expMatrixToJson/expMatrixToJson.c src/hg/expMatrixToJson/expMatrixToJson.c index aebf139..c977304 100644 --- src/hg/expMatrixToJson/expMatrixToJson.c +++ src/hg/expMatrixToJson/expMatrixToJson.c @@ -356,64 +356,65 @@ double distance = 0; rPrintHierarchicalJson(f, tree, 0, distance, normConstant, cgConstant); fputc('\n', f); } double slBioExpVectorDistance(const struct slList *item1, const struct slList *item2, void *extraData) /* Return the absolute difference between the two kids' values. * Designed for HAC tree use*/ { verbose(1,"Calculating Distance...\n"); const struct bioExpVector *kid1 = (const struct bioExpVector *)item1; const struct bioExpVector *kid2 = (const struct bioExpVector *)item2; int j; double diff = 0, sum = 0; -float kid1Weight = 0.0, kid2Weight = 0.0; +//float kid1Weight = 0.0, kid2Weight = 0.0; +float kid1Weight = kid1->children / (float)(kid1->children + kid2->children); +float kid2Weight = kid2->children / (float)(kid1->children + kid2->children); +//printf("Kid1 weight is %f kid2 weight is %f \n", kid1Weight, kid2Weight); //uglyAbort("%f %f\n", kid1Weight, kid2Weight); for (j = 0; j < kid1->count; ++j) { - kid1Weight = kid1->children / (float)(kid1->children + kid2->children); - kid2Weight = kid2->children / (float)(kid1->children + kid2->children); diff = (kid1Weight*kid1->vector[j]) - (kid2Weight*kid2->vector[j]); sum += (diff * diff); } -printf("%f\n",sqrt(sum)); +//printf("%f\n",sqrt(sum)); return sqrt(sum); } struct slList *slBioExpVectorMerge(const struct slList *item1, const struct slList *item2, void *unusedExtraData) /* Make a new slPair where the name is the children names concattenated and the * value is the average of kids' values. * Designed for HAC tree use*/ { verbose(1,"Merging...\n"); const struct bioExpVector *kid1 = (const struct bioExpVector *)item1; const struct bioExpVector *kid2 = (const struct bioExpVector *)item2; struct bioExpVector *el; AllocVar(el); AllocArray(el->vector, kid1->count); el->count = kid1->count; el->name = catTwoStrings(kid1->name, kid2->name); int i; for (i = 0; i < el->count; ++i) { el->vector[i] = (kid1->vector[i] + kid2->vector[i])/2; } -el->children += 2; +el->children = kid1->children + kid2->children; return (struct slList *)(el); } void colorLeaves(struct slRef *leafList) /* Assign colors of rainbow to leaves. */ { /* Loop through list once to figure out total, since we need to * normalize */ double total = 0; double purplePos = 0.80; struct slRef *el, *nextEl; for (el = leafList; el != NULL; el = nextEl) { nextEl = el->next; if (nextEl == NULL) @@ -423,36 +424,33 @@ double distance = slBioExpVectorDistance((struct slList *)bio1, (struct slList *)bio2, NULL); total += distance; } /* Loop through list a second time to generate actual colors. */ double soFar = 0; for (el = leafList; el != NULL; el = nextEl) { nextEl = el->next; if (nextEl == NULL) break; struct bioExpVector *bio1 = el->val; struct bioExpVector *bio2 = nextEl->val; double distance = slBioExpVectorDistance((struct slList *)bio1, (struct slList *)bio2, NULL); soFar += distance; double normalized = soFar/total; -// uglyAbort("Inside the for loop %f Mazui! %f %f \n ", normalized, soFar, total); bio2->color = saturatedRainbowAtPos(normalized * purplePos); -// uglyAbort("At the end of the first pass in the for loop"); } -//uglyAbort("Two for loops, this is after the second" ); /* Set first color to correspond to 0, since not set in above loop */ struct bioExpVector *bio = leafList->val; bio->color = saturatedRainbowAtPos(0); } void expData(char *matrixFile, char *nameFile, char *outFile, bool forceLayout, int normConstant, int cgConstant) /* Read matrix and names into a list of bioExpVectors, run hacTree to * associate them, and write output. */ { struct bioExpVector *list = bioExpVectorListFromFile(matrixFile); FILE *f = mustOpen(outFile,"w"); struct lm *localMem = lmInit(0); fillInNames(list, nameFile); struct hacTree *clusters = NULL; if (sameString(clHacTree, "multiThreads"))