b2a26f49df9172f0900b4516dbf7710fc13f681c
ceisenhart
  Wed Jul 16 15:51:50 2014 -0700
Added a new output which is a json formatted for hierarchical d3 visualizations
diff --git src/hg/expData/expData.c src/hg/expData/expData.c
index c874ef0..c25c336 100644
--- src/hg/expData/expData.c
+++ src/hg/expData/expData.c
@@ -1,20 +1,21 @@
 /* expData -  Takes in a relational database and outputs expression information. */
 #include "common.h"
 #include "linefile.h"
 #include "hash.h"
 #include "options.h"
+#include "obscure.h"
 #include "jksql.h"
 #include "expData.h"
 #include "sqlList.h"
 #include "hacTree.h"
 #include "jsonWrite.h"
 
 void usage()
 /* Explain usage and exit. */
 {
 errAbort(
   "expData -  Takes in a relational database and outputs expression information\n"
   "usage:\n"
   "   expData biosamples matrix output\n"
   "options:\n"
   "   -xxx=XXX\n"
@@ -76,61 +77,63 @@
 char *line;
 struct bioExpVector *el = list;
 while (lineFileNextReal(lf, &line))
      {
      if (el == NULL)
 	 {
          warn("More names than items in list");
 	 break;
 	 }
      el->name = cloneString(line);
      el = el->next;
      }
 lineFileClose(&lf);
 }
 
-void rPrintNodes(FILE *f, struct hacTree *tree)
+void rPrintNodes(struct jsonWrite *jw, FILE *f, struct hacTree *tree)
 {
 // Recursively prints out the nodes in a depth first order starting on the left
 char *tissue = ((struct bioExpVector *)(tree->itemOrCluster))->name;
-struct jsonWrite *jw = jsonWriteNew();
+jsonWriteObjectStart(jw);
 if (tree->childDistance != 0) 
     // If the current object is a node then we assign no name.
     {
     fprintf(f,"    %s\"%s\"%s\"%s\"%s", "{","name", ":", " ", ",");
     jsonWriteString(jw, "name", " ");
     jsonWriteNumber(jw, "y", tree->childDistance);
     fprintf(f,"\"%s\"%s%0.31f%s\n", "y", ":" , tree->childDistance, "},");
     }
 else {
     // Otherwise the current object is a leaf, and the tissue name is printed. 
     jsonWriteString(jw, "name", tissue);
     jsonWriteNumber(jw, "y", tree->childDistance);
     fprintf(f,"    %s\"%s\"%s\"%s\"%s", "{","name", ":", tissue, ",");
     fprintf(f,"\"%s\"%s%0.31f%s\n", "y", ":" , tree->childDistance, "},");
     }
+jsonWriteObjectEnd(jw);
 if (tree->left == NULL && tree->right == NULL)
     { 
     return;
     }
 else if (tree->left == NULL || tree->right == NULL)
     errAbort("\nHow did we get a node with one NULL kid??");
-rPrintNodes(f, tree->left);
-rPrintNodes(f, tree->right);
+rPrintNodes(jw, f, tree->left);
+rPrintNodes(jw, f, tree->right);
 }
 
    
+
 void rPrintLinks(FILE *f, struct hacTree *tree, int source)
 {
 // Recursively prints the links in json format. 
 if (tree->childDistance > longest)
     // the first distance will be the longest, and is used for normalization
     longest = tree->childDistance;
 /* if the current location is a leaf */
 if (tree->left == NULL && tree->right == NULL)
     { 
     return;
     }
 else if (tree->left == NULL || tree->right == NULL)
     errAbort("\nHow did we get a node with one NULL kid??");
 /* check for the end of the tree */
 
@@ -143,42 +146,45 @@
 source = target;
 rPrintLinks(f, tree->left, source);
 /* Right recursion. */
 ++target;
 fprintf(f,"    %s\"%s\"%s%d%s", "{","source", ":", source - 1 , ",");
 fprintf(f,"\"%s\"%s%d%s", "target", ":" , target, ",");  
 fprintf(f,"\"%s\"%s%0.31f%s\n", "distance", ":" , 100*(tree->childDistance/longest) , "},");  
 rPrintLinks(f, tree->right, ++source);
 }
 
 void printJson(FILE *f, struct hacTree *tree)
 /* Prints the hacTree into a Json file format */
 {
 int source = 0;
 // Basic json template for d3 visualizations
-//struct jsonWrite *jw = jsonWriteNew();
+struct jsonWrite *jw = jsonWriteNew();
+jsonWriteObjectStart(jw);
 fprintf(f,"%s\n", "{");
-//jsonWriteTag(struct jsonWrite *jw, char *var);
+jsonWriteListStart(jw, "nodes");
 fprintf(f,"  \"%s\"%s\n", "nodes", ":[" );
-rPrintNodes(f, tree);
+rPrintNodes(jw, f, tree);
 fprintf(f,  "%s\n", "],");
 // Basic json template for d3 visualizations
 fprintf(f,  "\"%s\"%s\n", "links", ":[" );
 rPrintLinks(f,tree, source);
 fprintf(f,"  %s\n", "]");
 
 fprintf(f,"%s\n", "}");
+jsonWriteObjectEnd(jw);
+writeGulp("jsonWrite.out", jw->dy->string, jw->dy->stringSize);
 }
 
 
 static void rPrintSlBioExpVectorTree(FILE *f, struct hacTree *tree, int level,double distance)
 /* Recursively print out cluster as nested-parens with {}'s around leaf nodes. */
 {
 char *tissue = ((struct bioExpVector *)(tree->itemOrCluster))->name;
 int i;
 for (i = 0;  i < level;  i++)
     fputc(' ', f);
 if (tree->left == NULL && tree->right == NULL)
     {
     fprintf(f, "{%s}%0.31f", tissue, distance);
     return;
     }
@@ -196,30 +202,78 @@
 }
 
 void printSlBioExpVectorTree(FILE *f, struct hacTree *tree)
 /* Print out cluster as nested-parens with {}'s around leaf nodes. */
 {
 if (tree == NULL)
     {
     fputs("Empty tree.\n", f);
     return;
     }
 double distance = 0;
 rPrintSlBioExpVectorTree(f, tree, 0, distance);
 fputc('\n', f);
 }
 
+static void rPrintNestedJson(FILE *f, struct hacTree *tree, int level, double distance)
+/* Recursively print out cluster as nested-parens with {}'s around leaf nodes. */
+{
+char *tissue = ((struct bioExpVector *)(tree->itemOrCluster))->name;
+int i;
+if (tree->childDistance > longest)
+    // the first distance will be the longest, and is used for normalization
+    longest = tree->childDistance;
+for (i = 0;  i < level;  i++)
+    fputc(' ', f);
+if (tree->left == NULL && tree->right == NULL)
+    {
+    fprintf(f, "{\"%s\"%s \"%s\"%s\"%s\"%s %f}", "name", ":", tissue, ", ", "size", ":", distance);
+    return;
+    }
+else if (tree->left == NULL || tree->right == NULL)
+    errAbort("\nHow did we get a node with one NULL kid??");
+//fprintf(f, "{\"%s\"%s \"%f\"%s\n", "name", ":", " ", ",");
+fprintf(f, "{\"%s\"%s \"%f\"%s\n", "name", ":", 100*(tree->childDistance/longest), ",");
+for (i = 0;  i < level + 1;  i++)
+    fputc(' ', f);
+fprintf(f, "\"%s\"%s\n", "children", ": [");
+distance += tree->childDistance;
+rPrintNestedJson(f, tree->left, level+1, distance);
+fputs(",\n", f);
+rPrintNestedJson(f, tree->right, level+1, distance);
+fputc('\n', f);
+for (i=0;  i < level + 1;  i++)
+    fputc(' ', f);
+fputs("]\n", f);
+for (i=0;  i < level;  i++)
+    fputc(' ', f);
+fputs("}", f);
+}
+
+void printNestedJson(FILE *f, struct hacTree *tree)
+/* Print out cluster as nested-parens with {}'s around leaf nodes. */
+{
+if (tree == NULL)
+    {
+    fputs("Empty tree.\n", f);
+    return;
+    }
+double distance = 0;
+rPrintNestedJson(f, tree, 0, distance);
+fputc('\n', f);
+}
+
 char* floatToString(float input)
 {
 char* result = needMem(sizeof(result));
 sprintf(result,"%f", input);
 return result;
 freez(result);
 }
 
 double slBioExpVectorDistance(const struct slList *item1, const struct slList *item2, void *extraData)
 /* Return the absolute difference between the two kids' values. 
  * Designed for HAC tree use*/
 {
 const struct bioExpVector *kid1 = (const struct bioExpVector *)item1;
 const struct bioExpVector *kid2 = (const struct bioExpVector *)item2;
 int j;
@@ -252,28 +306,29 @@
     el->vector[i] = (kid1->vector[i] + kid2->vector[i])/2;
     }
 return (struct slList *)(el);
 }
 
 void expData(char *matrixFile, char *nameFile, char *outFile)
 /* Read matrix and names into a list of bioExpVectors, run hacTree to
  * associate them, and write output. */
 {
 struct bioExpVector *list = bioExpVectorListFromFile(matrixFile);
 FILE *f = mustOpen(outFile,"w");
 struct lm *localMem = lmInit(0);
 fillInNames(list, nameFile);
 struct hacTree *clusters = hacTreeFromItems((struct slList *)list, localMem,
 					    slBioExpVectorDistance, slBioExpVectorMerge, NULL, NULL);
-printJson(f,clusters);
+//printJson(f,clusters);
 //printSlBioExpVectorTree(f,clusters);
+printNestedJson(f,clusters);
 }
 
 int main(int argc, char *argv[])
 /* Process command line. */
 {
 optionInit(&argc, argv, options);
 if (argc != 4)
     usage();
 expData(argv[1], argv[2], argv[3]);
 return 0;
 }