5d3d772b5d1d3475f7212dd3728e128a20011e76
kate
  Wed Oct 21 16:32:50 2015 -0700
Code cleanup in preparation for mergingin into main. refs #15645

diff --git src/hg/hgc/gtexClick.c src/hg/hgc/gtexClick.c
index 06411ca..fd7ecca 100644
--- src/hg/hgc/gtexClick.c
+++ src/hg/hgc/gtexClick.c
@@ -1,116 +1,94 @@
 /* Details pages for GTEx tracks */
 
 /* Copyright (C) 2015 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "hash.h"
 #include "hdb.h"
+#include "hvGfx.h"
 #include "trashDir.h"
 #include "hgc.h"
-#include "jsHelper.h"
 
-#include "rainbow.h"
-#include "hvGfx.h"
-#include "gtexUi.h"
 #include "gtexGeneBed.h"
 #include "gtexTissue.h"
 #include "gtexSampleData.h"
-
-boolean doLogTransform = FALSE;
-struct gtexGeneBed *gtexGene = NULL;
+#include "gtexUi.h"
 
 struct tissueSampleVals
 /* RPKM expression values for multiple samples */
     {
     struct tissueSampleVals *next;
     char *name;         /* GTEx tissue name */
     char *description;  /* GTEx tissue description */
     int color;          /* GTEx tissue color */
     int count;          /* number of samples */
     double *vals;       /* RPKM values */
     double min, max;    /* minimum, maximum value */
     double q1, median, q3;      /* quartiles */
     struct slDouble *valList;   /* used to create val array */
     };
 
-int cmpTissueSampleValsMedianScore(const void *va, const void *vb)
-/* Compare RPKM scores */
-{
-const struct tissueSampleVals *a = *((struct tissueSampleVals **)va);
-const struct tissueSampleVals *b = *((struct tissueSampleVals **)vb);
-if (a->median == b->median)
-    return 0;
-return (a->median > b->median ? 1: -1);
-}
+/********************************************************/
+/* R implementation.  Invokes R script */
 
-void RGtexBoxplot(struct tissueSampleVals *tsvList)
+void drawGtexRBoxplot(struct gtexGeneBed *gtexGene, struct tissueSampleVals *tsvList,
+                        boolean doLogTransform)
+/* Draw a box-and-whiskers plot from GTEx sample data, using R boxplot */
 {
-// Tissue list is now sorted by GTEx tissue ordering.  
-// Sort by score descending.
-slSort(&tsvList, cmpTissueSampleValsMedianScore);
-slReverse(&tsvList);
-
-// Create R data frame
+/* Create R data frame
+      This is a tab-sep file, a row per sample, with columns for sample, tissue, rpkm */
 struct tempName dfTn;
 trashDirFile(&dfTn, "hgc", "gtexGene", ".df.txt");
 FILE *f = fopen(dfTn.forCgi, "w");
 if (f == NULL)
     errAbort("can't create temp file %s", dfTn.forCgi);
 fprintf(f, "sample\ttissue\trpkm\n");
 struct tissueSampleVals *tsv;
-int i;
 int sampleId=1;
+int i;
 for (tsv = tsvList; tsv != NULL; tsv = tsv->next)
     {
     int count = tsv->count;
     for (i=0; i<count; i++)
         fprintf(f, "%d\t%s\t%0.3f\n", sampleId++, tsv->name, tsv->vals[i]);
     }
 fclose(f);
 
 // Plot to PNG file
 struct tempName pngTn;
 trashDirFile(&pngTn, "hgc", "gtexGene", ".png");
 char cmd[256];
-// Exec R in quiet mode, without reading/saving environment or workspace
-//safef(cmd, sizeof(cmd), "Rscript --gui=X11 --vanilla --slave gtex/geneBoxplot.R %s %s %s",  
-safef(cmd, sizeof(cmd), "Rscript --vanilla --slave gtex/geneBoxplot.R %s %s %s %s",  
+
+/* Exec R in quiet mode, without reading/saving environment or workspace */
+//safef(cmd, sizeof(cmd), "Rscript --gui=X11 --vanilla --slave hgcData/gtexBoxplot.R %s %s %s",  
+safef(cmd, sizeof(cmd), "Rscript --vanilla --slave hgcData/gtexBoxplot.R %s %s %s %s",  
                                 gtexGene->name, dfTn.forCgi, pngTn.forHtml, 
                                 doLogTransform ? "log=TRUE" : "log=FALSE");
 int ret = system(cmd);
 if (ret == 0)
     {
     printf("<IMG SRC = \"%s\" BORDER=1><BR>\n", pngTn.forHtml);
     //printf("<IMG SRC = \"%s\" BORDER=1 WIDTH=%d HEIGHT=%d><BR>\n",
                     //pngTn.forHtml, imageWidth, imageHeight);
                     //pngTn.forHtml, 900, 500);
     }
 }
 
-void d3GtexBoxplot(struct tissueSampleVals *tsvList)
-{
-//puts("<script src='http://www.d3plus.org/js/d3.js'></script>\n");
-//puts("<script src='http://www.d3plus.org/js/d3plus.js'></script>\n");
-printf("<div id='expGraph'></div>\n");
-jsIncludeFile("d3.min.js", NULL);
-jsIncludeFile("d3plus.gtex.js", NULL);
-jsTrackingVar("geneName", gtexGene->name);
-jsTrackingVar("geneId", gtexGene->geneId);
-jsIncludeFile("gtex.js", NULL);
-}
+/*****************************************************************/
+/* C implementation (partial -- currently lacks axes and labels) */
 
 /* Dimensions of image parts */
 int pad = 2;
 int margin = 1;
 //int graphHeight = 400;
 int graphHeight = 325;
 //int barWidth = 3;
 int barWidth = 12;
 int titleHeight = 30;
 
 int xAxisHeight = 100;
 int yAxisWidth = 50;
 int axisMargin = 5;
 
 /* Dimensions of image overall and our portion within it. */
@@ -131,58 +109,68 @@
 }
 
 struct rgbColor rgbFromIntColor(int color)
 {
 return (struct rgbColor){.r=COLOR_32_BLUE(color), .g=COLOR_32_GREEN(color), .b=COLOR_32_RED(color)};
 }
 
 int valToY(double val, double maxVal, int height)
 /* Convert a value from 0 to maxVal to 0 to height-1 */
 {
 double scaled = val/maxVal;
 int y = scaled * (height-1);
 return height - 1 - y;
 }
 
+int cmpTissueSampleValsMedianScore(const void *va, const void *vb)
+/* Compare RPKM scores */
+{
+const struct tissueSampleVals *a = *((struct tissueSampleVals **)va);
+const struct tissueSampleVals *b = *((struct tissueSampleVals **)vb);
+if (a->median == b->median)
+    return 0;
+return (a->median > b->median ? 1: -1);
+}
+
 void drawPointSmear(struct hvGfx *hvg, int xOff, int yOff, int height,
     struct tissueSampleVals *tsv, double maxExp, int colorIx, double midClipMin, double midClipMax)
 /* Draw a point for each sample value. */
 {
 int i, size = tsv->count;
 double *vals = tsv->vals;
 
 // Min becomes max after valToY because graph flipped
 int yMin = valToY(midClipMax, maxExp, height);
 int yMax = valToY(midClipMin, maxExp, height);
 for (i=0; i<size; ++i)
     {
     double exp = vals[i];
     int y = valToY(exp, maxExp, height);
     if (y < yMin || y > yMax)
         {
         y += yOff;
         hvGfxDot(hvg, xOff-1, y, colorIx);
         hvGfxDot(hvg, xOff+1, y, colorIx);
         hvGfxDot(hvg, xOff, y-1, colorIx);
         hvGfxDot(hvg, xOff, y+1, colorIx);
         }
     }
 }
 
 void drawBoxAndWhiskers(struct hvGfx *hvg, int fillColorIx, int x, int y, 
                 struct tissueSampleVals *tsv, double maxExp)
-/* Draw a Tukey-type box plot. JK */
+/* Draw a box-and-whiskers element of a Tukey-type box plot. Code mostly from JK */
 {
 struct rgbColor lineColor = {.r=128, .g=128, .b=128};
 int lineColorIx = hvGfxFindColorIx(hvg, lineColor.r, lineColor.g, lineColor.b);
 int medianColorIx = hvGfxFindColorIx(hvg,0,0,0);
 int whiskColorIx = hvGfxFindColorIx(hvg,192,192,192);
 double xCen = x + barWidth/2;
 if (tsv->count > 1)
     {
     /* Cache a few fields from tsv in local variables */
     double q1 = tsv->q1, q3 = tsv->q3, median = tsv->median;
 
     /* Figure out position of first quarter, median, and third quarter in screen Y coordinates */
     int yQ1 = valToY(q1, maxExp, graphHeight) + y;
     int yQ3 = valToY(q3, maxExp, graphHeight) + y;
     int yMedian = valToY(median, maxExp, graphHeight);
@@ -213,255 +201,192 @@
     hvGfxBox(hvg, x, yWhisk1, barWidth, 1, whiskColorIx);
     hvGfxBox(hvg, x, yWhisk2, barWidth, 1, whiskColorIx);
 
     /* Draw median, 2 thick */
     hvGfxBox(hvg, x, y + yMedian-1, barWidth, 2, medianColorIx);
 
     /* Draw lines from mid quarters to whiskers */
     hvGfxBox(hvg, xCen, yWhisk2, 1, yQ3 - yWhisk2, whiskColorIx);
     hvGfxBox(hvg, xCen, yQ1, 1, yWhisk1 - yQ1, whiskColorIx);
 
     /* Draw any outliers outside of whiskers */
     drawPointSmear(hvg, xCen, y, graphHeight, tsv, maxExp, lineColorIx, whisk1, whisk2);
     }
 }
 
-
-void drawGtexBoxplot(struct tissueSampleVals *tsList, double maxVal)
+void drawGtexBoxplot(struct gtexGeneBed *gtexGene, struct tissueSampleVals *tsList, double maxVal,
+                        boolean doLogTransform)
+/* Draw a box-and-whiskers plot from GTEx sample data */
 {
-// Tissue list is now sorted by GTEx tissue ordering.  
-// Sort by score descending.
+/* Sort by score descending.  (Tissue list is alpha sorted by tissue name) */
 slSort(&tsList, cmpTissueSampleValsMedianScore);
 slReverse(&tsList);
 
-// Initialize graphics for Box-and-whiskers plot
+/* Initialize graphics for Box-and-whiskers plot */
 setImageDims(slCount(tsList));
 struct tempName pngTn;
 trashDirFile(&pngTn, "hgc", "gtexGene", ".png");
-//uglyf("Trash file: %s\n", pngTn.forCgi);
-
 struct hvGfx *hvg = hvGfxOpenPng(imageWidth, imageHeight, pngTn.forCgi, FALSE);
 //hvGfxSetClip(hvg, 0, 0, imageWidth, imageHeight); // needed ?
-
 int x=innerXoff + pad, y = innerYoff + pad;
 struct tissueSampleVals *tsv;
+
+/* Draw boxes */
 for (tsv = tsList; tsv != NULL; tsv = tsv->next)
     {
     struct rgbColor fillColor = rgbFromIntColor(tsv->color);
     int fillColorIx = hvGfxFindColorIx(hvg, fillColor.r, fillColor.g, fillColor.b);
     drawBoxAndWhiskers(hvg, fillColorIx, x, y, tsv, maxVal);
     x += barWidth + pad;
     }
 //hvGfxUnclip(hvg);
 
 /* Draw title */
 int blackColorIx = hvGfxFindColorIx(hvg, 0,0,0);
 char title[256];
 // TODO: get tissue count and release from tables 
 safef(title, sizeof(title), "%s Gene Expression in 53 Tissues from GTEx (V4 release, 2014) - %s", 
                 gtexGene->name, doLogTransform ? "log10(RPKM+1)" : "(RPKM)");
 hvGfxTextCentered(hvg, 0, innerYoff, imageWidth, titleHeight, blackColorIx, mgMediumFont(), title);
 
 /* Draw axes */
 x = innerXoff - margin;
 
-// Y axis, tick marks, and labels
+/* Y axis, tick marks, and labels */
 hvGfxLine(hvg, x, innerYoff, x, plotHeight, blackColorIx);
 
-// for now, just mark the max value
+/* For now, just mark the max value */
 char buf[16];
 safef(buf, sizeof(buf), "%d -", round(maxVal));
 hvGfxText(hvg, x-24, innerYoff-8, blackColorIx, mgMediumFont(), buf);
 
-// X axis
+/* X axis */
 y = innerYoff + plotHeight;
 hvGfxLine(hvg, innerXoff, y, plotWidth, y, blackColorIx);
+// TODO: Title, tick marks and labels
 
-// Draw vertical labels on separate graphics object, rotate, then merge
-/*
-struct memGvx *mg = mgNew(imageHeight, imageWidth);
-MgFont *font = mgMediumFont();
-int lineHeight = mgFontLineHeight(font)-1;
-y += lineHeight;
-mgTextRight(mg, 0+1, y+1, width-1, lineHeight, MG_BLACK, font, labels[i]);
-hvGfxMergeWithPng(hvg, mgRotate90(mg));
-// or try using mg lib entirely, via mgSavePng.
-*/
-
-#ifdef TICKS
-/* Figure tick intervals.  GTEx portal uses .5 for log, 2, 5, 10, 20, 50, 100  for linear.
- * With 7-12 ticks.  5-10 seems nicer to me. Here's how it's done for browser ruler */
-
-static long figureTickSpan(long totalLength, int maxNumTicks)
-/* Figure out whether ticks on ruler should be 1, 5, 10, 50, 100, 500, 
- * 1000, etc. units apart.  */
-{
-int roughTickLen = totalLength/maxNumTicks;
-int i;
-int tickLen = 1;
-
-for (i=0; i<9; ++i)
-    {
-    if (roughTickLen < tickLen)
-        return tickLen;
-    tickLen *= 5;
-    if (roughTickLen < tickLen)
-        return tickLen;
-    tickLen *= 2;
-    }
-return 1000000000;
-}
-#endif
-
-
+// TODO: Y axis tick marks and labels (45 degree or vertical)
+// Approaches:  Expand memgvx or use postscript to rotate
 
 hvGfxClose(&hvg);
 printf("<IMG SRC = \"%s\" BORDER=1><BR>\n", pngTn.forHtml);
 /*printf("<IMG SRC = \"%s\" BORDER=1 WIDTH=%d HEIGHT=%d><BR>\n",
                 pngTn.forHtml, imageWidth, imageHeight);
 */
-//cloneString(gifTn.forHtml);
 }
-void doGtexGeneExpr(struct trackDb *tdb, char *item)
-/* Details of GTEX gene expression item */
-{
-// Load item from table */
-
-// TODO:  Get full details from Data table 
-//char sampleTable[128];
-//safef(sampleTable, sizeof(able), "%sSampleData", tdb->table);
 
+struct gtexGeneBed *getGtexGene(char *item, char *table)
+/* Retrieve gene info for this item from the main track table */
+{
+struct gtexGeneBed *gtexGene = NULL;
 struct sqlConnection *conn = hAllocConn(database);
 char **row;
 char query[512];
 struct sqlResult *sr;
-int expCount = 0;
-if (sqlTableExists(conn, tdb->table))
+if (sqlTableExists(conn, table))
     {
-    sqlSafef(query, sizeof(query), "select * from %s where name = '%s'", tdb->table, item);
+    sqlSafef(query, sizeof(query), "select * from %s where name = '%s'", table, item);
     sr = sqlGetResult(conn, query);
     row = sqlNextRow(sr);
     if (row != NULL)
         {
         gtexGene = gtexGeneBedLoad(row);
-        expCount = gtexGene->expCount;
         }
     sqlFreeResult(&sr);
     }
 hFreeConn(&conn);
-
-genericHeader(tdb, item);
-
-if (gtexGene != NULL)
-    {
-    // TODO: link to UCSC gene
-    printf("<b>Gene:</b> %s<br>", gtexGene->name);
-    printf("<b>Ensembl ID:</b> %s<br>\n", gtexGene->geneId);
-    printf("<a target='_blank' href='http://www.gtexportal.org/home/gene/%s'>View at GTEx portal</a><br>\n", gtexGene->geneId);
-    puts("<p>");
+return gtexGene;
 }
 
-// Get full sample data for this gene
-char *sampleDataTable = "gtexSampleData";
-conn = hAllocConn("hgFixed");
-assert(sqlTableExists(conn, sampleDataTable));
-sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", 
-                sampleDataTable, gtexGene->geneId);
-sr = sqlGetResult(conn, query);
+struct tissueSampleVals *getTissueSampleVals(struct gtexGeneBed *gtexGene, boolean doLogTransform,
+                                                double *maxValRet)
+/* Get sample data for the gene.  Optionally log10 it. Return maximum value seen */
+{
+// TODO: support version table name.  Likely move to lib.
 struct hash *tsHash = hashNew(0);
 struct tissueSampleVals *tsv;
 struct hashEl *hel;
 struct slDouble *val;
 double maxVal = 0;
 struct gtexSampleData *sd = NULL;
+char query[256];
+char **row;
+char *sampleDataTable = "gtexSampleData";
+struct sqlConnection *conn = hAllocConn("hgFixed");
+assert(sqlTableExists(conn, sampleDataTable));
+sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", sampleDataTable, gtexGene->geneId);
+struct sqlResult *sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     sd = gtexSampleDataLoad(row);
     if ((hel = hashLookup(tsHash, sd->tissue)) == NULL)
         {
         AllocVar(tsv);
         hashAdd(tsHash, sd->tissue, tsv);
         }
     else
         tsv = (struct tissueSampleVals *)hel->val;
     maxVal = max(maxVal, sd->score);
     val = slDoubleNew(sd->score);
     slAddHead(&tsv->valList, val);
     }
-
-// Fill in tissue descriptions, fill values array and calculate stats for plotting
-// Then make a list, suitable for sorting by tissue or score
+/*  Fill in tissue descriptions, fill values array and calculate stats for plotting
+        Then make a list, suitable for sorting by tissue or score
+    NOTE: Most of this not needed for R implementation */
 struct gtexTissue *tis = NULL, *tissues = gtexGetTissues();
 struct tissueSampleVals *tsList = NULL;
 int i;
-doLogTransform = cartUsualBooleanClosestToHome(cart, tdb, FALSE, GTEX_LOG_TRANSFORM,
-                                                GTEX_LOG_TRANSFORM_DEFAULT);
 if (doLogTransform)
     maxVal = log10(maxVal+1.0);
 for (tis = tissues; tis != NULL; tis = tis->next)
     {
     tsv = hashMustFindVal(tsHash, tis->name);
     tsv->name = tis->name;
     tsv->description = tis->description;
     tsv->color = tis->color;
     int count = tsv->count = slCount(tsv->valList);
     double *vals = AllocArray(tsv->vals, count);
     for (i=0; i<count; i++)
         {
         val = slPopHead(&tsv->valList);
         if (doLogTransform)
             vals[i] = log10(val->val+1.0);
         else
             vals[i] = val->val;
         }
     doubleBoxWhiskerCalc(tsv->count, tsv->vals, 
                                 &tsv->min, &tsv->q1, &tsv->median, &tsv->q3, &tsv->max);
     slAddHead(&tsList, tsv);
     }
+if (maxValRet != NULL)
+    *maxValRet = maxVal;
+return tsList;
+}
+
+void doGtexGeneExpr(struct trackDb *tdb, char *item)
+/* Details of GTEx gene expression item */
+{
+struct gtexGeneBed *gtexGene = getGtexGene(item, tdb->table);
+if (gtexGene == NULL)
+    errAbort("Can't find gene %s in GTEx gene table %s\n", item, tdb->table);
+
+genericHeader(tdb, item);
+// TODO: link to UCSC gene
+printf("<b>Gene:</b> %s<br>", gtexGene->name);
+printf("<b>Ensembl ID:</b> %s<br>\n", gtexGene->geneId);
+printf("<a target='_blank' href='http://www.gtexportal.org/home/gene/%s'>View at GTEx portal</a><br>\n", gtexGene->geneId);
+puts("<p>");
+
+boolean doLogTransform = cartUsualBooleanClosestToHome(cart, tdb, FALSE, GTEX_LOG_TRANSFORM,
+                                                GTEX_LOG_TRANSFORM_DEFAULT);
+double maxVal = 0.0;
+struct tissueSampleVals *tsvs = getTissueSampleVals(gtexGene, doLogTransform, &maxVal);
+
 // TODO: Remove one
 char *viz = cgiUsualString("viz", "R");
-if (sameString(viz, "d3"))
-    d3GtexBoxplot(tsList);
-else if (sameString(viz, "c"))
-    drawGtexBoxplot(tsList, maxVal);
+if (sameString(viz, "C"))
+    drawGtexBoxplot(gtexGene, tsvs, maxVal, doLogTransform);
 else
-    RGtexBoxplot(tsList);
+    drawGtexRBoxplot(gtexGene, tsvs, doLogTransform);
 
-// Track description
 printTrackHtml(tdb);
-
-// Print out tissue table with color assignments
-#ifdef DEBUG
-conn = hAllocConn("hgFixed");
-char *tissueTable = "gtexTissue";
-if (sqlTableExists(conn, tissueTable))
-    {
-    dyStringPrintf(dy, "<table>");
-    dyStringPrintf(dy, "<tr><td><b>Color<b></td><td><b>Tissue<b></td></tr>\n");
-    int i;
-    double invExpCount = 1.0/expCount;
-    char query[512];
-    sqlSafef(query, sizeof(query), "select * from %s", tissueTable);
-    struct sqlResult *sr = sqlGetResult(conn, query);
-    for (i=0; i<expCount; i++)
-        {
-        row = sqlNextRow(sr);
-        if (row == NULL)
-            break;
-        struct gtexTissue *tissue = gtexTissueLoad(row);
-        double colPos = invExpCount * i;
-        struct rgbColor color = saturatedRainbowAtPos(colPos);
-        dyStringPrintf(dy, "<tr><td bgcolor='#%02X%02X%02X'></td><td>%s</td></tr>\n",
-                    color.r, color.g, color.b, tissue->description);
-        }
-    sqlFreeResult(&sr);
-    }
-hFreeConn(&conn);
-dyStringPrintf(dy, "</table>");
-puts(dy->string);
-
-//cartWebStart(cart, database, "List of items assayed in %s", clusterTdb->shortLabel);
-
-//genericClickHandlerPlus(tdb, item, item, dy->string);
-dyStringFree(&dy);
-#endif
 }