b46d372807d2ffc6fd3d03ec2ad71dd72a361672
kate
  Thu Apr 28 11:15:14 2016 -0700
Refactor GTEx boxplot code in preparation for moving to library for shared use with hgGene

diff --git src/hg/hgc/gtexClick.c src/hg/hgc/gtexClick.c
index 36ecd74..8c37bad 100644
--- src/hg/hgc/gtexClick.c
+++ src/hg/hgc/gtexClick.c
@@ -40,116 +40,116 @@
     return unknown;
 if (sameString(geneClass, "coding"))
     return "#0C0C78";
 if (sameString(geneClass, "nonCoding"))
     return "#006400";
 if (sameString(geneClass, "pseudo"))
     return "#FF33FF";
 if (sameString(geneClass, "problem"))
     return "#FE0000";
 return unknown;
 }
 
 /********************************************************/
 /* R implementation.  Invokes R script */
 
-void drawGtexRBoxplot(struct gtexGeneBed *gtexGene, struct tissueSampleVals *tsvList,
-                        boolean doLogTransform, char *version)
+boolean drawGtexRBoxplot(char *geneName, struct tissueSampleVals *tsvList,
+                        boolean doLogTransform, char *version, struct tempName *pngTn)
 /* Draw a box-and-whiskers plot from GTEx sample data, using R boxplot */
 {
 /* Create R data frame.  This is a tab-sep file, one row per sample, 
  * with columns for sample, tissue, rpkm */
 struct tempName dfTn;
 trashDirFile(&dfTn, "hgc", "gtexGene", ".df.txt");
 FILE *f = fopen(dfTn.forCgi, "w");
 if (f == NULL)
     errAbort("can't create temp file %s", dfTn.forCgi);
 fprintf(f, "sample\ttissue\trpkm\n");
 struct tissueSampleVals *tsv;
 int sampleId=1;
 int i;
 for (tsv = tsvList; tsv != NULL; tsv = tsv->next)
     {
     int count = tsv->count;
     // remove trailing parenthesized phrases as not worth label length
     chopSuffixAt(tsv->description, '(');
     for (i=0; i<count; i++)
         fprintf(f, "%d\t%s\t%0.3f\n", sampleId++, tsv->description, tsv->vals[i]);
     }
 fclose(f);
 
 // Plot to PNG file
-struct tempName pngTn;
-trashDirFile(&pngTn, "hgc", "gtexGene", ".png");
+if (!pngTn)
+    return FALSE;
+trashDirFile(pngTn, "hgc", "gtexGene", ".png");
 char cmd[256];
 
 /* Exec R in quiet mode, without reading/saving environment or workspace */
 safef(cmd, sizeof(cmd), "Rscript --vanilla --slave hgcData/gtexBoxplot.R %s %s %s %s %s %s",  
-                                gtexGene->name, dfTn.forCgi, pngTn.forHtml, 
+                                geneName, dfTn.forCgi, pngTn->forHtml, 
                                 doLogTransform ? "log=TRUE" : "log=FALSE", "order=alpha", version);
 //NOTE: use "order=score" to order bargraph by median RPKM, descending
 
 int ret = system(cmd);
 if (ret == 0)
-    printf("<IMG SRC = \"%s\" BORDER=1><BR>\n", pngTn.forHtml);
+    return TRUE;
+return FALSE;
 }
 
 static struct gtexGeneBed *getGtexGene(char *item, char *chrom, int start, int end, char *table)
 /* Retrieve gene info for this item from the main track table */
 {
 struct gtexGeneBed *gtexGene = NULL;
 struct sqlConnection *conn = hAllocConn(database);
 char **row;
 char query[512];
 struct sqlResult *sr;
 if (sqlTableExists(conn, table))
     {
     sqlSafef(query, sizeof query, "select * from %s where name = '%s' and chrom = '%s' "
                                   " and chromStart = %d and chromEnd = %d", 
                                         table, item, chrom, start, end);
     sr = sqlGetResult(conn, query);
     row = sqlNextRow(sr);
     if (row != NULL)
         {
         gtexGene = gtexGeneBedLoad(row);
         }
     sqlFreeResult(&sr);
     }
 hFreeConn(&conn);
 return gtexGene;
 }
 
-struct tissueSampleVals *getTissueSampleVals(struct gtexGeneBed *gtexGene, boolean doLogTransform,
+struct tissueSampleVals *getTissueSampleVals(char *geneId, boolean doLogTransform,
                                                 char *version, double *maxValRet)
 /* Get sample data for the gene.  Optionally log10 it. Return maximum value seen */
 {
-// TODO: support version table name.  Likely move to lib.
 struct hash *tsHash = hashNew(0);
 struct tissueSampleVals *tsv;
 struct hashEl *hel;
 struct slDouble *val;
 double maxVal = 0;
 struct gtexSampleData *sd = NULL;
 char query[256];
 char **row;
 char buf[256];
 char *sampleDataTable = "gtexSampleData";
-safef(buf, sizeof(buf), "%s%s", sampleDataTable, version);
+safef(buf, sizeof(buf), "%s%s", sampleDataTable, gtexVersionSuffixFromVersion(version));
 struct sqlConnection *conn = hAllocConn("hgFixed");
 assert(sqlTableExists(conn, buf));
-sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", 
-                buf, gtexGene->geneId);
+sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", buf, geneId);
 struct sqlResult *sr = sqlGetResult(conn, query);
 while ((row = sqlNextRow(sr)) != NULL)
     {
     sd = gtexSampleDataLoad(row);
     if ((hel = hashLookup(tsHash, sd->tissue)) == NULL)
         {
         AllocVar(tsv);
         hashAdd(tsHash, sd->tissue, tsv);
         }
     else
         tsv = (struct tissueSampleVals *)hel->val;
     maxVal = max(maxVal, sd->score);
     val = slDoubleNew(sd->score);
     slAddHead(&tsv->valList, val);
     }
@@ -181,30 +181,42 @@
         val = slPopHead(&tsv->valList);
         if (doLogTransform)
             vals[i] = log10(val->val+1.0);
         else
             vals[i] = val->val;
         }
     doubleBoxWhiskerCalc(tsv->count, tsv->vals, 
                                 &tsv->min, &tsv->q1, &tsv->median, &tsv->q3, &tsv->max);
     slAddHead(&tsList, tsv);
     }
 if (maxValRet != NULL)
     *maxValRet = maxVal;
 return tsList;
 }
 
+boolean gtexGeneBoxplot(char *geneId, char *geneName, char *version, 
+                                boolean doLogTransform, struct tempName *pngTn)
+/* Create a png temp file with boxplot of GTEx expression values for this gene. 
+ * GeneId is the Ensembl gene ID.  GeneName is the HUGO name, used for graph title;
+ * If NULL, label with the Ensembl gene ID */
+{
+struct tissueSampleVals *tsvs;
+tsvs  = getTissueSampleVals(geneId, doLogTransform, version, NULL);
+char *label = geneName ? geneName : geneId;
+return drawGtexRBoxplot(label, tsvs, doLogTransform, version, pngTn);
+}
+
 char *getGeneDescription(struct gtexGeneBed *gtexGene)
 /* Get description for gene. Needed because knownGene table semantics have changed in hg38 */
 {
 char query[256];
 if (sameString(database, "hg38"))
     {
     char *geneId = cloneString(gtexGene->geneId);
     chopSuffix(geneId);
     sqlSafef(query, sizeof(query), 
         "select kgXref.description from kgXref, knownCanonical where knownCanonical.protein like '%%%s%%' and knownCanonical.transcript=kgXref.kgID", geneId);
     }
 else
     {
     sqlSafef(query, sizeof(query), 
                 "select kgXref.description from kgXref where geneSymbol='%s'", gtexGene->name);
@@ -243,25 +255,23 @@
 printf("<b>Gene class: </b><span style='color: %s'>%s</span><br>\n", 
             geneClassColorCode(geneClass), geneClass);
 printf("<b>Total median expression: </b> %0.2f RPKM<br>\n", gtexGeneTotalMedianExpression(gtexGene));
 printf("<b>Score: </b> %d<br>\n", gtexGene->score); 
 printf("<b>Genomic position: </b>%s <a href='%s&db=%s&position=%s%%3A%d-%d'>%s:%d-%d</a><br>\n", 
                         database, hgTracksPathAndSettings(), database, 
                         gtexGene->chrom, gtexGene->chromStart+1, gtexGene->chromEnd,
                         gtexGene->chrom, gtexGene->chromStart+1, gtexGene->chromEnd);
 puts("<p>");
 
 // set gtexDetails (e.g. to 'log') to show log transformed details page 
 //      if hgTracks is log-transformed
 boolean doLogTransform = (trackDbSetting(tdb, "gtexDetails") &&
                                 cartUsualBooleanClosestToHome(cart, tdb, FALSE, GTEX_LOG_TRANSFORM,
                                                 GTEX_LOG_TRANSFORM_DEFAULT));
-double maxVal = 0.0;
-char *versionSuffix = gtexVersionSuffix(tdb->table);
-struct tissueSampleVals *tsvs = getTissueSampleVals(gtexGene, doLogTransform, 
-                                                        versionSuffix, &maxVal);
 char *version = gtexVersion(tdb->table);
-drawGtexRBoxplot(gtexGene, tsvs, doLogTransform, version);
+struct tempName pngTn;
+if (gtexGeneBoxplot(gtexGene->geneId, gtexGene->name, version, doLogTransform, &pngTn))
+    printf("<IMG SRC = \"%s\" BORDER=1><BR>\n", pngTn.forHtml);
 printf("<br><a target='_blank' href='http://www.gtexportal.org/home/gene/%s'>View at GTEx portal</a>\n", gtexGene->name);
 
 printTrackHtml(tdb);
 }