000226be7ecaa7674455d3968af3590ff2a047b3
kate
  Thu Apr 28 13:13:40 2016 -0700
Add GTEx gene expression boxplot to hg38 and hg19 hgGene pages. Remove old GTEx link. refs #17244

diff --git src/hg/hgc/gtexClick.c src/hg/hgc/gtexClick.c
index 8c37bad..1a73b20 100644
--- src/hg/hgc/gtexClick.c
+++ src/hg/hgc/gtexClick.c
@@ -1,277 +1,132 @@
 /* Details pages for GTEx tracks */
 
 /* Copyright (C) 2015 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "hash.h"
 #include "hdb.h"
 #include "hvGfx.h"
 #include "trashDir.h"
 #include "hgc.h"
 #include "hCommon.h"
 
 #include "gtexGeneBed.h"
 #include "gtexTissue.h"
-#include "gtexSampleData.h"
 #include "gtexUi.h"
 #include "gtexInfo.h"
 
-struct tissueSampleVals
-/* RPKM expression values for multiple samples */
-    {
-    struct tissueSampleVals *next;
-    char *name;         /* GTEx tissue name */
-    char *description;  /* GTEx tissue description */
-    int color;          /* GTEx tissue color */
-    int count;          /* number of samples */
-    double *vals;       /* RPKM values */
-    double min, max;    /* minimum, maximum value */
-    double q1, median, q3;      /* quartiles */
-    struct slDouble *valList;   /* used to create val array */
-    };
-
 char *geneClassColorCode(char *geneClass)
 /* Get HTML color code used by GENCODE for transcript class
  * WARNING: should share code with gene color handling in hgTracks */
 {
 char *unknown = "#010101";
 if (geneClass == NULL)
     return unknown;
 if (sameString(geneClass, "coding"))
     return "#0C0C78";
 if (sameString(geneClass, "nonCoding"))
     return "#006400";
 if (sameString(geneClass, "pseudo"))
     return "#FF33FF";
 if (sameString(geneClass, "problem"))
     return "#FE0000";
 return unknown;
 }
 
-/********************************************************/
-/* R implementation.  Invokes R script */
-
-boolean drawGtexRBoxplot(char *geneName, struct tissueSampleVals *tsvList,
-                        boolean doLogTransform, char *version, struct tempName *pngTn)
-/* Draw a box-and-whiskers plot from GTEx sample data, using R boxplot */
-{
-/* Create R data frame.  This is a tab-sep file, one row per sample, 
- * with columns for sample, tissue, rpkm */
-struct tempName dfTn;
-trashDirFile(&dfTn, "hgc", "gtexGene", ".df.txt");
-FILE *f = fopen(dfTn.forCgi, "w");
-if (f == NULL)
-    errAbort("can't create temp file %s", dfTn.forCgi);
-fprintf(f, "sample\ttissue\trpkm\n");
-struct tissueSampleVals *tsv;
-int sampleId=1;
-int i;
-for (tsv = tsvList; tsv != NULL; tsv = tsv->next)
-    {
-    int count = tsv->count;
-    // remove trailing parenthesized phrases as not worth label length
-    chopSuffixAt(tsv->description, '(');
-    for (i=0; i<count; i++)
-        fprintf(f, "%d\t%s\t%0.3f\n", sampleId++, tsv->description, tsv->vals[i]);
-    }
-fclose(f);
-
-// Plot to PNG file
-if (!pngTn)
-    return FALSE;
-trashDirFile(pngTn, "hgc", "gtexGene", ".png");
-char cmd[256];
-
-/* Exec R in quiet mode, without reading/saving environment or workspace */
-safef(cmd, sizeof(cmd), "Rscript --vanilla --slave hgcData/gtexBoxplot.R %s %s %s %s %s %s",  
-                                geneName, dfTn.forCgi, pngTn->forHtml, 
-                                doLogTransform ? "log=TRUE" : "log=FALSE", "order=alpha", version);
-//NOTE: use "order=score" to order bargraph by median RPKM, descending
-
-int ret = system(cmd);
-if (ret == 0)
-    return TRUE;
-return FALSE;
-}
 
 static struct gtexGeneBed *getGtexGene(char *item, char *chrom, int start, int end, char *table)
 /* Retrieve gene info for this item from the main track table */
 {
 struct gtexGeneBed *gtexGene = NULL;
 struct sqlConnection *conn = hAllocConn(database);
 char **row;
 char query[512];
 struct sqlResult *sr;
 if (sqlTableExists(conn, table))
     {
     sqlSafef(query, sizeof query, "select * from %s where name = '%s' and chrom = '%s' "
                                   " and chromStart = %d and chromEnd = %d", 
                                         table, item, chrom, start, end);
     sr = sqlGetResult(conn, query);
     row = sqlNextRow(sr);
     if (row != NULL)
         {
         gtexGene = gtexGeneBedLoad(row);
         }
     sqlFreeResult(&sr);
     }
 hFreeConn(&conn);
 return gtexGene;
 }
 
-struct tissueSampleVals *getTissueSampleVals(char *geneId, boolean doLogTransform,
-                                                char *version, double *maxValRet)
-/* Get sample data for the gene.  Optionally log10 it. Return maximum value seen */
-{
-struct hash *tsHash = hashNew(0);
-struct tissueSampleVals *tsv;
-struct hashEl *hel;
-struct slDouble *val;
-double maxVal = 0;
-struct gtexSampleData *sd = NULL;
-char query[256];
-char **row;
-char buf[256];
-char *sampleDataTable = "gtexSampleData";
-safef(buf, sizeof(buf), "%s%s", sampleDataTable, gtexVersionSuffixFromVersion(version));
-struct sqlConnection *conn = hAllocConn("hgFixed");
-assert(sqlTableExists(conn, buf));
-sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", buf, geneId);
-struct sqlResult *sr = sqlGetResult(conn, query);
-while ((row = sqlNextRow(sr)) != NULL)
-    {
-    sd = gtexSampleDataLoad(row);
-    if ((hel = hashLookup(tsHash, sd->tissue)) == NULL)
-        {
-        AllocVar(tsv);
-        hashAdd(tsHash, sd->tissue, tsv);
-        }
-    else
-        tsv = (struct tissueSampleVals *)hel->val;
-    maxVal = max(maxVal, sd->score);
-    val = slDoubleNew(sd->score);
-    slAddHead(&tsv->valList, val);
-    }
-/*  Fill in tissue descriptions, fill values array and calculate stats for plotting
-        Then make a list, suitable for sorting by tissue or score
-    NOTE: Most of this not needed for R implementation */
-struct gtexTissue *tis = NULL, *tissues = gtexGetTissues(version);
-struct tissueSampleVals *tsList = NULL;
-int i;
-if (doLogTransform)
-    maxVal = log10(maxVal+1.0);
-for (tis = tissues; tis != NULL; tis = tis->next)
-    {
-    tsv = hashFindVal(tsHash, tis->name);
-    if (tsv == NULL)
-        {
-        /* no non-zero values for this tissue/gene */
-        AllocVar(tsv);
-        val = slDoubleNew(0.0);
-        slAddHead(&tsv->valList, val);
-        }
-    tsv->name = tis->name;
-    tsv->description = tis->description;
-    tsv->color = tis->color;
-    int count = tsv->count = slCount(tsv->valList);
-    double *vals = AllocArray(tsv->vals, count);
-    for (i=0; i<count; i++)
-        {
-        val = slPopHead(&tsv->valList);
-        if (doLogTransform)
-            vals[i] = log10(val->val+1.0);
-        else
-            vals[i] = val->val;
-        }
-    doubleBoxWhiskerCalc(tsv->count, tsv->vals, 
-                                &tsv->min, &tsv->q1, &tsv->median, &tsv->q3, &tsv->max);
-    slAddHead(&tsList, tsv);
-    }
-if (maxValRet != NULL)
-    *maxValRet = maxVal;
-return tsList;
-}
-
-boolean gtexGeneBoxplot(char *geneId, char *geneName, char *version, 
-                                boolean doLogTransform, struct tempName *pngTn)
-/* Create a png temp file with boxplot of GTEx expression values for this gene. 
- * GeneId is the Ensembl gene ID.  GeneName is the HUGO name, used for graph title;
- * If NULL, label with the Ensembl gene ID */
-{
-struct tissueSampleVals *tsvs;
-tsvs  = getTissueSampleVals(geneId, doLogTransform, version, NULL);
-char *label = geneName ? geneName : geneId;
-return drawGtexRBoxplot(label, tsvs, doLogTransform, version, pngTn);
-}
-
 char *getGeneDescription(struct gtexGeneBed *gtexGene)
 /* Get description for gene. Needed because knownGene table semantics have changed in hg38 */
 {
 char query[256];
 if (sameString(database, "hg38"))
     {
     char *geneId = cloneString(gtexGene->geneId);
     chopSuffix(geneId);
     sqlSafef(query, sizeof(query), 
         "select kgXref.description from kgXref, knownCanonical where knownCanonical.protein like '%%%s%%' and knownCanonical.transcript=kgXref.kgID", geneId);
     }
 else
     {
     sqlSafef(query, sizeof(query), 
                 "select kgXref.description from kgXref where geneSymbol='%s'", gtexGene->name);
     }
 struct sqlConnection *conn = hAllocConn(database);
 char *desc = sqlQuickString(conn, query);
 hFreeConn(&conn);
 return desc;
 }
 
 void doGtexGeneExpr(struct trackDb *tdb, char *item)
 /* Details of GTEx gene expression item */
 {
 int start = cartInt(cart, "o");
 int end = cartInt(cart, "t");
 struct gtexGeneBed *gtexGene = getGtexGene(item, seqName, start, end, tdb->table);
 if (gtexGene == NULL)
     errAbort("Can't find gene %s in GTEx gene table %s\n", item, tdb->table);
 
 genericHeader(tdb, item);
 printf("<b>Gene: </b>");
 char *desc = getGeneDescription(gtexGene);
 if (desc == NULL)
     printf("%s<br>\n", gtexGene->name);
 else
     {
     printf("<a target='_blank' href='%s?db=%s&hgg_gene=%s'>%s</a><br>\n", 
                         hgGeneName(), database, gtexGene->name, gtexGene->name);
     printf("<b>Description:</b> %s<br>\n", desc);
     }
 printf("<b>Ensembl gene ID:</b> %s<br>\n", gtexGene->geneId);
 // The actual transcript model is a union, so this identification is approximate
 // (used just to find a transcript class)
 char *geneClass = gtexGeneClass(gtexGene);
 printf("<b>GENCODE biotype: </b> %s<br>\n", gtexGene->geneType); 
 printf("<b>Gene class: </b><span style='color: %s'>%s</span><br>\n", 
             geneClassColorCode(geneClass), geneClass);
 printf("<b>Total median expression: </b> %0.2f RPKM<br>\n", gtexGeneTotalMedianExpression(gtexGene));
 printf("<b>Score: </b> %d<br>\n", gtexGene->score); 
 printf("<b>Genomic position: </b>%s <a href='%s&db=%s&position=%s%%3A%d-%d'>%s:%d-%d</a><br>\n", 
                         database, hgTracksPathAndSettings(), database, 
                         gtexGene->chrom, gtexGene->chromStart+1, gtexGene->chromEnd,
                         gtexGene->chrom, gtexGene->chromStart+1, gtexGene->chromEnd);
 puts("<p>");
 
 // set gtexDetails (e.g. to 'log') to show log transformed details page 
 //      if hgTracks is log-transformed
 boolean doLogTransform = (trackDbSetting(tdb, "gtexDetails") &&
                                 cartUsualBooleanClosestToHome(cart, tdb, FALSE, GTEX_LOG_TRANSFORM,
                                                 GTEX_LOG_TRANSFORM_DEFAULT));
 char *version = gtexVersion(tdb->table);
 struct tempName pngTn;
 if (gtexGeneBoxplot(gtexGene->geneId, gtexGene->name, version, doLogTransform, &pngTn))
     printf("<IMG SRC = \"%s\" BORDER=1><BR>\n", pngTn.forHtml);
-printf("<br><a target='_blank' href='http://www.gtexportal.org/home/gene/%s'>View at GTEx portal</a>\n", gtexGene->name);
+gtexPortalLink(gtexGene->geneId);
 
 printTrackHtml(tdb);
 }