118ba0035c84f46e68f28d35bfabf7bcf5b77216 kate Tue Feb 9 15:25:15 2016 -0800 Tear out partial C implementation of gene expression boxplot. We will use R. refs #15645 diff --git src/hg/hgc/gtexClick.c src/hg/hgc/gtexClick.c index 1bcaf75..337d186 100644 --- src/hg/hgc/gtexClick.c +++ src/hg/hgc/gtexClick.c @@ -24,268 +24,68 @@ int color; /* GTEx tissue color */ int count; /* number of samples */ double *vals; /* RPKM values */ double min, max; /* minimum, maximum value */ double q1, median, q3; /* quartiles */ struct slDouble *valList; /* used to create val array */ }; /********************************************************/ /* R implementation. Invokes R script */ void drawGtexRBoxplot(struct gtexGeneBed *gtexGene, struct tissueSampleVals *tsvList, boolean doLogTransform) /* Draw a box-and-whiskers plot from GTEx sample data, using R boxplot */ { -/* Create R data frame - This is a tab-sep file, a row per sample, with columns for sample, tissue, rpkm */ +/* Create R data frame. This is a tab-sep file, one row per sample, + * with columns for sample, tissue, rpkm */ struct tempName dfTn; trashDirFile(&dfTn, "hgc", "gtexGene", ".df.txt"); FILE *f = fopen(dfTn.forCgi, "w"); if (f == NULL) errAbort("can't create temp file %s", dfTn.forCgi); fprintf(f, "sample\ttissue\trpkm\n"); struct tissueSampleVals *tsv; int sampleId=1; int i; for (tsv = tsvList; tsv != NULL; tsv = tsv->next) { int count = tsv->count; for (i=0; iname, tsv->vals[i]); } fclose(f); // Plot to PNG file struct tempName pngTn; trashDirFile(&pngTn, "hgc", "gtexGene", ".png"); char cmd[256]; /* Exec R in quiet mode, without reading/saving environment or workspace */ -//safef(cmd, sizeof(cmd), "Rscript --gui=X11 --vanilla --slave hgcData/gtexBoxplot.R %s %s %s", safef(cmd, sizeof(cmd), "Rscript --vanilla --slave hgcData/gtexBoxplot.R %s %s %s %s", gtexGene->name, dfTn.forCgi, pngTn.forHtml, doLogTransform ? "log=TRUE" : "log=FALSE"); int ret = system(cmd); if (ret == 0) { printf("
\n", pngTn.forHtml); //printf("
\n", //pngTn.forHtml, imageWidth, imageHeight); //pngTn.forHtml, 900, 500); } } -/*****************************************************************/ -/* C implementation (partial -- currently lacks axes and labels) */ - -/* Dimensions of image parts */ -int pad = 2; -int margin = 1; -//int graphHeight = 400; -int graphHeight = 325; -//int barWidth = 3; -int barWidth = 12; -int titleHeight = 30; - -int xAxisHeight = 100; -int yAxisWidth = 50; -int axisMargin = 5; - -/* Dimensions of image overall and our portion within it. */ -int imageWidth, imageHeight, innerWidth, innerHeight, innerXoff, innerYoff; -int plotWidth, plotHeight; /* just the plot */ - -void setImageDims(int expCount) -/* Set up image according to count */ -{ -innerHeight = graphHeight + 2*pad; -innerWidth = pad + expCount*(pad + barWidth); -plotWidth = innerWidth + 2*margin; -plotHeight = innerHeight + 2*margin; -imageWidth = plotWidth + yAxisWidth; -imageHeight = plotHeight + xAxisHeight + titleHeight; -innerXoff = yAxisWidth + margin; -innerYoff = titleHeight + margin; -} - -struct rgbColor rgbFromIntColor(int color) -{ -return (struct rgbColor){.r=COLOR_32_BLUE(color), .g=COLOR_32_GREEN(color), .b=COLOR_32_RED(color)}; -} - -int valToY(double val, double maxVal, int height) -/* Convert a value from 0 to maxVal to 0 to height-1 */ -{ -double scaled = val/maxVal; -int y = scaled * (height-1); -return height - 1 - y; -} - -int cmpTissueSampleValsMedianScore(const void *va, const void *vb) -/* Compare RPKM scores */ -{ -const struct tissueSampleVals *a = *((struct tissueSampleVals **)va); -const struct tissueSampleVals *b = *((struct tissueSampleVals **)vb); -if (a->median == b->median) - return 0; -return (a->median > b->median ? 1: -1); -} - -void drawPointSmear(struct hvGfx *hvg, int xOff, int yOff, int height, - struct tissueSampleVals *tsv, double maxExp, int colorIx, double midClipMin, double midClipMax) -/* Draw a point for each sample value. */ -{ -int i, size = tsv->count; -double *vals = tsv->vals; - -// Min becomes max after valToY because graph flipped -int yMin = valToY(midClipMax, maxExp, height); -int yMax = valToY(midClipMin, maxExp, height); -for (i=0; i yMax) - { - y += yOff; - hvGfxDot(hvg, xOff-1, y, colorIx); - hvGfxDot(hvg, xOff+1, y, colorIx); - hvGfxDot(hvg, xOff, y-1, colorIx); - hvGfxDot(hvg, xOff, y+1, colorIx); - } - } -} - -void drawBoxAndWhiskers(struct hvGfx *hvg, int fillColorIx, int x, int y, - struct tissueSampleVals *tsv, double maxExp) -/* Draw a box-and-whiskers element of a Tukey-type box plot. Code mostly from JK */ -{ -struct rgbColor lineColor = {.r=128, .g=128, .b=128}; -int lineColorIx = hvGfxFindColorIx(hvg, lineColor.r, lineColor.g, lineColor.b); -int medianColorIx = hvGfxFindColorIx(hvg,0,0,0); -int whiskColorIx = hvGfxFindColorIx(hvg,192,192,192); -double xCen = x + barWidth/2; -if (tsv->count > 1) - { - /* Cache a few fields from tsv in local variables */ - double q1 = tsv->q1, q3 = tsv->q3, median = tsv->median; - - /* Figure out position of first quarter, median, and third quarter in screen Y coordinates */ - int yQ1 = valToY(q1, maxExp, graphHeight) + y; - int yQ3 = valToY(q3, maxExp, graphHeight) + y; - int yMedian = valToY(median, maxExp, graphHeight); - - /* Draw a filled box that covers the middle two quarters */ - int qHeight = yQ1 - yQ3 + 1; - hvGfxOutlinedBox(hvg, x, yQ3, barWidth, qHeight, fillColorIx, lineColorIx); - - /* Figure out whiskers as 1.5x distance from median to nearest quarter */ - double iq3 = q3 - median; - double iq1 = median - q1; - double whisk1 = q1 - 1.5 * iq1; - double whisk2 = q3 + 1.5 * iq3; - - /* If whiskers extend past min or max point, clip them */ - if (whisk1 < tsv->min) - whisk1 = tsv->min; - if (whisk2 > tsv->max) - whisk2 = tsv->max; - - /* Convert whiskers to screen coordinates and do some sanity checks */ - int yWhisk1 = valToY(whisk1, maxExp, graphHeight) + y; - int yWhisk2 = valToY(whisk2, maxExp, graphHeight) + y; - assert(yQ1 <= yWhisk1); - assert(yQ3 >= yWhisk2); - - /* Draw horizontal lines of whiskers and median */ - hvGfxBox(hvg, x, yWhisk1, barWidth, 1, whiskColorIx); - hvGfxBox(hvg, x, yWhisk2, barWidth, 1, whiskColorIx); - - /* Draw median, 2 thick */ - hvGfxBox(hvg, x, y + yMedian-1, barWidth, 2, medianColorIx); - - /* Draw lines from mid quarters to whiskers */ - hvGfxBox(hvg, xCen, yWhisk2, 1, yQ3 - yWhisk2, whiskColorIx); - hvGfxBox(hvg, xCen, yQ1, 1, yWhisk1 - yQ1, whiskColorIx); - - /* Draw any outliers outside of whiskers */ - drawPointSmear(hvg, xCen, y, graphHeight, tsv, maxExp, lineColorIx, whisk1, whisk2); - } -} - -void drawGtexBoxplot(struct gtexGeneBed *gtexGene, struct tissueSampleVals *tsList, double maxVal, - boolean doLogTransform) -/* Draw a box-and-whiskers plot from GTEx sample data */ -{ -/* Sort by score descending. (Tissue list is alpha sorted by tissue name) */ -slSort(&tsList, cmpTissueSampleValsMedianScore); -slReverse(&tsList); - -/* Initialize graphics for Box-and-whiskers plot */ -setImageDims(slCount(tsList)); -struct tempName pngTn; -trashDirFile(&pngTn, "hgc", "gtexGene", ".png"); -struct hvGfx *hvg = hvGfxOpenPng(imageWidth, imageHeight, pngTn.forCgi, FALSE); -//hvGfxSetClip(hvg, 0, 0, imageWidth, imageHeight); // needed ? -int x=innerXoff + pad, y = innerYoff + pad; -struct tissueSampleVals *tsv; - -/* Draw boxes */ -for (tsv = tsList; tsv != NULL; tsv = tsv->next) - { - struct rgbColor fillColor = rgbFromIntColor(tsv->color); - int fillColorIx = hvGfxFindColorIx(hvg, fillColor.r, fillColor.g, fillColor.b); - drawBoxAndWhiskers(hvg, fillColorIx, x, y, tsv, maxVal); - x += barWidth + pad; - } -//hvGfxUnclip(hvg); - -/* Draw title */ -int blackColorIx = hvGfxFindColorIx(hvg, 0,0,0); -char title[256]; -// TODO: get tissue count and release from tables -safef(title, sizeof(title), "%s Gene Expression in 53 Tissues from GTEx (V4 release, 2014) - %s", - gtexGene->name, doLogTransform ? "log10(RPKM+1)" : "(RPKM)"); -hvGfxTextCentered(hvg, 0, innerYoff, imageWidth, titleHeight, blackColorIx, mgMediumFont(), title); - -/* Draw axes */ -x = innerXoff - margin; - -/* Y axis, tick marks, and labels */ -hvGfxLine(hvg, x, innerYoff, x, plotHeight, blackColorIx); - -/* For now, just mark the max value */ -char buf[16]; -safef(buf, sizeof(buf), "%d -", round(maxVal)); -hvGfxText(hvg, x-24, innerYoff-8, blackColorIx, mgMediumFont(), buf); - -/* X axis */ -y = innerYoff + plotHeight; -hvGfxLine(hvg, innerXoff, y, plotWidth, y, blackColorIx); -// TODO: Title, tick marks and labels - -// TODO: Y axis tick marks and labels (45 degree or vertical) -// Approaches: Expand memgvx or use postscript to rotate - -hvGfxClose(&hvg); -printf("
\n", pngTn.forHtml); -/*printf("
\n", - pngTn.forHtml, imageWidth, imageHeight); -*/ -} - struct gtexGeneBed *getGtexGene(char *item, char *table) /* Retrieve gene info for this item from the main track table */ { struct gtexGeneBed *gtexGene = NULL; struct sqlConnection *conn = hAllocConn(database); char **row; char query[512]; struct sqlResult *sr; if (sqlTableExists(conn, table)) { sqlSafef(query, sizeof(query), "select * from %s where name = '%s'", table, item); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { @@ -301,31 +101,32 @@ double *maxValRet) /* Get sample data for the gene. Optionally log10 it. Return maximum value seen */ { // TODO: support version table name. Likely move to lib. struct hash *tsHash = hashNew(0); struct tissueSampleVals *tsv; struct hashEl *hel; struct slDouble *val; double maxVal = 0; struct gtexSampleData *sd = NULL; char query[256]; char **row; char *sampleDataTable = "gtexSampleData"; struct sqlConnection *conn = hAllocConn("hgFixed"); assert(sqlTableExists(conn, sampleDataTable)); -sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", sampleDataTable, gtexGene->geneId); +sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", + sampleDataTable, gtexGene->geneId); struct sqlResult *sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { sd = gtexSampleDataLoad(row); if ((hel = hashLookup(tsHash, sd->tissue)) == NULL) { AllocVar(tsv); hashAdd(tsHash, sd->tissue, tsv); } else tsv = (struct tissueSampleVals *)hel->val; maxVal = max(maxVal, sd->score); val = slDoubleNew(sd->score); slAddHead(&tsv->valList, val); } @@ -384,24 +185,19 @@ "select kgXref.description from kgXref, knownToEnsembl where knownToEnsembl.value='%s' and knownToEnsembl.name=kgXref.kgID", gtexGene->transcriptId); struct sqlConnection *conn = hAllocConn(database); char *desc = sqlQuickString(conn, query); hFreeConn(&conn); if (desc != NULL) printf("Description: %s
\n", desc); printf("Ensembl ID: %s
\n", gtexGene->geneId); printf("View at GTEx portal
\n", gtexGene->geneId); puts("

"); boolean doLogTransform = cartUsualBooleanClosestToHome(cart, tdb, FALSE, GTEX_LOG_TRANSFORM, GTEX_LOG_TRANSFORM_DEFAULT); double maxVal = 0.0; struct tissueSampleVals *tsvs = getTissueSampleVals(gtexGene, doLogTransform, &maxVal); -// TODO: Remove one -char *viz = cgiUsualString("viz", "R"); -if (sameString(viz, "C")) - drawGtexBoxplot(gtexGene, tsvs, maxVal, doLogTransform); -else drawGtexRBoxplot(gtexGene, tsvs, doLogTransform); printTrackHtml(tdb); }