7a99183408463d07d77151ab288713ea6427876a kate Wed Oct 7 10:32:36 2015 -0700 First cut gene expression boxplot image for GTEx details page. refs #15645 diff --git src/hg/hgc/gtexClick.c src/hg/hgc/gtexClick.c index 2d02174..12a1109 100644 --- src/hg/hgc/gtexClick.c +++ src/hg/hgc/gtexClick.c @@ -1,88 +1,312 @@ /* Details pages for GTEx tracks */ /* Copyright (C) 2015 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" +#include "hash.h" #include "hdb.h" +#include "trashDir.h" #include "hgc.h" #include "rainbow.h" +#include "hvGfx.h" #include "gtexUi.h" #include "gtexGeneBed.h" #include "gtexTissue.h" +#include "gtexSampleData.h" +/* Dimensions of image parts */ +int pad = 2; +int margin = 1; +//int graphHeight = 400; +int graphHeight = 325; +//int barWidth = 3; +int barWidth = 9; + +/* Dimensions of image overall and our portion within it. */ +int imageWidth, imageHeight, innerWidth, innerHeight, innerXoff, innerYoff; + +void setImageDims(int expCount) +/* Set up image according to count */ +{ +innerHeight = graphHeight + 2*pad; +innerWidth = pad + expCount*(pad + barWidth); +imageWidth = innerWidth + 2*margin; +imageHeight = innerHeight + 2*margin; +innerXoff = margin; +innerYoff = margin; +} + +struct rgbColor rgbFromIntColor(int color) +{ +return (struct rgbColor){.r=COLOR_32_BLUE(color), .g=COLOR_32_GREEN(color), .b=COLOR_32_RED(color)}; +} + +struct tissueSampleVals +/* RPKM expression values for multiple samples */ + { + struct tissueSampleVals *next; + char *description; /* GTEx tissue description */ + int color; /* GTEx tissue color */ + int count; /* number of samples */ + double *vals; /* RPKM values */ + double min, max; /* minimum, maximum value */ + double q1, median, q3; /* quartiles */ + struct slDouble *valList; /* used to create val array */ + }; + +int valToY(double val, double maxVal, int height) +/* Convert a value from 0 to maxVal to 0 to height-1 */ +{ +double scaled = val/maxVal; +int y = scaled * (height-1); +return height - 1 - y; +} + +void drawPointSmear(struct hvGfx *hvg, int xOff, int yOff, int height, + struct tissueSampleVals *tsv, double maxExp, int colorIx, double midClipMin, double midClipMax) +/* Draw a point for each sample value. */ +{ +int i, size = tsv->count; +double *vals = tsv->vals; + +// Min becomes max after valToY because graph flipped +int yMin = valToY(midClipMax, maxExp, height); +int yMax = valToY(midClipMin, maxExp, height); +for (i=0; i<size; ++i) + { + double exp = vals[i]; + int y = valToY(exp, maxExp, height); + if (y < yMin || y > yMax) + { + y += yOff; + hvGfxDot(hvg, xOff-1, y, colorIx); + hvGfxDot(hvg, xOff+1, y, colorIx); + hvGfxDot(hvg, xOff, y-1, colorIx); + hvGfxDot(hvg, xOff, y+1, colorIx); + } + } +} +void drawBoxAndWhiskers(struct hvGfx *hvg, int fillColorIx, int lineColorIx, int x, int y, + struct tissueSampleVals *tsv, double maxExp) +/* Draw a Tukey-type box plot. JK */ +{ +int medianColorIx = lineColorIx; +int whiskColorIx = lineColorIx; +double xCen = x + barWidth/2; +if (tsv->count > 1) + { + /* Cache a few fields from tsv in local variables */ + double q1 = tsv->q1, q3 = tsv->q3, median = tsv->median; + + /* Figure out position of first quarter, median, and third quarter in screen Y coordinates */ + int yQ1 = valToY(q1, maxExp, graphHeight) + y; + int yQ3 = valToY(q3, maxExp, graphHeight) + y; + int yMedian = valToY(median, maxExp, graphHeight); + + /* Draw a filled box that covers the middle two quarters */ + int qHeight = yQ1 - yQ3 + 1; + hvGfxOutlinedBox(hvg, x, yQ3, barWidth, qHeight, fillColorIx, lineColorIx); + + /* Figure out whiskers as 1.5x distance from median to nearest quarter */ + double iq3 = q3 - median; + double iq1 = median - q1; + double whisk1 = q1 - 1.5 * iq1; + double whisk2 = q3 + 1.5 * iq3; + + /* If whiskers extend past min or max point, clip them */ + if (whisk1 < tsv->min) + whisk1 = tsv->min; + if (whisk2 > tsv->max) + whisk2 = tsv->max; + + /* Convert whiskers to screen coordinates and do some sanity checks */ + int yWhisk1 = valToY(whisk1, maxExp, graphHeight) + y; + int yWhisk2 = valToY(whisk2, maxExp, graphHeight) + y; + assert(yQ1 <= yWhisk1); + assert(yQ3 >= yWhisk2); + + /* Draw horizontal lines of whiskers and median */ + hvGfxBox(hvg, x, yWhisk1, barWidth, 1, whiskColorIx); + hvGfxBox(hvg, x, yWhisk2, barWidth, 1, whiskColorIx); + + /* Draw median, 2 thick */ + hvGfxBox(hvg, x, y + yMedian-1, barWidth, 2, medianColorIx); + + /* Draw lines from mid quarters to whiskers */ + hvGfxBox(hvg, xCen, yWhisk2, 1, yQ3 - yWhisk2, whiskColorIx); + hvGfxBox(hvg, xCen, yQ1, 1, yWhisk1 - yQ1, whiskColorIx); + + /* Draw any outliers outside of whiskers */ + drawPointSmear(hvg, xCen, y, graphHeight, tsv, maxExp, lineColorIx, whisk1, whisk2); + } +} void doGtexGeneExpr(struct trackDb *tdb, char *item) /* Details of GTEX gene expression item */ { // Load item from table */ // TODO: Get full details from Data table -struct dyString *dy = dyStringNew(0); //char sampleTable[128]; //safef(sampleTable, sizeof(able), "%sSampleData", tdb->table); struct sqlConnection *conn = hAllocConn(database); char **row; +char query[512]; +struct sqlResult *sr; struct gtexGeneBed *gtexGene = NULL; int expCount = 0; if (sqlTableExists(conn, tdb->table)) { - char query[512]; sqlSafef(query, sizeof(query), "select * from %s where name = '%s'", tdb->table, item); - struct sqlResult *sr = sqlGetResult(conn, query); + sr = sqlGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { gtexGene = gtexGeneBedLoad(row); expCount = gtexGene->expCount; } sqlFreeResult(&sr); } hFreeConn(&conn); genericHeader(tdb, item); if (gtexGene != NULL) { printf("<b>Gene name:</b> %s<br>\n", gtexGene->name); printf("<b>Ensembl gene:</b> %s<br>\n", gtexGene->geneId); printf("<b>Ensembl transcript:</b> %s<br>\n", gtexGene->transcriptId); } + +// Get full sample data for this gene +char *sampleDataTable = "gtexSampleData"; +conn = hAllocConn("hgFixed"); +assert(sqlTableExists(conn, sampleDataTable)); +sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", + sampleDataTable, gtexGene->geneId); +sr = sqlGetResult(conn, query); +struct hash *tsHash = hashNew(0); +struct tissueSampleVals *tsv; +struct hashEl *hel; +struct slDouble *val; +double maxVal = 0; +struct gtexSampleData *sd = NULL; +while ((row = sqlNextRow(sr)) != NULL) + { + sd = gtexSampleDataLoad(row); + if ((hel = hashLookup(tsHash, sd->tissue)) == NULL) + { + AllocVar(tsv); + hashAdd(tsHash, sd->tissue, tsv); + } + else + tsv = (struct tissueSampleVals *)hel->val; + maxVal = max(maxVal, sd->score); + val = slDoubleNew(sd->score); + slAddHead(&tsv->valList, val); + } + +// Fill in tissue descriptions, fill values array and calculate stats for plotting +// Then make a list, suitable for sorting by tissue or score +struct gtexTissue *tis = NULL, *tissues = gtexGetTissues(); +struct tissueSampleVals *tsList = NULL; +int i; +boolean doLogTransform = cartUsualBooleanClosestToHome(cart, tdb, FALSE, GTEX_LOG_TRANSFORM, + GTEX_LOG_TRANSFORM_DEFAULT); +if (doLogTransform) + maxVal = log10(maxVal+1.0); +for (tis = tissues; tis != NULL; tis = tis->next) + { + tsv = hashMustFindVal(tsHash, tis->name); + tsv->description = tis->description; + tsv->color = tis->color; + int count = tsv->count = slCount(tsv->valList); + double *vals = AllocArray(tsv->vals, count); + for (i=0; i<count; i++) + { + val = slPopHead(&tsv->valList); + if (doLogTransform) + vals[i] = log10(val->val+1.0); + else + vals[i] = val->val; + } + doubleBoxWhiskerCalc(tsv->count, tsv->vals, + &tsv->min, &tsv->q1, &tsv->median, &tsv->q3, &tsv->max); + slAddHead(&tsList, tsv); + } + +slReverse(&tsList); +// Tissue list is now sorted by GTEx tissue ordering. +// TODO: Option to sort by score descending. + +// Initialize graphics for Box-and-whiskers plot +setImageDims(slCount(tsList)); +struct tempName pngTn; +trashDirFile(&pngTn, "hgc", "gtexGene", ".png"); +//uglyf("Trash file: %s\n", pngTn.forCgi); + +struct hvGfx *hvg = hvGfxOpenPng(imageWidth, imageHeight, pngTn.forCgi, FALSE); +//hvGfxSetClip(hvg, 0, 0, imageWidth, imageHeight); // needed ? + +struct rgbColor lineColor = {.r=0}; +int lineColorIx = hvGfxFindColorIx(hvg, lineColor.r, lineColor.g, lineColor.b); + +int x=innerXoff + pad, y = innerYoff + pad; +for (tsv = tsList; tsv != NULL; tsv = tsv->next) + { + struct rgbColor fillColor = rgbFromIntColor(tsv->color); + int fillColorIx = hvGfxFindColorIx(hvg, fillColor.r, fillColor.g, fillColor.b); + drawBoxAndWhiskers(hvg, fillColorIx, lineColorIx, x, y, tsv, maxVal); + x += barWidth + pad; + } + +//hvGfxUnclip(hvg); +hvGfxClose(&hvg); +printf("<IMG SRC = \"%s\" BORDER=1><BR>\n", pngTn.forHtml); +/*printf("<IMG SRC = \"%s\" BORDER=1 WIDTH=%d HEIGHT=%d><BR>\n", + pngTn.forHtml, imageWidth, imageHeight); +*/ +//cloneString(gifTn.forHtml); + +// Track description printTrackHtml(tdb); // Print out tissue table with color assignments +#ifdef DEBUG conn = hAllocConn("hgFixed"); char *tissueTable = "gtexTissue"; if (sqlTableExists(conn, tissueTable)) { dyStringPrintf(dy, "<table>"); dyStringPrintf(dy, "<tr><td><b>Color<b></td><td><b>Tissue<b></td></tr>\n"); int i; double invExpCount = 1.0/expCount; char query[512]; sqlSafef(query, sizeof(query), "select * from %s", tissueTable); struct sqlResult *sr = sqlGetResult(conn, query); for (i=0; i<expCount; i++) { row = sqlNextRow(sr); if (row == NULL) break; struct gtexTissue *tissue = gtexTissueLoad(row); double colPos = invExpCount * i; struct rgbColor color = saturatedRainbowAtPos(colPos); dyStringPrintf(dy, "<tr><td bgcolor='#%02X%02X%02X'></td><td>%s</td></tr>\n", color.r, color.g, color.b, tissue->description); } sqlFreeResult(&sr); } hFreeConn(&conn); dyStringPrintf(dy, "</table>"); puts(dy->string); //cartWebStart(cart, database, "List of items assayed in %s", clusterTdb->shortLabel); //genericClickHandlerPlus(tdb, item, item, dy->string); dyStringFree(&dy); +#endif }