5d3d772b5d1d3475f7212dd3728e128a20011e76 kate Wed Oct 21 16:32:50 2015 -0700 Code cleanup in preparation for mergingin into main. refs #15645 diff --git src/hg/hgc/gtexClick.c src/hg/hgc/gtexClick.c index 06411ca..fd7ecca 100644 --- src/hg/hgc/gtexClick.c +++ src/hg/hgc/gtexClick.c @@ -1,116 +1,94 @@ /* Details pages for GTEx tracks */ /* Copyright (C) 2015 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "hash.h" #include "hdb.h" +#include "hvGfx.h" #include "trashDir.h" #include "hgc.h" -#include "jsHelper.h" -#include "rainbow.h" -#include "hvGfx.h" -#include "gtexUi.h" #include "gtexGeneBed.h" #include "gtexTissue.h" #include "gtexSampleData.h" - -boolean doLogTransform = FALSE; -struct gtexGeneBed *gtexGene = NULL; +#include "gtexUi.h" struct tissueSampleVals /* RPKM expression values for multiple samples */ { struct tissueSampleVals *next; char *name; /* GTEx tissue name */ char *description; /* GTEx tissue description */ int color; /* GTEx tissue color */ int count; /* number of samples */ double *vals; /* RPKM values */ double min, max; /* minimum, maximum value */ double q1, median, q3; /* quartiles */ struct slDouble *valList; /* used to create val array */ }; -int cmpTissueSampleValsMedianScore(const void *va, const void *vb) -/* Compare RPKM scores */ -{ -const struct tissueSampleVals *a = *((struct tissueSampleVals **)va); -const struct tissueSampleVals *b = *((struct tissueSampleVals **)vb); -if (a->median == b->median) - return 0; -return (a->median > b->median ? 1: -1); -} +/********************************************************/ +/* R implementation. Invokes R script */ -void RGtexBoxplot(struct tissueSampleVals *tsvList) +void drawGtexRBoxplot(struct gtexGeneBed *gtexGene, struct tissueSampleVals *tsvList, + boolean doLogTransform) +/* Draw a box-and-whiskers plot from GTEx sample data, using R boxplot */ { -// Tissue list is now sorted by GTEx tissue ordering. -// Sort by score descending. -slSort(&tsvList, cmpTissueSampleValsMedianScore); -slReverse(&tsvList); - -// Create R data frame +/* Create R data frame + This is a tab-sep file, a row per sample, with columns for sample, tissue, rpkm */ struct tempName dfTn; trashDirFile(&dfTn, "hgc", "gtexGene", ".df.txt"); FILE *f = fopen(dfTn.forCgi, "w"); if (f == NULL) errAbort("can't create temp file %s", dfTn.forCgi); fprintf(f, "sample\ttissue\trpkm\n"); struct tissueSampleVals *tsv; -int i; int sampleId=1; +int i; for (tsv = tsvList; tsv != NULL; tsv = tsv->next) { int count = tsv->count; for (i=0; iname, tsv->vals[i]); } fclose(f); // Plot to PNG file struct tempName pngTn; trashDirFile(&pngTn, "hgc", "gtexGene", ".png"); char cmd[256]; -// Exec R in quiet mode, without reading/saving environment or workspace -//safef(cmd, sizeof(cmd), "Rscript --gui=X11 --vanilla --slave gtex/geneBoxplot.R %s %s %s", -safef(cmd, sizeof(cmd), "Rscript --vanilla --slave gtex/geneBoxplot.R %s %s %s %s", + +/* Exec R in quiet mode, without reading/saving environment or workspace */ +//safef(cmd, sizeof(cmd), "Rscript --gui=X11 --vanilla --slave hgcData/gtexBoxplot.R %s %s %s", +safef(cmd, sizeof(cmd), "Rscript --vanilla --slave hgcData/gtexBoxplot.R %s %s %s %s", gtexGene->name, dfTn.forCgi, pngTn.forHtml, doLogTransform ? "log=TRUE" : "log=FALSE"); int ret = system(cmd); if (ret == 0) { printf("
\n", pngTn.forHtml); //printf("
\n", //pngTn.forHtml, imageWidth, imageHeight); //pngTn.forHtml, 900, 500); } } -void d3GtexBoxplot(struct tissueSampleVals *tsvList) -{ -//puts("\n"); -//puts("\n"); -printf("
\n"); -jsIncludeFile("d3.min.js", NULL); -jsIncludeFile("d3plus.gtex.js", NULL); -jsTrackingVar("geneName", gtexGene->name); -jsTrackingVar("geneId", gtexGene->geneId); -jsIncludeFile("gtex.js", NULL); -} +/*****************************************************************/ +/* C implementation (partial -- currently lacks axes and labels) */ /* Dimensions of image parts */ int pad = 2; int margin = 1; //int graphHeight = 400; int graphHeight = 325; //int barWidth = 3; int barWidth = 12; int titleHeight = 30; int xAxisHeight = 100; int yAxisWidth = 50; int axisMargin = 5; /* Dimensions of image overall and our portion within it. */ @@ -131,58 +109,68 @@ } struct rgbColor rgbFromIntColor(int color) { return (struct rgbColor){.r=COLOR_32_BLUE(color), .g=COLOR_32_GREEN(color), .b=COLOR_32_RED(color)}; } int valToY(double val, double maxVal, int height) /* Convert a value from 0 to maxVal to 0 to height-1 */ { double scaled = val/maxVal; int y = scaled * (height-1); return height - 1 - y; } +int cmpTissueSampleValsMedianScore(const void *va, const void *vb) +/* Compare RPKM scores */ +{ +const struct tissueSampleVals *a = *((struct tissueSampleVals **)va); +const struct tissueSampleVals *b = *((struct tissueSampleVals **)vb); +if (a->median == b->median) + return 0; +return (a->median > b->median ? 1: -1); +} + void drawPointSmear(struct hvGfx *hvg, int xOff, int yOff, int height, struct tissueSampleVals *tsv, double maxExp, int colorIx, double midClipMin, double midClipMax) /* Draw a point for each sample value. */ { int i, size = tsv->count; double *vals = tsv->vals; // Min becomes max after valToY because graph flipped int yMin = valToY(midClipMax, maxExp, height); int yMax = valToY(midClipMin, maxExp, height); for (i=0; i yMax) { y += yOff; hvGfxDot(hvg, xOff-1, y, colorIx); hvGfxDot(hvg, xOff+1, y, colorIx); hvGfxDot(hvg, xOff, y-1, colorIx); hvGfxDot(hvg, xOff, y+1, colorIx); } } } void drawBoxAndWhiskers(struct hvGfx *hvg, int fillColorIx, int x, int y, struct tissueSampleVals *tsv, double maxExp) -/* Draw a Tukey-type box plot. JK */ +/* Draw a box-and-whiskers element of a Tukey-type box plot. Code mostly from JK */ { struct rgbColor lineColor = {.r=128, .g=128, .b=128}; int lineColorIx = hvGfxFindColorIx(hvg, lineColor.r, lineColor.g, lineColor.b); int medianColorIx = hvGfxFindColorIx(hvg,0,0,0); int whiskColorIx = hvGfxFindColorIx(hvg,192,192,192); double xCen = x + barWidth/2; if (tsv->count > 1) { /* Cache a few fields from tsv in local variables */ double q1 = tsv->q1, q3 = tsv->q3, median = tsv->median; /* Figure out position of first quarter, median, and third quarter in screen Y coordinates */ int yQ1 = valToY(q1, maxExp, graphHeight) + y; int yQ3 = valToY(q3, maxExp, graphHeight) + y; int yMedian = valToY(median, maxExp, graphHeight); @@ -213,255 +201,192 @@ hvGfxBox(hvg, x, yWhisk1, barWidth, 1, whiskColorIx); hvGfxBox(hvg, x, yWhisk2, barWidth, 1, whiskColorIx); /* Draw median, 2 thick */ hvGfxBox(hvg, x, y + yMedian-1, barWidth, 2, medianColorIx); /* Draw lines from mid quarters to whiskers */ hvGfxBox(hvg, xCen, yWhisk2, 1, yQ3 - yWhisk2, whiskColorIx); hvGfxBox(hvg, xCen, yQ1, 1, yWhisk1 - yQ1, whiskColorIx); /* Draw any outliers outside of whiskers */ drawPointSmear(hvg, xCen, y, graphHeight, tsv, maxExp, lineColorIx, whisk1, whisk2); } } - -void drawGtexBoxplot(struct tissueSampleVals *tsList, double maxVal) +void drawGtexBoxplot(struct gtexGeneBed *gtexGene, struct tissueSampleVals *tsList, double maxVal, + boolean doLogTransform) +/* Draw a box-and-whiskers plot from GTEx sample data */ { -// Tissue list is now sorted by GTEx tissue ordering. -// Sort by score descending. +/* Sort by score descending. (Tissue list is alpha sorted by tissue name) */ slSort(&tsList, cmpTissueSampleValsMedianScore); slReverse(&tsList); -// Initialize graphics for Box-and-whiskers plot +/* Initialize graphics for Box-and-whiskers plot */ setImageDims(slCount(tsList)); struct tempName pngTn; trashDirFile(&pngTn, "hgc", "gtexGene", ".png"); -//uglyf("Trash file: %s\n", pngTn.forCgi); - struct hvGfx *hvg = hvGfxOpenPng(imageWidth, imageHeight, pngTn.forCgi, FALSE); //hvGfxSetClip(hvg, 0, 0, imageWidth, imageHeight); // needed ? - int x=innerXoff + pad, y = innerYoff + pad; struct tissueSampleVals *tsv; + +/* Draw boxes */ for (tsv = tsList; tsv != NULL; tsv = tsv->next) { struct rgbColor fillColor = rgbFromIntColor(tsv->color); int fillColorIx = hvGfxFindColorIx(hvg, fillColor.r, fillColor.g, fillColor.b); drawBoxAndWhiskers(hvg, fillColorIx, x, y, tsv, maxVal); x += barWidth + pad; } //hvGfxUnclip(hvg); /* Draw title */ int blackColorIx = hvGfxFindColorIx(hvg, 0,0,0); char title[256]; // TODO: get tissue count and release from tables safef(title, sizeof(title), "%s Gene Expression in 53 Tissues from GTEx (V4 release, 2014) - %s", gtexGene->name, doLogTransform ? "log10(RPKM+1)" : "(RPKM)"); hvGfxTextCentered(hvg, 0, innerYoff, imageWidth, titleHeight, blackColorIx, mgMediumFont(), title); /* Draw axes */ x = innerXoff - margin; -// Y axis, tick marks, and labels +/* Y axis, tick marks, and labels */ hvGfxLine(hvg, x, innerYoff, x, plotHeight, blackColorIx); -// for now, just mark the max value +/* For now, just mark the max value */ char buf[16]; safef(buf, sizeof(buf), "%d -", round(maxVal)); hvGfxText(hvg, x-24, innerYoff-8, blackColorIx, mgMediumFont(), buf); -// X axis +/* X axis */ y = innerYoff + plotHeight; hvGfxLine(hvg, innerXoff, y, plotWidth, y, blackColorIx); +// TODO: Title, tick marks and labels -// Draw vertical labels on separate graphics object, rotate, then merge -/* -struct memGvx *mg = mgNew(imageHeight, imageWidth); -MgFont *font = mgMediumFont(); -int lineHeight = mgFontLineHeight(font)-1; -y += lineHeight; -mgTextRight(mg, 0+1, y+1, width-1, lineHeight, MG_BLACK, font, labels[i]); -hvGfxMergeWithPng(hvg, mgRotate90(mg)); -// or try using mg lib entirely, via mgSavePng. -*/ - -#ifdef TICKS -/* Figure tick intervals. GTEx portal uses .5 for log, 2, 5, 10, 20, 50, 100 for linear. - * With 7-12 ticks. 5-10 seems nicer to me. Here's how it's done for browser ruler */ - -static long figureTickSpan(long totalLength, int maxNumTicks) -/* Figure out whether ticks on ruler should be 1, 5, 10, 50, 100, 500, - * 1000, etc. units apart. */ -{ -int roughTickLen = totalLength/maxNumTicks; -int i; -int tickLen = 1; - -for (i=0; i<9; ++i) - { - if (roughTickLen < tickLen) - return tickLen; - tickLen *= 5; - if (roughTickLen < tickLen) - return tickLen; - tickLen *= 2; - } -return 1000000000; -} -#endif - - +// TODO: Y axis tick marks and labels (45 degree or vertical) +// Approaches: Expand memgvx or use postscript to rotate hvGfxClose(&hvg); printf("
\n", pngTn.forHtml); /*printf("
\n", pngTn.forHtml, imageWidth, imageHeight); */ -//cloneString(gifTn.forHtml); } -void doGtexGeneExpr(struct trackDb *tdb, char *item) -/* Details of GTEX gene expression item */ -{ -// Load item from table */ - -// TODO: Get full details from Data table -//char sampleTable[128]; -//safef(sampleTable, sizeof(able), "%sSampleData", tdb->table); +struct gtexGeneBed *getGtexGene(char *item, char *table) +/* Retrieve gene info for this item from the main track table */ +{ +struct gtexGeneBed *gtexGene = NULL; struct sqlConnection *conn = hAllocConn(database); char **row; char query[512]; struct sqlResult *sr; -int expCount = 0; -if (sqlTableExists(conn, tdb->table)) +if (sqlTableExists(conn, table)) { - sqlSafef(query, sizeof(query), "select * from %s where name = '%s'", tdb->table, item); + sqlSafef(query, sizeof(query), "select * from %s where name = '%s'", table, item); sr = sqlGetResult(conn, query); row = sqlNextRow(sr); if (row != NULL) { gtexGene = gtexGeneBedLoad(row); - expCount = gtexGene->expCount; } sqlFreeResult(&sr); } hFreeConn(&conn); - -genericHeader(tdb, item); - -if (gtexGene != NULL) - { - // TODO: link to UCSC gene - printf("Gene: %s
", gtexGene->name); - printf("Ensembl ID: %s
\n", gtexGene->geneId); - printf("View at GTEx portal
\n", gtexGene->geneId); - puts("

"); +return gtexGene; } -// Get full sample data for this gene -char *sampleDataTable = "gtexSampleData"; -conn = hAllocConn("hgFixed"); -assert(sqlTableExists(conn, sampleDataTable)); -sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", - sampleDataTable, gtexGene->geneId); -sr = sqlGetResult(conn, query); +struct tissueSampleVals *getTissueSampleVals(struct gtexGeneBed *gtexGene, boolean doLogTransform, + double *maxValRet) +/* Get sample data for the gene. Optionally log10 it. Return maximum value seen */ +{ +// TODO: support version table name. Likely move to lib. struct hash *tsHash = hashNew(0); struct tissueSampleVals *tsv; struct hashEl *hel; struct slDouble *val; double maxVal = 0; struct gtexSampleData *sd = NULL; +char query[256]; +char **row; +char *sampleDataTable = "gtexSampleData"; +struct sqlConnection *conn = hAllocConn("hgFixed"); +assert(sqlTableExists(conn, sampleDataTable)); +sqlSafef(query, sizeof(query), "select * from %s where geneId='%s'", sampleDataTable, gtexGene->geneId); +struct sqlResult *sr = sqlGetResult(conn, query); while ((row = sqlNextRow(sr)) != NULL) { sd = gtexSampleDataLoad(row); if ((hel = hashLookup(tsHash, sd->tissue)) == NULL) { AllocVar(tsv); hashAdd(tsHash, sd->tissue, tsv); } else tsv = (struct tissueSampleVals *)hel->val; maxVal = max(maxVal, sd->score); val = slDoubleNew(sd->score); slAddHead(&tsv->valList, val); } - -// Fill in tissue descriptions, fill values array and calculate stats for plotting -// Then make a list, suitable for sorting by tissue or score +/* Fill in tissue descriptions, fill values array and calculate stats for plotting + Then make a list, suitable for sorting by tissue or score + NOTE: Most of this not needed for R implementation */ struct gtexTissue *tis = NULL, *tissues = gtexGetTissues(); struct tissueSampleVals *tsList = NULL; int i; -doLogTransform = cartUsualBooleanClosestToHome(cart, tdb, FALSE, GTEX_LOG_TRANSFORM, - GTEX_LOG_TRANSFORM_DEFAULT); if (doLogTransform) maxVal = log10(maxVal+1.0); for (tis = tissues; tis != NULL; tis = tis->next) { tsv = hashMustFindVal(tsHash, tis->name); tsv->name = tis->name; tsv->description = tis->description; tsv->color = tis->color; int count = tsv->count = slCount(tsv->valList); double *vals = AllocArray(tsv->vals, count); for (i=0; ivalList); if (doLogTransform) vals[i] = log10(val->val+1.0); else vals[i] = val->val; } doubleBoxWhiskerCalc(tsv->count, tsv->vals, &tsv->min, &tsv->q1, &tsv->median, &tsv->q3, &tsv->max); slAddHead(&tsList, tsv); } +if (maxValRet != NULL) + *maxValRet = maxVal; +return tsList; +} + +void doGtexGeneExpr(struct trackDb *tdb, char *item) +/* Details of GTEx gene expression item */ +{ +struct gtexGeneBed *gtexGene = getGtexGene(item, tdb->table); +if (gtexGene == NULL) + errAbort("Can't find gene %s in GTEx gene table %s\n", item, tdb->table); + +genericHeader(tdb, item); +// TODO: link to UCSC gene +printf("Gene: %s
", gtexGene->name); +printf("Ensembl ID: %s
\n", gtexGene->geneId); +printf("View at GTEx portal
\n", gtexGene->geneId); +puts("

"); + +boolean doLogTransform = cartUsualBooleanClosestToHome(cart, tdb, FALSE, GTEX_LOG_TRANSFORM, + GTEX_LOG_TRANSFORM_DEFAULT); +double maxVal = 0.0; +struct tissueSampleVals *tsvs = getTissueSampleVals(gtexGene, doLogTransform, &maxVal); + // TODO: Remove one char *viz = cgiUsualString("viz", "R"); -if (sameString(viz, "d3")) - d3GtexBoxplot(tsList); -else if (sameString(viz, "c")) - drawGtexBoxplot(tsList, maxVal); +if (sameString(viz, "C")) + drawGtexBoxplot(gtexGene, tsvs, maxVal, doLogTransform); else - RGtexBoxplot(tsList); + drawGtexRBoxplot(gtexGene, tsvs, doLogTransform); -// Track description printTrackHtml(tdb); - -// Print out tissue table with color assignments -#ifdef DEBUG -conn = hAllocConn("hgFixed"); -char *tissueTable = "gtexTissue"; -if (sqlTableExists(conn, tissueTable)) - { - dyStringPrintf(dy, ""); - dyStringPrintf(dy, "\n"); - int i; - double invExpCount = 1.0/expCount; - char query[512]; - sqlSafef(query, sizeof(query), "select * from %s", tissueTable); - struct sqlResult *sr = sqlGetResult(conn, query); - for (i=0; i\n", - color.r, color.g, color.b, tissue->description); - } - sqlFreeResult(&sr); - } -hFreeConn(&conn); -dyStringPrintf(dy, "
ColorTissue
%s
"); -puts(dy->string); - -//cartWebStart(cart, database, "List of items assayed in %s", clusterTdb->shortLabel); - -//genericClickHandlerPlus(tdb, item, item, dy->string); -dyStringFree(&dy); -#endif }