34f8fea5dc5f758066c82471bca3d866bb71a971 kate Wed Aug 5 11:16:43 2015 -0700 Implement medians computed on sub-samples (initially for M/F comparison. refs #15645 diff --git src/hg/hgTracks/gtexTracks.c src/hg/hgTracks/gtexTracks.c index 3c0c01d..db78a5f 100644 --- src/hg/hgTracks/gtexTracks.c +++ src/hg/hgTracks/gtexTracks.c @@ -118,63 +118,95 @@ { int barWidth = gtexBarWidth(); int padding = gtexGraphPadding(); int count = gtex->expCount; return (barWidth * count) + (padding * (count-1)); } static int valToHeight(double val, double maxVal, int maxHeight) /* Log-scale and Convert a value from 0 to maxVal to 0 to maxHeight-1 */ // TODO: support linear or log scale { if (val == 0.0) return 0; // smallest counts are 1x10e-3, translate to counter negativity -double scaled = (log10(val) + 3.001)/(log10(maxVal) + 3.001); +//double scaled = (log10(val) + 3.001)/(log10(maxVal) + 3.001); +double scaled = log10(val+1.0) / log10(maxVal+1.0); if (scaled < 0) warn("scaled=%f\n", scaled); //uglyf("%.2f -> %.2f height %d", val, scaled, (int)scaled * (maxHeight-1)); return (scaled * (maxHeight-1)); } // TODO: whack this static int valToY(double val, double maxVal, int maxHeight) /* Convert a value from 0 to maxVal to 0 to height-1 */ { if (val == 0.0) return 0; -double scaled = (log10(val)+3.001)/(log10(maxVal)+3.001); +//double scaled = (log10(val)+3.001)/(log10(maxVal)+3.001); +double scaled = log10(val+1.0) / log10(maxVal+1.0); if (scaled < 0) warn("scaled=%f\n", scaled); int y = scaled * (maxHeight); //uglyf("%.2f -> %.2f height %d", val, scaled, (int)scaled * (maxHeight-1)); return (maxHeight-1) - y; } /* Cache tissue metadata */ struct gtexTissue *getTissues() -/* Get tissue metadata from database */ +/* Get and cache tissue metadata from database */ { static struct gtexTissue *gtexTissues = NULL; -if (gtexTissues == NULL) + +if (!gtexTissues) gtexTissues = gtexGetTissues(); return gtexTissues; } +int getTissueCount() +/* Get and cache the number of tissues in GTEx tissue table */ +{ +static int tissueCount = 0; + +if (!tissueCount) + tissueCount = slCount(getTissues()); +return tissueCount; +} + +char *getTissueName(int id) +/* Get tissue name from id, cacheing */ +{ +static char **tissueNames = NULL; + +struct gtexTissue *tissue; +int count = getTissueCount(); +if (!tissueNames) + { + struct gtexTissue *tissues = getTissues(); + AllocArray(tissueNames, count); + for (tissue = tissues; tissue != NULL; tissue = tissue->next) + tissueNames[tissue->id] = cloneString(tissue->name); + } +if (id >= count) + errAbort("GTEx tissue table problem: can't find id %d\n", id); +return tissueNames[id]; +} + struct rgbColor *getGtexTissueColors() /* Get RGB colors from tissue table */ { struct gtexTissue *tissues = getTissues(); struct gtexTissue *tissue = NULL; int count = slCount(tissues); struct rgbColor *colors; AllocArray(colors, count); int i = 0; for (tissue = tissues; tissue != NULL; tissue = tissue->next) { // TODO: reconcile colors[i] = (struct rgbColor){.r=COLOR_32_BLUE(tissue->color), .g=COLOR_32_GREEN(tissue->color), .b=COLOR_32_RED(tissue->color)}; //colors[i] = mgColorIxToRgb(NULL, tissue->color); i++; @@ -214,83 +246,89 @@ /* Track info */ { double maxMedian; char *graphType; boolean isComparison; struct rgbColor *colors; }; struct gtexGeneInfo /* GTEx gene model, names, and expression medians */ { struct gtexGeneInfo *next; /* Next in singly linked list */ struct gtexGeneBed *geneBed;/* Gene name, id, canonical transcript, exp count and medians from BED table */ struct genePred *geneModel; /* Gene structure from model table */ - float *medians1; /* Computed medians */ - float *medians2; /* Computed medians for comparison (inverse) graph */ + double *medians1; /* Computed medians */ + double *medians2; /* Computed medians for comparison (inverse) graph */ }; -static struct gtexGeneBed *loadComputedMedians(struct gtexGeneBed *geneBed, char *graphType) + +static void loadComputedMedians(struct gtexGeneInfo *geneInfo, struct gtexGeneExtras *extras) /* Compute medians based on graph type. Returns a list of 2 for comparison graph types */ /* TODO: add support for filter function */ { -/* FIXME: dummy load of two for display implementation */ -struct gtexGeneBed *medians = NULL, *medians2 = NULL; - -AllocVar(medians); -medians->expCount = geneBed->expCount; -int i; +struct gtexGeneBed *geneBed = geneInfo->geneBed; +int expCount = geneBed->expCount; +//char *graphType = extras->graphType; -#ifdef NEW +if (extras->isComparison) + { + // create two score hashes, one for each sample subset + struct hash *scoreHash1 = hashNew(0), *scoreHash2 = hashNew(0); struct sqlConnection *conn = hAllocConn("hgFixed"); -if (conn == NULL) - return NULL; char query[1024]; - -// FIXME: experiment with query on sex - -sqlSafef(query, sizeof(query), "select gtexTissue.id, gtexSampleData.score from gtexTissue, gtexSampleData, gtexSample, gtexDonor where gtexSampleData.tissue=gtexTissue.name and gtexSampleData.geneId='%s' and gtexSampleData.sample=gtexSample.name and gtexSample.donor=gtexDonor.name and gtexDonor.gender='F'", geneBed->geneId); -struct slDouble **scores = NULL, *score = NULL; -AllocArray(scores, geneBed->expCount); -struct slPair *tissueScore = NULL, *tissueScores = sqlQuickPairList(conn, query); -for (tissueScore = tissueScores; tissueScore != NULL; tissueScore = tissueScore->next) + char **row; + sqlSafef(query, sizeof(query), "select gtexSampleData.sample, gtexDonor.gender, gtexSampleData.tissue, gtexSampleData.score from gtexSampleData, gtexSample, gtexDonor where gtexSampleData.geneId='%s' and gtexSampleData.sample=gtexSample.name and gtexSample.donor=gtexDonor.name", geneBed->geneId); + struct sqlResult *sr = sqlGetResult(conn, query); + while ((row = sqlNextRow(sr)) != NULL) { - AllocVar(score); - i = sqlUnsigned(tissueScore->name); - if (scores[i] == NULL) - scores[i] = score; + char gender = *row[1]; + struct hash *scoreHash = ((gender == 'F') ? scoreHash1 : scoreHash2); + char *tissue = cloneString(row[2]); + struct slDouble *score = slDoubleNew(sqlDouble(row[3])); + + // create hash of lists of scores, keyed by tissue name + double *tissueScores = hashFindVal(scoreHash, tissue); + if (tissueScores) + slAddHead(tissueScores, score); else - slAddHead(&scores[i], score); + hashAdd(scoreHash, tissue, score); } + sqlFreeResult(&sr); hFreeConn(&conn); -#endif -AllocArray(medians->expScores, medians->expCount); + // get tissue medians for each sample subset + double *medians1; + double *medians2; + AllocArray(medians1, expCount); + AllocArray(medians2, expCount); + int i; for (i=0; i<geneBed->expCount; i++) - //medians->expScores[i] = slDoubleMedian(scores[i]); - medians->expScores[i] = geneBed->expScores[i]; - -AllocVar(medians2); -medians2->expCount = geneBed->expCount; -AllocArray(medians2->expScores, medians2->expCount); -for (i = 0; i < medians2->expCount; ++i) - medians2->expScores[i] = geneBed->expScores[i]; - -medians->next = medians2; - -return medians; + { + //medians1[i] = -1, medians2[i] = -1; // mark missing tissues + struct slDouble *scores; + scores = hashFindVal(scoreHash1, getTissueName(i)); + if (scores) + medians1[i] = slDoubleMedian(scores); + scores = hashFindVal(scoreHash2, getTissueName(i)); + if (scores) + medians2[i] = slDoubleMedian(scores); + } + geneInfo->medians1 = medians1; + geneInfo->medians2 = medians2; + } } static struct hash *loadGeneModels(char *table) /* Load gene models from table */ { struct sqlConnection *conn = hAllocConn(database); struct sqlResult *sr; char **row; int rowOffset; sr = hRangeQuery(conn, table, chromName, winStart, winEnd, NULL, &rowOffset); struct hash *modelHash = newHash(0); struct genePred *model = NULL; while ((row = sqlNextRow(sr)) != NULL) { @@ -361,40 +399,38 @@ } static void gtexGeneDrawAt(struct track *tg, void *item, struct hvGfx *hvg, int xOff, int y, double scale, MgFont *font, Color color, enum trackVisibility vis) { struct gtexGeneInfo *geneInfo = (struct gtexGeneInfo *)item; struct gtexGeneBed *geneBed = geneInfo->geneBed; // Color in dense mode using transcriptClass Color statusColor = getTranscriptStatusColor(hvg, geneBed); if (vis != tvFull && vis != tvPack) { bedDrawSimpleAt(tg, geneBed, hvg, xOff, y, scale, font, statusColor, vis); return; } -struct gtexGeneBed *computedMedians = NULL; // 1 or 2 (if comparison) - // with medians computed for sample subsets struct gtexGeneExtras *extras = (struct gtexGeneExtras *)tg->extraUiData; if ((extras->isComparison) && (tg->visibility == tvFull || tg->visibility == tvPack)) //&& gtexGraphHeight() != MIN_GRAPH_HEIGHT) { // compute medians based on configuration (comparisons, and later, filters) - computedMedians = loadComputedMedians(geneBed, extras->graphType); + loadComputedMedians(geneInfo, extras); } int i; int expCount = geneBed->expCount; double maxMedian = ((struct gtexGeneExtras *)tg->extraUiData)->maxMedian; struct rgbColor lineColor = {.r=0}; int lineColorIx = hvGfxFindColorIx(hvg, lineColor.r, lineColor.g, lineColor.b); int heightPer = tg->heightPer; int graphX = gtexGraphX(geneBed); if (graphX < 0) return; // x1 is at left of graph int x1 = xOff + graphX; int startX = x1; @@ -412,73 +448,82 @@ int barWidth = gtexBarWidth(); int graphPadding = gtexGraphPadding(); // Move to loader char *colorScheme = cartUsualStringClosestToHome(cart, tg->tdb, FALSE, GTEX_COLORS, GTEX_COLORS_DEFAULT); for (i=0; i<expCount; i++) { struct rgbColor fillColor = extras->colors[i]; if (barWidth == 1 && sameString(colorScheme, GTEX_COLORS_GTEX)) { // brighten colors a bit so they'll be more visible at this scale fillColor = gtexTissueBrightenColor(fillColor); } int fillColorIx = hvGfxFindColorIx(hvg, fillColor.r, fillColor.g, fillColor.b); - double expScore = geneBed->expScores[i]; + + double expScore = (geneInfo->medians1 ? geneInfo->medians1[i] : geneBed->expScores[i]); + //double expScore = geneBed->expScores[i]; + /* + if (expScore < 1) + expScore = 0.0; + */ int height = valToHeight(expScore, maxMedian, gtexGraphHeight()); // TODO: adjust yGene to get gene track distance as desired //if (i ==0) uglyf("DRAW: expScore=%.2f, maxMedian=%.2f, graphHeight=%d, y=%d<br>", expScore, maxMedian, gtexGraphHeight(), y); //if (i ==0) uglyf("DRAW: yZero=%d, yMedian=%d, height=%d<br>", yZero, yMedian, height); if (graphPadding == 0 || sameString(colorScheme, GTEX_COLORS_GTEX)) hvGfxBox(hvg, x1, yZero-height, barWidth, height, fillColorIx); else hvGfxOutlinedBox(hvg, x1, yZero-height, barWidth, height, fillColorIx, lineColorIx); x1 = x1 + barWidth + graphPadding; } // mark gene extent int yGene = yZero + gtexGeneMargin() - 1; // draw gene model tg->heightPer = gtexGeneHeight()+1; struct linkedFeatures *lf = linkedFeaturesFromGenePred(tg, geneInfo->geneModel, FALSE); lf->filterColor = statusColor; linkedFeaturesDrawAt(tg, lf, hvg, xOff, yGene, scale, font, color, tvSquish); tg->heightPer = heightPer; -if (!extras->isComparison || slCount(computedMedians) != 2) +if (!geneInfo->medians2) return; // draw comparison graph (upside down) x1 = startX; // yZero is at top of graph yZero = yGene + gtexGeneHeight(); for (i=0; i<expCount; i++) { struct rgbColor fillColor = extras->colors[i]; if (barWidth == 1 && sameString(colorScheme, GTEX_COLORS_GTEX)) { // brighten colors a bit so they'll be more visible at this scale struct hslColor hsl = mgRgbToHsl(fillColor); hsl.s = min(1000, hsl.s + 300); fillColor = mgHslToRgb(hsl); } int fillColorIx = hvGfxFindColorIx(hvg, fillColor.r, fillColor.g, fillColor.b); - double expScore = geneBed->expScores[i]; + //double expScore = geneBed->expScores[i]; + double expScore = geneInfo->medians2[i]; + //if (expScore < 1) + //expScore = 0.0; int height = valToHeight(expScore, maxMedian, gtexGraphHeight()); // TODO: adjust yGene instead of yMedian+1 to get gene track distance as desired //if (i ==0) uglyf("DRAW2: expScore=%.2f, maxMedian=%.2f, graphHeight=%d, y=%d<br>", expScore, maxMedian, gtexGraphHeight(), y); //if (i ==0) uglyf("DRAW2: yZero=%d, height=%d<br>", yZero, height); if (graphPadding == 0 || sameString(colorScheme, GTEX_COLORS_GTEX)) hvGfxBox(hvg, x1, yZero, barWidth, height, fillColorIx); else hvGfxOutlinedBox(hvg, x1, yZero, barWidth, height, fillColorIx, lineColorIx); x1 = x1 + barWidth + graphPadding; } } static void gtexGeneMapItem(struct track *tg, struct hvGfx *hvg, void *item, char *itemName, char *mapItemName, int start, int end, int x, int y, int width, int height) /* Create a map box for each tissue (bar in the graph) or a single map for squish/dense modes */