34f8fea5dc5f758066c82471bca3d866bb71a971
kate
  Wed Aug 5 11:16:43 2015 -0700
Implement medians computed on sub-samples (initially for M/F comparison. refs #15645

diff --git src/hg/hgTracks/gtexTracks.c src/hg/hgTracks/gtexTracks.c
index 3c0c01d..db78a5f 100644
--- src/hg/hgTracks/gtexTracks.c
+++ src/hg/hgTracks/gtexTracks.c
@@ -118,63 +118,95 @@
 {
 int barWidth = gtexBarWidth();
 int padding = gtexGraphPadding();
 int count = gtex->expCount;
 return (barWidth * count) + (padding * (count-1));
 }
 
 
 static int valToHeight(double val, double maxVal, int maxHeight)
 /* Log-scale and Convert a value from 0 to maxVal to 0 to maxHeight-1 */
 // TODO: support linear or log scale
 {
 if (val == 0.0)
     return 0;
 // smallest counts are 1x10e-3, translate to counter negativity
-double scaled = (log10(val) + 3.001)/(log10(maxVal) + 3.001);
+//double scaled = (log10(val) + 3.001)/(log10(maxVal) + 3.001);
+double scaled = log10(val+1.0) / log10(maxVal+1.0);
 if (scaled < 0)
     warn("scaled=%f\n", scaled);
 //uglyf("%.2f -> %.2f height %d", val, scaled, (int)scaled * (maxHeight-1));
 return (scaled * (maxHeight-1));
 }
 
 // TODO: whack this
 static int valToY(double val, double maxVal, int maxHeight)
 /* Convert a value from 0 to maxVal to 0 to height-1 */
 {
 if (val == 0.0)
     return 0;
-double scaled = (log10(val)+3.001)/(log10(maxVal)+3.001);
+//double scaled = (log10(val)+3.001)/(log10(maxVal)+3.001);
+double scaled = log10(val+1.0) / log10(maxVal+1.0);
 if (scaled < 0)
     warn("scaled=%f\n", scaled);
 int y = scaled * (maxHeight);
 //uglyf("%.2f -> %.2f height %d", val, scaled, (int)scaled * (maxHeight-1));
 return (maxHeight-1) - y;
 }
 
 
 /* Cache tissue metadata */
 
 struct gtexTissue *getTissues()
-/* Get tissue metadata from database */
+/* Get and cache tissue metadata from database */
 {
 static struct gtexTissue *gtexTissues = NULL;
-if (gtexTissues == NULL)
+
+if (!gtexTissues)
     gtexTissues = gtexGetTissues();
 return gtexTissues;
 }
 
+int getTissueCount()
+/* Get and cache the number of tissues in GTEx tissue table */
+{
+static int tissueCount = 0;
+
+if (!tissueCount)
+    tissueCount = slCount(getTissues());
+return tissueCount;
+}
+
+char *getTissueName(int id)
+/* Get tissue name from id, cacheing */
+{
+static char **tissueNames = NULL;
+
+struct gtexTissue *tissue;
+int count = getTissueCount();
+if (!tissueNames)
+    {
+    struct gtexTissue *tissues = getTissues();
+    AllocArray(tissueNames, count);
+    for (tissue = tissues; tissue != NULL; tissue = tissue->next)
+        tissueNames[tissue->id] = cloneString(tissue->name);
+    }
+if (id >= count)
+    errAbort("GTEx tissue table problem: can't find id %d\n", id);
+return tissueNames[id];
+}
+
 struct rgbColor *getGtexTissueColors()
 /* Get RGB colors from tissue table */
 {
 struct gtexTissue *tissues = getTissues();
 struct gtexTissue *tissue = NULL;
 int count = slCount(tissues);
 struct rgbColor *colors;
 AllocArray(colors, count);
 int i = 0;
 for (tissue = tissues; tissue != NULL; tissue = tissue->next)
     {
     // TODO: reconcile 
     colors[i] = (struct rgbColor){.r=COLOR_32_BLUE(tissue->color), .g=COLOR_32_GREEN(tissue->color), .b=COLOR_32_RED(tissue->color)};
     //colors[i] = mgColorIxToRgb(NULL, tissue->color);
     i++;
@@ -214,83 +246,89 @@
 /* Track info */
     {
     double maxMedian;
     char *graphType;
     boolean isComparison;
     struct rgbColor *colors;
     };
 
 struct gtexGeneInfo
 /* GTEx gene model, names, and expression medians */
     {
     struct gtexGeneInfo *next;  /* Next in singly linked list */
     struct gtexGeneBed *geneBed;/* Gene name, id, canonical transcript, exp count and medians 
                                         from BED table */
     struct genePred *geneModel; /* Gene structure from model table */
-    float *medians1;            /* Computed medians */
-    float *medians2;            /* Computed medians for comparison (inverse) graph */
+    double *medians1;            /* Computed medians */
+    double *medians2;            /* Computed medians for comparison (inverse) graph */
     };
 
-static struct gtexGeneBed *loadComputedMedians(struct gtexGeneBed *geneBed, char *graphType)
+
+static void loadComputedMedians(struct gtexGeneInfo *geneInfo, struct gtexGeneExtras *extras)
 /* Compute medians based on graph type.  Returns a list of 2 for comparison graph types */
 /* TODO: add support for filter function */
 {
-/* FIXME: dummy load of two for display implementation */
-struct gtexGeneBed *medians = NULL, *medians2 = NULL;
-
-AllocVar(medians);
-medians->expCount = geneBed->expCount;
-int i;
+struct gtexGeneBed *geneBed = geneInfo->geneBed;
+int expCount = geneBed->expCount;
+//char *graphType = extras->graphType;
 
-#ifdef NEW
+if (extras->isComparison)
+    {
+    // create two score hashes, one for each sample subset
+    struct hash *scoreHash1 = hashNew(0), *scoreHash2 = hashNew(0);
     struct sqlConnection *conn = hAllocConn("hgFixed");
-if (conn == NULL)
-    return NULL;
     char query[1024];
-
-// FIXME: experiment with query on sex
-
-sqlSafef(query, sizeof(query), "select gtexTissue.id, gtexSampleData.score from gtexTissue, gtexSampleData, gtexSample, gtexDonor where gtexSampleData.tissue=gtexTissue.name and gtexSampleData.geneId='%s' and gtexSampleData.sample=gtexSample.name and gtexSample.donor=gtexDonor.name and gtexDonor.gender='F'", geneBed->geneId);
-struct slDouble **scores = NULL, *score = NULL;
-AllocArray(scores, geneBed->expCount);
-struct slPair *tissueScore = NULL, *tissueScores = sqlQuickPairList(conn, query);
-for (tissueScore = tissueScores; tissueScore != NULL; tissueScore = tissueScore->next)
+    char **row;
+    sqlSafef(query, sizeof(query), "select gtexSampleData.sample, gtexDonor.gender, gtexSampleData.tissue, gtexSampleData.score from gtexSampleData, gtexSample, gtexDonor where gtexSampleData.geneId='%s' and gtexSampleData.sample=gtexSample.name and gtexSample.donor=gtexDonor.name", geneBed->geneId);
+    struct sqlResult *sr = sqlGetResult(conn, query);
+    while ((row = sqlNextRow(sr)) != NULL)
         {
-    AllocVar(score);
-    i = sqlUnsigned(tissueScore->name);
-    if (scores[i] == NULL)
-        scores[i] = score;
+        char gender = *row[1];
+        struct hash *scoreHash = ((gender == 'F') ? scoreHash1 : scoreHash2);
+        char *tissue = cloneString(row[2]);
+        struct slDouble *score = slDoubleNew(sqlDouble(row[3]));
+
+        // create hash of lists of scores, keyed by tissue name
+        double *tissueScores = hashFindVal(scoreHash, tissue);
+        if (tissueScores)
+            slAddHead(tissueScores, score);
         else
-        slAddHead(&scores[i], score);
+            hashAdd(scoreHash, tissue, score);
         }
+    sqlFreeResult(&sr);
     hFreeConn(&conn);
-#endif
 
-AllocArray(medians->expScores, medians->expCount);
+    // get tissue medians for each sample subset
+    double *medians1;
+    double *medians2;
+    AllocArray(medians1, expCount);
+    AllocArray(medians2, expCount);
+    int i;
     for (i=0; i<geneBed->expCount; i++)
-    //medians->expScores[i] = slDoubleMedian(scores[i]);
-    medians->expScores[i] = geneBed->expScores[i];
-
-AllocVar(medians2);
-medians2->expCount = geneBed->expCount;
-AllocArray(medians2->expScores, medians2->expCount);
-for (i = 0; i < medians2->expCount; ++i)
-    medians2->expScores[i] = geneBed->expScores[i];
-
-medians->next = medians2;
-
-return medians;
+        {
+        //medians1[i] = -1, medians2[i] = -1;       // mark missing tissues
+        struct slDouble *scores;
+        scores = hashFindVal(scoreHash1, getTissueName(i));
+        if (scores)
+            medians1[i] = slDoubleMedian(scores);
+        scores = hashFindVal(scoreHash2, getTissueName(i));
+        if (scores)
+            medians2[i] = slDoubleMedian(scores);
+        }
+    geneInfo->medians1 = medians1;
+    geneInfo->medians2 = medians2;
+    }
 }
 
 static struct hash *loadGeneModels(char *table)
 /* Load gene models from table */
 {
 struct sqlConnection *conn = hAllocConn(database);
 struct sqlResult *sr;
 char **row;
 int rowOffset;
 sr = hRangeQuery(conn, table, chromName, winStart, winEnd, NULL, &rowOffset);
 
 struct hash *modelHash = newHash(0);
 struct genePred *model = NULL;
 while ((row = sqlNextRow(sr)) != NULL)
     {
@@ -361,40 +399,38 @@
 }
 
 static void gtexGeneDrawAt(struct track *tg, void *item, struct hvGfx *hvg, int xOff, int y, 
                 double scale, MgFont *font, Color color, enum trackVisibility vis)
 {
 struct gtexGeneInfo *geneInfo = (struct gtexGeneInfo *)item;
 struct gtexGeneBed *geneBed = geneInfo->geneBed;
 
 // Color in dense mode using transcriptClass
 Color statusColor = getTranscriptStatusColor(hvg, geneBed);
 if (vis != tvFull && vis != tvPack)
     {
     bedDrawSimpleAt(tg, geneBed, hvg, xOff, y, scale, font, statusColor, vis);
     return;
     }
-struct gtexGeneBed *computedMedians = NULL;     // 1 or 2 (if comparison) 
-                                                // with medians computed for sample subsets
 
 struct gtexGeneExtras *extras = (struct gtexGeneExtras *)tg->extraUiData;
 if ((extras->isComparison) &&
         (tg->visibility == tvFull || tg->visibility == tvPack))
         //&& gtexGraphHeight() != MIN_GRAPH_HEIGHT)
     {
     // compute medians based on configuration (comparisons, and later, filters)
-    computedMedians = loadComputedMedians(geneBed, extras->graphType);
+    loadComputedMedians(geneInfo, extras);
     }
 int i;
 int expCount = geneBed->expCount;
 double maxMedian = ((struct gtexGeneExtras *)tg->extraUiData)->maxMedian;
 struct rgbColor lineColor = {.r=0};
 int lineColorIx = hvGfxFindColorIx(hvg, lineColor.r, lineColor.g, lineColor.b);
 int heightPer = tg->heightPer;
 
 int graphX = gtexGraphX(geneBed);
 if (graphX < 0)
     return;
 // x1 is at left of graph
 int x1 = xOff + graphX;
 int startX = x1;
 
@@ -412,73 +448,82 @@
 int barWidth = gtexBarWidth();
 int graphPadding = gtexGraphPadding();
 
 // Move to loader
 char *colorScheme = cartUsualStringClosestToHome(cart, tg->tdb, FALSE, GTEX_COLORS, 
                         GTEX_COLORS_DEFAULT);
 for (i=0; i<expCount; i++)
     {
     struct rgbColor fillColor = extras->colors[i];
     if (barWidth == 1 && sameString(colorScheme, GTEX_COLORS_GTEX))
         {
         // brighten colors a bit so they'll be more visible at this scale
         fillColor = gtexTissueBrightenColor(fillColor);
         }
     int fillColorIx = hvGfxFindColorIx(hvg, fillColor.r, fillColor.g, fillColor.b);
-    double expScore = geneBed->expScores[i];
+
+    double expScore = (geneInfo->medians1 ? geneInfo->medians1[i] : geneBed->expScores[i]);
+    //double expScore = geneBed->expScores[i];
+    /*
+    if (expScore < 1)
+        expScore = 0.0;
+        */
     int height = valToHeight(expScore, maxMedian, gtexGraphHeight());
     // TODO: adjust yGene to get gene track distance as desired
     //if (i ==0) uglyf("DRAW: expScore=%.2f, maxMedian=%.2f, graphHeight=%d, y=%d<br>", expScore, maxMedian, gtexGraphHeight(), y);
     //if (i ==0) uglyf("DRAW: yZero=%d, yMedian=%d, height=%d<br>", yZero, yMedian, height);
     if (graphPadding == 0 || sameString(colorScheme, GTEX_COLORS_GTEX))
         hvGfxBox(hvg, x1, yZero-height, barWidth, height, fillColorIx);
     else
         hvGfxOutlinedBox(hvg, x1, yZero-height, barWidth, height, fillColorIx, lineColorIx);
     x1 = x1 + barWidth + graphPadding;
     }
 
 // mark gene extent
 int yGene = yZero + gtexGeneMargin() - 1;
 
 
 // draw gene model
 tg->heightPer = gtexGeneHeight()+1;
 struct linkedFeatures *lf = linkedFeaturesFromGenePred(tg, geneInfo->geneModel, FALSE);
 lf->filterColor = statusColor;
 linkedFeaturesDrawAt(tg, lf, hvg, xOff, yGene, scale, font, color, tvSquish);
 tg->heightPer = heightPer;
 
-if (!extras->isComparison || slCount(computedMedians) != 2)
+if (!geneInfo->medians2)
     return;
 
 // draw comparison graph (upside down)
 
 x1 = startX;
 // yZero is at top of graph
 yZero = yGene + gtexGeneHeight();
 for (i=0; i<expCount; i++)
     {
     struct rgbColor fillColor = extras->colors[i];
     if (barWidth == 1 && sameString(colorScheme, GTEX_COLORS_GTEX))
         {
         // brighten colors a bit so they'll be more visible at this scale
         struct hslColor hsl = mgRgbToHsl(fillColor);
         hsl.s = min(1000, hsl.s + 300);
         fillColor = mgHslToRgb(hsl);
         }
     int fillColorIx = hvGfxFindColorIx(hvg, fillColor.r, fillColor.g, fillColor.b);
-    double expScore = geneBed->expScores[i];
+    //double expScore = geneBed->expScores[i];
+    double expScore = geneInfo->medians2[i];
+    //if (expScore < 1)
+        //expScore = 0.0;
     int height = valToHeight(expScore, maxMedian, gtexGraphHeight());
     // TODO: adjust yGene instead of yMedian+1 to get gene track distance as desired
     //if (i ==0) uglyf("DRAW2: expScore=%.2f, maxMedian=%.2f, graphHeight=%d, y=%d<br>", expScore, maxMedian, gtexGraphHeight(), y);
     //if (i ==0) uglyf("DRAW2: yZero=%d, height=%d<br>", yZero, height);
     if (graphPadding == 0 || sameString(colorScheme, GTEX_COLORS_GTEX))
         hvGfxBox(hvg, x1, yZero, barWidth, height, fillColorIx);
     else
         hvGfxOutlinedBox(hvg, x1, yZero, barWidth, height, fillColorIx, lineColorIx);
     x1 = x1 + barWidth + graphPadding;
     }
 }
 
 static void gtexGeneMapItem(struct track *tg, struct hvGfx *hvg, void *item, char *itemName, 
                         char *mapItemName, int start, int end, int x, int y, int width, int height)
 /* Create a map box for each tissue (bar in the graph) or a single map for squish/dense modes */