9174a163b341bfd2dd5c8fe84952265e74bef32a
angie
Sun Sep 27 23:50:14 2020 -0700
Added setting sampleColorFile so one or more sample coloring schemes can be offered for the tree in VCF+tree tracks. refs #26177
TODO: document the new setting

diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c
index b340224..4a4795c 100644
--- src/hg/hgTracks/vcfTrack.c
+++ src/hg/hgTracks/vcfTrack.c
@@ -1922,127 +1922,242 @@
     coords->rank = (minRank + maxRank) / 2.0;
     coords->depth = maxDepth + 1;
     }
 else
     {
     // Leaf (sample)
     double rankStart = leafOrderToHapOrderStart[leafIx];
     double rankEnd = leafOrderToHapOrderStart[leafIx];
     coords->rank = (rankStart + rankEnd) / 2.0;
     leafIx++;
     coords->depth = 0;
     }
 return leafIx;
 }
 
-static void rDrawPhyloTreeInLabelArea(struct phyloTree *node, struct hvGfx *hvg, int x,
+static int colorCmp(const void *pa, const void *pb)
+/* Compare two colors for sorting by numeric value (qsort-style comparator). */
+{
+const Color ca = *(const Color *)pa;
+const Color cb = *(const Color *)pb;
+// Compare instead of subtracting so the sign is right even when the difference won't fit in int.
+return (ca > cb) - (ca < cb);
+}
+
+static Color colorFromChildColors(Color *childColors, int childCount, Color defaultCol)
+/* If the majority of children have the same color, then return that color, otherwise defaultCol.
+ * childColors is left untouched; a sorted copy is scanned for the longest run of equal colors. */
+{
+if (childCount <= 0)
+    return defaultCol;
+Color childColCopy[childCount];
+memcpy(childColCopy, childColors, sizeof childColCopy);
+qsort(childColCopy, childCount, sizeof(*childColCopy), colorCmp);
+// Runs must be counted in the sorted copy, where equal colors are adjacent.
+Color maxRunColor = childColCopy[0];
+int runLength = 1, maxRunLength = 1;
+int ix;
+for (ix = 1; ix < childCount; ix++)
+    {
+    if (childColCopy[ix] == childColCopy[ix-1])
+        {
+        runLength++;
+        }
+    else
+        {
+        if (runLength > maxRunLength)
+            {
+            maxRunLength = runLength;
+            maxRunColor = childColCopy[ix-1];
+            }
+        runLength = 1;
+        }
+    }
+// Account for the final run, which ends at the last element (ix == childCount here).
+if (runLength > maxRunLength)
+    {
+    maxRunLength = runLength;
+    maxRunColor = childColCopy[ix-1];
+    }
+if (maxRunLength > (childCount>>1))
+    return maxRunColor;
+return defaultCol;
+}
+
+static Color rDrawPhyloTreeInLabelArea(struct phyloTree *node, struct hvGfx *hvg, int x,
                                       int yOff, double pxPerHap, MgFont *font,
-                                      struct hash *highlightSamples)
+                                      struct hash *highlightSamples, struct hash *sampleColors)
-/* Recursively draw the tree in the left label area. */
+/* Recursively draw the tree in the left label area.  Return the color this node was drawn in;
+ * the caller uses it for the horizontal branch leading to the node. */
 {
 const int branchW = 4;
 int labelEnd = leftLabelX + leftLabelWidth;
+Color color = MG_BLACK;
+if (!sampleColors)
+    {
 // Misuse the branch length value as RGB color (if it's the typical small number, will still
 // draw as approximately black):
 unsigned int rgb = node->ident->length;
-Color color = MAKECOLOR_32( ((rgb>>16)&0xff), ((rgb>>8)&0xff), (rgb&0xff) );
-
+    color = MAKECOLOR_32( ((rgb>>16)&0xff), ((rgb>>8)&0xff), (rgb&0xff) );
+    }
 if (node->numEdges > 0)
     {
     // Draw each child and a horizontal line to child
     int minY = -1, maxY = 0;
+    Color childColors[node->numEdges];
     int ix;
     for (ix = 0;  ix < node->numEdges;  ix++)
         {
         struct phyloTree *child = node->edges[ix];
-        rDrawPhyloTreeInLabelArea(child, hvg, x+branchW, yOff, pxPerHap, font, highlightSamples);
+        childColors[ix] = rDrawPhyloTreeInLabelArea(child, hvg, x+branchW, yOff, pxPerHap, font,
+                                                    highlightSamples, sampleColors);
         struct nodeCoords *childCoords = child->priv;
         int childY = yOff + ((0.5 + childCoords->rank) * pxPerHap);
-        hvGfxLine(hvg, x, childY, x+branchW, childY, color);
+        hvGfxLine(hvg, x, childY, x+branchW, childY, childColors[ix]);
         if (minY < 0 || childY < minY)
             minY = childY;
         if (childY > maxY)
             maxY = childY;
         }
     // Draw a vertical line to connect the children
+    if (sampleColors != NULL)
+        color = colorFromChildColors(childColors, node->numEdges, color);
     hvGfxLine(hvg, x, minY, x, maxY, color);
     }
 else
     {
     // Leaf node -- draw a horizontal line, and label if there is space to right of tree
     struct nodeCoords *coords = node->priv;
     int yLine = yOff + ((0.5 + coords->rank) * pxPerHap);
     int yBox = yLine - pxPerHap / 2;
     int yText = yLine - tl.fontHeight / 2;
     // Dunno why but the default font seems to draw with the baseline at y while the other fonts
     // draw with the mid line at y.
     if (sameOk(tl.textSize, "8"))
         yText += 2;
     if (highlightSamples && node->ident->name && hashLookup(highlightSamples, node->ident->name))
         hvGfxBox(hvg, leftLabelX, yBox, leftLabelWidth, pxPerHap,
                  MAKECOLOR_32_A(170, 255, 255, 128));
+    // The highlight check above allows for a NULL name, so guard the color lookup the same way.
+    if (sampleColors != NULL && node->ident->name != NULL)
+        color = (Color)hashIntValDefault(sampleColors, node->ident->name, MG_BLACK);
     hvGfxLine(hvg, x, yLine, x+branchW, yLine, color);
     int textX = x + branchW + 3;
     if (pxPerHap >= tl.fontHeight+1 && textX < labelEnd)
         hvGfxText(hvg, textX, yText, MG_BLACK, font, node->ident->name);
     }
+return color;
 }
 
 static void drawPhyloTreeInLabelArea(struct phyloTree *tree, struct hvGfx *hvg, int yOff,
                                      int clipHeight, int gtHapCount,
-                                     MgFont *font, struct hash *highlightSamples)
+                                     MgFont *font, struct hash *highlightSamples,
+                                     struct hash *sampleColors)
 {
 struct hvGfx *hvgLL = (hvgSide != NULL) ? hvgSide : hvg;
 int clipXBak, clipYBak, clipWidthBak, clipHeightBak;
 hvGfxGetClip(hvgLL, &clipXBak, &clipYBak, &clipWidthBak, &clipHeightBak);
 hvGfxUnclip(hvgLL);
 hvGfxSetClip(hvgLL, leftLabelX, yOff, leftLabelWidth, clipHeight);
 // Draw the tree:
 int x = leftLabelX;
 double pxPerHap = (double)clipHeight / gtHapCount;
-rDrawPhyloTreeInLabelArea(tree, hvgLL, x, yOff, pxPerHap, font, highlightSamples);
+rDrawPhyloTreeInLabelArea(tree, hvgLL, x, yOff, pxPerHap, font, highlightSamples, sampleColors);
 // Restore the prior clipping:
 hvGfxUnclip(hvgLL);
 hvGfxSetClip(hvgLL, clipXBak, clipYBak, clipWidthBak, clipHeightBak);
 }
 
 static void rHighlightSampleRows(struct phyloTree *node, struct hvGfx *hvg, int yOff,
                                  double pxPerHap, struct hash *highlightSamples)
 /* For each leaf node, if it is in highlightSamples then draw a highlight box for it. */
 {
 if (node->numEdges > 0)
     {
     int ix;
     for (ix = 0;  ix < node->numEdges;  ix++)
         {
         struct phyloTree *child = node->edges[ix];
         rHighlightSampleRows(child, hvg, yOff, pxPerHap, highlightSamples);
         }
     }
 else
     {
     // leaf node; highlight if it's in highlightSamples
     if (node->ident->name && hashLookup(highlightSamples, node->ident->name))
         {
         struct nodeCoords *coords = node->priv;
         int y = yOff + (coords->rank * pxPerHap);
         hvGfxBox(hvg, insideX, y, insideWidth, pxPerHap, MAKECOLOR_32_A(170, 255, 255, 128));
         }
     }
 }
 
-struct hash *getHighlightSamples(struct trackDb *tdb)
+static struct hash *getSampleColors(struct trackDb *tdb)
+/* Return a hash of sample names to colors if specified in tdb, or NULL if none specified.
+ * Lines of the color file without a second tab-separated column are skipped with a warning. */
+{
+struct hash *sampleColors = NULL;
+char *setting = cartOrTdbString(cart, tdb, VCF_SAMPLE_COLOR_FILE, NULL);
+if (isNotEmpty(setting))
+    {
+    // If the setting has not been set in the cart then we're getting the trackDb setting which
+    // may specify a list of files and possibly labels like "Thing_one=file1 Thing_two=file2".
+    // In that case, pick out the first file.
+    if (strchr(setting, '=') || strchr(setting, ' '))
+        {
+        // NOTE(review): nextWord appears to terminate the word in place, i.e. inside the string
+        // returned by cartOrTdbString -- confirm that shortening that string is acceptable.
+        setting = nextWord(&setting);
+        char *eq = (strchr(setting, '='));
+        if (eq)
+            setting = eq+1;
+        }
+    char *fileName = hReplaceGbdb(setting);
+    struct lineFile *lf = lineFileUdcMayOpen(fileName, TRUE);
+    if (lf)
+        {
+        sampleColors = hashNew(0);
+        int badLineCount = 0;
+        char *line;
+        while (lineFileNextReal(lf, &line))
+            {
+            char *words[2];
+            if (chopTabs(line, words) < 2)
+                {
+                // Fewer than two fields: there is no color in words[1] to parse, so skip it.
+                badLineCount++;
+                continue;
+                }
+            char *sample = words[0];
+            char *colorStr = words[1];
+            int rgb = bedParseColor(colorStr);
+            Color color = MAKECOLOR_32( ((rgb>>16)&0xff), ((rgb>>8)&0xff), (rgb&0xff) );
+            hashAddInt(sampleColors, sample, color);
+            }
+        if (badLineCount > 0)
+            warn("Skipped %d line(s) of sampleColorFile '%s' that lack a tab-separated color",
+                 badLineCount, fileName);
+        lineFileClose(&lf);
+        }
+    else
+        warn("Can't open sampleColorFile '%s'", fileName);
+    }
+return sampleColors;
+}
+
+static struct hash *getHighlightSamples(struct trackDb *tdb)
 /* Return a hash of node IDs to highlight in the phylo tree display, or NULL if none specified. */
 {
 struct hash *highlightSamples = NULL;
 char *setting = cartOrTdbString(cart, tdb, "highlightIds", NULL);
 if (isNotEmpty(setting))
     {
     struct slName *list = slNameListFromComma(setting);
     highlightSamples = hashFromSlNameList(list);
     }
 return highlightSamples;
 }
 
 static void vcfGtHapTreeFileDraw(struct track *tg, int seqStart, int seqEnd,
                                  struct hvGfx *hvg, int xOff, int yOff, int width,
                                  MgFont *font, Color color, enum trackVisibility vis)
@@ -2068,31 +2183,33 @@
 struct hash *highlightSamples = getHighlightSamples(tg->tdb);
 if (highlightSamples)
     {
     double pxPerHap = (double)hapHeight / gtHapCount;
     rHighlightSampleRows(tree, hvg, yOff+extraPixel, pxPerHap, highlightSamples);
     }
 struct vcfRecord *rec;
 for (rec = vcff->records;  rec != NULL;  rec = rec->next)
     {
     enum soTerm funcTerm = soUnknown;
     if (colorMode == functionMode)
         funcTerm = functionForRecord(rec, gSeqWin, txiList);
     drawOneRec(rec, gtHapOrder, gtHapCount, tg, hvg, xOff, yOff, width, FALSE, FALSE, colorMode,
                funcTerm);
     }
-drawPhyloTreeInLabelArea(tree, hvg, yOff+extraPixel, hapHeight, gtHapCount, font, highlightSamples);
+struct hash *sampleColors = getSampleColors(tg->tdb);
+drawPhyloTreeInLabelArea(tree, hvg, yOff+extraPixel, hapHeight, gtHapCount, font, highlightSamples,
+                         sampleColors);
 drawSampleTitles(vcff, yOff+extraPixel, hapHeight, gtHapOrder, gtHapCount, tg->track);
 }
 
 static int vcfHapClusterTotalHeight(struct track *tg, enum trackVisibility vis)
 /* Return height of haplotype graph (2 * #samples * lineHeight);
  * 2 because we're assuming diploid genomes here, no XXY, tetraploid etc. */
 {
 const struct vcfFile *vcff = tg->extraUiData;
 if (vcff->records == NULL)
     return 0;
 int ploidy = sameString(chromName, "chrY") ? 1 : 2;
 int simpleHeight = ploidy * vcff->genotypeCount * tg->lineHeight;
 int defaultHeight = min(simpleHeight, VCF_DEFAULT_HAP_HEIGHT);
 char *tdbHeight = trackDbSettingOrDefault(tg->tdb, VCF_HAP_HEIGHT_VAR, NULL);
 if (isNotEmpty(tdbHeight))