be693595e590bce1d7d9b5e0d34451047c3170f2
chmalee
  Thu Jul 30 15:52:52 2020 -0700
Add 2 new color options for trios, color by predicted functional affect and de novo child variants, refs #25983

diff --git src/hg/hgTracks/vcfTrack.c src/hg/hgTracks/vcfTrack.c
index 425adcf..836efc5 100644
--- src/hg/hgTracks/vcfTrack.c
+++ src/hg/hgTracks/vcfTrack.c
@@ -2469,43 +2469,107 @@
                 c->next = c->next->next;
             else
                 c = c->next;
             }
         }
     }
 
 // now fill out a distance matrix based on the cwaDistance between all the pairs in hapArray
 struct hapDistanceMatrix *hapDistMatrix = fillOutDistanceMatrix(hapArray, vcff, sample, &helper, gtCount);
 unsigned int hapCount = 2 * gtCount;
 unsigned int *hapOrder = needMem(sizeof(unsigned int) * hapCount);
 setHapOrderFromMatrix(hapOrder, hapCount, hapDistMatrix, hapArray, vcff, sample, sampleDrawOrder);
 return hapOrder;
 }
 
-#define VCFPHASED_UNPHASED_COLOR MG_RED
+static void setTransmitOrder(unsigned int *gtHapOrder, char **transmitOrder, int gtHapCount, struct vcfRecord *rec, char **sampleOrder, char *childSample)
+/* Fill out an array of "transmitted", "untransmitted" strings for mouseover and color lookup */
+{
+int i;
+int childSampleIx = stringArrayIx(childSample, sampleOrder, gtHapCount / 2);
+for (i = 0; i < gtHapCount; i++)
+    {
+    int gtIx = gtHapOrder[i];
+    struct vcfGenotype *gt = &(rec->genotypes[gtIx >> 1]);
+    int alleleSampleIx = stringArrayIx(gt->id, sampleOrder, gtHapCount / 2);
+    if (alleleSampleIx != childSampleIx)
+        if (alleleSampleIx > childSampleIx)
+            {
+            transmitOrder[i] = i % 2 == 0 ? "transmitted" : "untransmitted";
+            }
+        else
+            {
+            transmitOrder[i] = i % 2 == 0 ? "untransmitted" : "transmitted";
+            }
+    else
+        transmitOrder[i] = "child";
+    }
+for (i = 0; i < gtHapCount; i++)
+    fprintf(stderr, "%s, ", transmitOrder[i]);
+fprintf(stderr, "\n");
+}
+
+enum phasedColorMode
+    {
+    noColorMode,
+    mendelDiffMode,
+    deNovoOnlyMode,
+    phasedFunctionMode
+    };
 
+static enum phasedColorMode getPhasedColorMode(struct trackDb *tdb)
+/* Get the coloring mode for phased trio tracks */
+{
+enum phasedColorMode colorMode = noColorMode;
+char *colorBy = cartOrTdbString(cart, tdb, VCF_PHASED_COLORBY_VAR, VCF_PHASED_COLORBY_VAR);
+if (sameString(colorBy, VCF_PHASED_COLORBY_MENDEL_DIFF))
+    colorMode = mendelDiffMode;
+else if (sameString(colorBy, VCF_PHASED_COLORBY_DE_NOVO))
+    colorMode = deNovoOnlyMode;
+else
+    {
+    char *geneTrack = cartOrTdbString(cart, tdb, "geneTrack", NULL);
+    if (isNotEmpty(geneTrack) && sameString(colorBy, VCF_PHASED_COLORBY_FUNCTION))
+        colorMode = phasedFunctionMode;
+    }
+return colorMode;
+}
 
-static int getChildAlleleColor(struct track *track, struct vcfRecord *rec, int childHapIx,
-                               int alleleIx, int nameIx, int gtHapCount, unsigned int *gtHapOrder,
-                               char *sampleOrder[])
-/* Return the color we should use for this variant, depending on whether the allele
- * of the child matches what the parent transmitted or if there was some kind of
- * 'mendelian inconsistency' */
+static int getTickColor(struct track *track, struct vcfRecord *rec, int alleleIx, int nameIx, int gtHapCount, unsigned int *gtHapOrder, char *childSampleName, char *sampleOrder[], enum phasedColorMode colorMode, enum soTerm funcTerm)
+/* Return the color we should use for this variant */
 {
 // if there are no parents to draw then no concept of transmitted vs unstransmitted
+Color col = MG_BLACK;
+if (colorMode == noColorMode)
+    col = MG_BLACK;
+else if (colorMode == phasedFunctionMode)
+    {
+    col = colorFromSoTerm(funcTerm);
+    }
+else
+    {
     if (gtHapCount <= 2)
-    return MG_BLACK;
+        col = MG_BLACK;
+    else
+        {
+        char *sampleName = sampleOrder[nameIx];
+        if (!sameString(sampleName, childSampleName)) // parents only have shading in funcMode
+            col = MG_BLACK;
+        else // maybe draw child ticks red
+            {
+            struct vcfGenotype *childGt = &(rec->genotypes[gtHapOrder[alleleIx] >> 1]);
+            int childHapIx = gtHapOrder[alleleIx] & 1 ? childGt->hapIxB : childGt->hapIxA;
             int parIx = alleleIx;
             int numSamples = gtHapCount / 2;
             boolean isEven = (alleleIx % 2) == 0;
             const int adjacentLineSet = 1;
             // how many haplotype "lines" away is the second parent if we are drawing in either
             // parent1,parent2,child or child,parent1,parent2 order:
             const int otherLineSet = 4;
             if (nameIx == 0)
                 {
                 if (!isEven)
                     parIx = alleleIx + adjacentLineSet;
                 else if (numSamples > 2)
                         parIx = alleleIx + otherLineSet;
                 }
             else if (nameIx == 1)
@@ -2513,85 +2577,103 @@
                 if (isEven)
                     parIx = alleleIx - adjacentLineSet;
                 else if (numSamples > 2) // don't compare the allele the child to a missing parent
                     parIx = alleleIx + adjacentLineSet;
                 }
             else
                 {
                 if (isEven)
                     parIx = alleleIx - adjacentLineSet;
                 else
                     parIx = alleleIx - otherLineSet;
                 }
             struct vcfGenotype *parentGt = &(rec->genotypes[gtHapOrder[parIx] >> 1]);
             if (parentGt->isPhased || (parentGt->hapIxA == 1 && parentGt->hapIxB == 1))
                 {
-    int parentAlleleIx = gtHapOrder[parIx] & 1 ? parentGt->hapIxB : parentGt->hapIxA;
-    if (rec->alleles[childHapIx] != rec->alleles[parentAlleleIx]) // if they disagree mark as red
-        return MG_RED;
+                int transmittedIx = gtHapOrder[parIx] & 1 ? parentGt->hapIxB : parentGt->hapIxA;
+                int untransmittedIx = gtHapOrder[parIx] & 1 ? parentGt->hapIxA : parentGt->hapIxB;
+                col = MG_BLACK;
+                if (colorMode == mendelDiffMode)
+                    {
+                    if (rec->alleles[childHapIx] != rec->alleles[transmittedIx] &&
+                        rec->alleles[childHapIx] == rec->alleles[untransmittedIx])
+                        col = MG_RED;
+                    }
                 else
-        return MG_BLACK;
+                    {
+                    if (rec->alleles[childHapIx] != rec->alleles[transmittedIx] &&
+                        rec->alleles[childHapIx] != rec->alleles[untransmittedIx])
+                        col = MG_RED;
+                    }
+                }
             }
-return MG_BLACK;
+        }
+    }
+return col;
 }
 
-void vcfPhasedDrawOneRecord(struct track *track, struct hvGfx *hvg, struct vcfRecord *rec, void *item,
-                            unsigned int *gtHapOrder, int gtHapCount, int xOff, int yOffsets[],
-                            char *sampleOrder[], char *childSample, boolean highlightChildDiffs,
-                            double scale)
+void vcfPhasedDrawOneRecord(struct track *track, struct hvGfx *hvg, struct vcfRecord *rec,
+                            void *item, unsigned int *gtHapOrder, int gtHapCount, int xOff,
+                            int yOffsets[], char *sampleOrder[], char *childSample, double scale,
+                            char *transmitOrder[], enum phasedColorMode colorMode,
+                            enum soTerm funcTerm)
 // Draw a record's haplotypes on the appropriate lines
 {
 int i;
 int x1 = round((double)(rec->chromStart-winStart)*scale) + xOff;
 int x2 = round((double)(rec->chromEnd-winStart)*scale) + xOff;
 struct pgSnpVcfStartEnd *psvs = (struct pgSnpVcfStartEnd *)item;
 int w = x2-x1;
 if (w < 1)
     w = 1;
 int tickHeight = track->itemHeight(track, track->items);
 for (i = 0; i < gtHapCount ; i++)
     {
     struct vcfGenotype *gt = &(rec->genotypes[gtHapOrder[i] >> 1]);
     int nameIx = stringArrayIx(gt->id, sampleOrder, track->customInt);
     struct dyString *mouseover = dyStringNew(0);
+    int tickColor = getTickColor(track, rec, i, nameIx, gtHapCount, gtHapOrder, childSample, sampleOrder, colorMode, funcTerm);
     if (gt->isPhased || (gt->hapIxA == 1 && gt->hapIxB == 1)) // if phased or homozygous alt
         {
         int alIx = gtHapOrder[i] & 1 ? gt->hapIxB : gt->hapIxA;
-        int tickColor = MG_BLACK;
-        if (sameString(childSample, gt->id) && highlightChildDiffs)
-            tickColor = getChildAlleleColor(track, rec, alIx, i, nameIx, gtHapCount, gtHapOrder, sampleOrder);
         dyStringPrintf(mouseover, "%s ", gt->id);
         if (alIx != 0) // non-reference allele
             {
             if (sameString(childSample, gt->id)) // we're drawing child ticks
                 {
                 dyStringPrintf(mouseover, "allele: %s", rec->alleles[alIx]);
                 }
             else
                 {
-                dyStringPrintf(mouseover, "%s allele: %s", i == 0 || i == 5 ? "untransmitted" : "transmitted", rec->alleles[alIx]);
+                if (gt->isPhased)
+                    dyStringPrintf(mouseover, "likely %s allele: %s", transmitOrder[i], rec->alleles[alIx]);
+                else
+                    {
+                    int otherIx = (alIx == gt->hapIxB ? gt->hapIxA : gt->hapIxB);
+                    dyStringPrintf(mouseover, "unphased alleles: %s/%s", rec->alleles[alIx], rec->alleles[otherIx]);
+                    }
                 }
             int y = yOffsets[i] - (tickHeight/2);
             hvGfxBox(hvg, x1, y, w, tickHeight, tickColor);
             vcfPhasedAddMapBox(hvg, rec, psvs, mouseover->string, x1, y, w, tickHeight, track);
             }
         }
     else if (gt->hapIxA != 0 || gt->hapIxB != 0)// draw the tick between the two haplotype lines
         {
         int yMid = ((yOffsets[2*nameIx] + yOffsets[(2*nameIx)+1]) / 2); // midpoint of two haplotype lines
-        hvGfxBox(hvg, x1, yMid - (tickHeight / 2), w, tickHeight, VCFPHASED_UNPHASED_COLOR);
-        dyStringPrintf(mouseover, "%s Unphased genotype: %s/%s", gt->id, rec->alleles[0], rec->alleles[1]);
+        hvGfxBox(hvg, x1, yMid - (tickHeight / 2), w, tickHeight, tickColor);
+        dyStringPrintf(mouseover, "%s unphased genotype: %s/%s", gt->id, rec->alleles[0], rec->alleles[1]);
         vcfPhasedAddMapBox(hvg, rec, psvs, mouseover->string, x1, yMid, w, tickHeight, track);
         }
     }
 }
 
 static void vcfPhasedAddLabel(struct track *track, struct hvGfx *hvg, char *label, int trackY, int labelY, MgFont *font, Color color)
 // Add a VCF Sample name to the left label of the haplotype representation
 {
 int clipXBak, clipYBak, clipWidthBak, clipHeightBak;
 struct hvGfx *hvgLL = (hvgSide != NULL) ? hvgSide : hvg;
 hvGfxGetClip(hvgLL, &clipXBak, &clipYBak, &clipWidthBak, &clipHeightBak);
 hvGfxUnclip(hvgLL);
 hvGfxSetClip(hvgLL, leftLabelX, trackY, leftLabelWidth, track->height);
 
 hvGfxTextRight(hvgLL, leftLabelX, labelY, leftLabelWidth, track->lineHeight, color,
@@ -2645,61 +2727,74 @@
         useAliasLabel && hasAlias ? (char *)name->val : "");
     vcfPhasedAddLabel(track, hvg, label->string, yOff, round(((y1 + y2) / 2) - (track->lineHeight / 2)), font, MG_BLACK);
     }
 }
 
 static void vcfPhasedDrawItems(struct track *track, int seqStart, int seqEnd,
 			      struct hvGfx *hvg, int xOff, int yOff, int width,
 			      MgFont *font, Color color, enum trackVisibility vis)
 /* Split samples' chromosomes (haplotypes), cluster them by parents, and
  * draw them all along a line representing each chromosome*/
 {
 struct vcfFile *vcff = track->extraUiData;
 if (vcff->records == NULL)
     return;
 
+struct txInfo *txiList = NULL;
+struct seqWindow *gSeqWin = chromSeqWindowNew(database, chromName, seqStart, seqEnd);
+enum phasedColorMode colorMode = getPhasedColorMode(track->tdb);
+if (colorMode == phasedFunctionMode)
+    txiList = txInfoLoad(gSeqWin, track->tdb);
 const double scale = scaleForPixels(width);
 boolean hideOtherSamples = cartUsualBooleanClosestToHome(cart, track->tdb, FALSE, VCF_PHASED_HIDE_OTHER_VAR, FALSE);
 struct slPair *pair, *sampleNames = vcfPhasedGetSampleOrder(cart, track->tdb, FALSE, hideOtherSamples);
 int gtCount = slCount(sampleNames);
-int yOffsets[gtCount * 2]; // y offsets of each haplotype line
+int gtHapCount = gtCount * 2;
+int yOffsets[gtHapCount]; // y offsets of each haplotype line
 char *sampleOrder[gtCount]; // order of sampleName lines
 int i;
 for (pair = sampleNames, i = 0; pair != NULL && i < gtCount; pair = pair->next, i++)
     sampleOrder[i] = pair->name;
 
 char *childSample = cloneString(trackDbSetting(track->tdb, VCF_PHASED_CHILD_SAMPLE_SETTING));
 char *pt = strchr(childSample, '|');
 if (pt != NULL)
     *pt = '\0';
 
 // set up the "haplotype" lines and the transparent yellow box to delineate samples
 vcfPhasedSetupHaplotypesLines(track, hvg, xOff, yOff, width, yOffsets, sampleNames, childSample, font);
 
 // maybe sort the variants by haplotype then draw ticks
-unsigned int *hapOrder = needMem(sizeof(short) * gtCount * 2);
+unsigned int *hapOrder = needMem(sizeof(short) * gtHapCount);
 int nRecords = slCount(vcff->records);
 int centerIx = getCenterVariantIx(track, seqStart, seqEnd, vcff->records);
 int startIx = 0;
 int endIx = nRecords;
 hapOrder = computeHapDist(vcff, centerIx, startIx, endIx, childSample, gtCount, sampleOrder);
-boolean highlightChildDiffs = cartUsualBooleanClosestToHome(cart, track->tdb, FALSE, VCF_PHASED_HIGHLIGHT_INCONSISTENT, FALSE);
 struct vcfRecord *rec = NULL;
 struct slList *item = NULL;
+
+// array of "trans", "untrans", etc for mouseovers
+char *transmitOrder[gtHapCount];
+setTransmitOrder(hapOrder, transmitOrder, gtHapCount, vcff->records, sampleOrder, childSample);
+
 for (rec = vcff->records, item = track->items; rec != NULL && item != NULL; rec = rec->next, item = item->next)
     {
-    vcfPhasedDrawOneRecord(track, hvg, rec, item, hapOrder, gtCount * 2, xOff, yOffsets, sampleOrder, childSample, highlightChildDiffs, scale);
+    enum soTerm funcTerm = soUnknown;
+    if (colorMode == phasedFunctionMode)
+        funcTerm = functionForRecord(rec, gSeqWin, txiList);
+    vcfPhasedDrawOneRecord(track, hvg, rec, item, hapOrder, gtHapCount, xOff, yOffsets, sampleOrder, childSample, scale, transmitOrder, colorMode, funcTerm);
     }
 }
 
 static int vcfPhasedItemHeight(struct track *tg, void *item)
 {
 return tg->heightPer;
 }
 
 int vcfPhasedTrackHeight(struct track *tg, enum trackVisibility vis)
 {
 const struct vcfFile *vcff = tg->extraUiData;
 // when doing composite track height, vcfPhasedLoadItems won't have been called yet!
 if (vis == tvDense)
     return pgSnpHeight(tg, vis);
 if (!vcff || vcff->records == NULL)