d3752edc12da1bf08427946150f564dbdd5d2254
angie
  Thu Oct 24 13:55:51 2019 -0700
bigDbSnp track handler code - initial commit.  refs #23283
* dnautil: Added trimRefAltLeft to get left-justified trimming (a la VCF not HGVS).
* bigBedClick: do hReplaceGbdb up front in parseDetailsTablUrls instead of waiting until endpoint.
* trackDbCustom.c: consolidating type-handling for wig/bigWig vs. bigBed-based big*.

diff --git src/hg/hgTracks/variation.c src/hg/hgTracks/variation.c
index 3e0006f..c714c0b 100644
--- src/hg/hgTracks/variation.c
+++ src/hg/hgTracks/variation.c
@@ -1,24 +1,28 @@
 /* variation.c - hgTracks routines that are specific to the tracks in
  * the variation group */
 
 /* Copyright (C) 2014 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "variation.h"
 #include "imageV2.h"
-
+#include "bedCart.h"
+#include "bigBed.h"
+#include "bigDbSnp.h"
+#include "hvGfx.h"
+#include "soTerm.h"
 
 static double snp125AvHetCutoff = SNP125_DEFAULT_MIN_AVHET;
 static int snp125WeightCutoff = SNP125_DEFAULT_MAX_WEIGHT;
 static int snp132MinSubmitters = SNP132_DEFAULT_MIN_SUBMITTERS;
 static float snp132MinMinorAlFreq = SNP132_DEFAULT_MIN_MINOR_AL_FREQ;
 static float snp132MaxMinorAlFreq = SNP132_DEFAULT_MAX_MINOR_AL_FREQ;
 static int snp132MinAlFreq2N = SNP132_DEFAULT_MIN_AL_FREQ_2N;
 
 // Globals for caching cart coloring and filtering settings for snp125+ tracks:
 static enum snp125ColorSource snp125ColorSource = SNP125_DEFAULT_COLOR_SOURCE;
 static enum snp125Color *snp125LocTypeCart = NULL;
 static enum snp125Color *snp125ClassCart = NULL;
 static enum snp125Color *snp125MolTypeCart = NULL;
 static enum snp125Color *snp125ValidCart = NULL;
 static struct hash *snp125FuncCartColorHash = NULL;
@@ -2524,15 +2528,486 @@
 tg->itemColor = delConrad2ItemColor;
 tg->itemNameColor = delConrad2ItemColor;
 }
 
 void delMccarrollMethods(struct track *tg)
 {
 tg->itemColor = delMccarrollItemColor;
 tg->itemNameColor = delMccarrollItemColor;
 }
 
 void delHindsMethods(struct track *tg)
 {
 tg->itemColor = delHindsItemColor;
 tg->itemNameColor = delHindsItemColor;
 }
+
+// Functional impact coloring scheme with lf->filterColor values (hex RGB)
+#define bigDbSnpColorCodingChange 0x00ff0000
+#define bigDbSnpColorSyn 0x0000ff00
+#define bigDbSnpColorUtrNc 0x000000ff
+
+static int lfColorFromSoTerm(enum soTerm term)
+/* Assign a lf->filterColor value (hex RGB) according to soTerm. */
+{
+int color = 0;
+switch (term)
+    {
+    case frameshift_variant:
+    case frameshift:
+    case initiator_codon_variant:
+    case stop_gained:
+    case stop_lost:
+    case splice_acceptor_variant:
+    case splice_donor_variant:
+    case inframe_indel:
+    case inframe_insertion:
+    case inframe_deletion:
+    case missense_variant:
+    case terminator_codon_variant:
+        color = bigDbSnpColorCodingChange;
+        break;
+    case synonymous_variant:
+        color = bigDbSnpColorSyn;
+        break;
+    case _5_prime_UTR_variant:
+    case _3_prime_UTR_variant:
+    case nc_transcript_variant:
+        color = bigDbSnpColorUtrNc;
+        break;
+    default:
+        ;
+        // Leave color at default black
+    }
+return color;
+}
+
+static void removeMinMafFromCart(struct trackDb *tdb)
+/* Remove track's minimum Minor Allele Frequency filter settings from cart. */
+{
+char *cartVar = NULL;
+cartLookUpVariableClosestToHome(cart, tdb, FALSE, "minMaf", &cartVar);
+if (cartVar)
+    cartRemove(cart, cartVar);
+cartLookUpVariableClosestToHome(cart, tdb, FALSE, "freqProj", &cartVar);
+if (cartVar)
+    cartRemove(cart, cartVar);
+}
+
+static int getFreqSourceIx(struct trackDb *tdb)
+/* If trackDb setting freqSourceOrder is defined then return the index of the currently selected
+ * project (default: first project). */
+{
+char *freqSourceOrder = trackDbSetting(tdb, "freqSourceOrder");
+int ix = 0;
+if (isNotEmpty(freqSourceOrder))
+    {
+    char *freqProj = cartOptionalStringClosestToHome(cart, tdb, FALSE, "freqProj");
+    if (isNotEmpty(freqProj))
+        {
+        freqSourceOrder = cloneString(freqSourceOrder);
+        int fsCount = countSeparatedItems(freqSourceOrder, ',');
+        char *sources[fsCount];
+        chopCommas(freqSourceOrder, sources);
+        for (ix = 0;  ix < fsCount;  ix++)
+            {
+            if (sameString(sources[ix], freqProj))
+                break;
+            }
+        if (ix == fsCount)
+            {
+            // Unrecognized project; clear from cart.
+            warn("%s: unrecognized frequency source project '%s'; "
+                 "removing minimum MAF filter from cart.", tdb->shortLabel, freqProj);
+            removeMinMafFromCart(tdb);
+            ix = -1;
+            }
+        freeMem(freqSourceOrder);
+        }
+    }
+return ix;
+}
+
+static int bdsRefAltSlashSepLen(struct bigDbSnp *bds)
+/* Return length of abbreviated slash-separated allele string (ref/alt1/alt2/...) */
+{
+char abbrev[16];
+int len = strlen(bigDbSnpAbbrevAllele(bds->ref, abbrev, sizeof abbrev));
+int i;
+for (i = 0;  i < bds->altCount;  i++)
+    len += 1 + strlen(bigDbSnpAbbrevAllele(bds->alts[i], abbrev, sizeof abbrev));
+return len;
+}
+
+static void bdsAppendRefAlt(struct bigDbSnp *bds, struct dyString *dy)
+/* Append abbreviated ref and alt alleles to dy */
+{
+dyStringAppendSep(dy, " ");
+char abbrev[24];
+dyStringAppend(dy, bigDbSnpAbbrevAllele(bds->ref, abbrev, sizeof abbrev));
+int i;
+for (i = 0;  i < bds->altCount;  i++)
+    {
+    dyStringPrintf(dy, "/%s",
+                   bigDbSnpAbbrevAllele(bds->alts[i], abbrev, sizeof abbrev));
+    }
+}
+
+static int bdsMajMinSlashSepLen(struct bigDbSnp *bds, int freqSourceIx)
+/* Return length of abbreviated slash-separated allele string (major allele/minor allele),
+ * or -1 if not available. */
+{
+if (bds->freqSourceCount > freqSourceIx && isfinite(bds->minorAlleleFreq[freqSourceIx]))
+    {
+    char abbrev[16];
+    int len = strlen(bigDbSnpAbbrevAllele(bds->majorAllele[freqSourceIx],
+                                          abbrev, sizeof abbrev));
+    len += 1 + strlen(bigDbSnpAbbrevAllele(bds->minorAllele[freqSourceIx],
+                                           abbrev, sizeof abbrev));
+    return len;
+    }
+return -1;
+}
+
+static void bdsAppendMajMin(struct bigDbSnp *bds, struct dyString *dy, int freqSourceIx)
+/* Append abbreviated major and minor (i.e. 2nd most frequent) alleles to dy if available. */
+{
+if (bds->freqSourceCount > freqSourceIx && isfinite(bds->minorAlleleFreq[freqSourceIx]))
+    {
+    dyStringAppendSep(dy, " ");
+    char abbrev[24];
+    dyStringAppend(dy, bigDbSnpAbbrevAllele(bds->majorAllele[freqSourceIx], abbrev, sizeof abbrev));
+    dyStringPrintf(dy, "/%s",
+                   bigDbSnpAbbrevAllele(bds->minorAllele[freqSourceIx], abbrev, sizeof abbrev));
+    }
+}
+
+static void bdsAppendMaf(struct bigDbSnp *bds, int freqSourceIx, struct dyString *dy)
+/* If frequency data are available for the currently selected source then append minor allele freq
+ * to dy. */
+{
+if (bds->freqSourceCount > freqSourceIx && isfinite(bds->minorAlleleFreq[freqSourceIx]))
+    {
+    dyStringAppendSep(dy, " ");
+    dyStringPrintf(dy, "%f", bds->minorAlleleFreq[freqSourceIx]);
+    }
+}
+
+static void bdsAppendFunc(struct bigDbSnp *bds, struct dyString *dy)
+/* Append maximum functional impact (if any) to dy. */
+{
+if (bds->maxFuncImpact > 0)
+    {
+    dyStringAppendSep(dy, " ");
+    dyStringAppend(dy, soTermToString(bds->maxFuncImpact));
+    }
+}
+
+static char *bdsLabel(struct trackDb *tdb, struct bigDbSnp *bds)
+/* Simulate itemName with options... */
+{
+struct dyString *dy = dyStringNew(0);
+if (cartUsualBooleanClosestToHome(cart, tdb, FALSE, "label.rsId", TRUE))
+    dyStringAppend(dy, bds->name);
+boolean tooLong = FALSE;
+if (cartUsualBooleanClosestToHome(cart, tdb, FALSE, "label.refAlt", TRUE))
+    {
+    if (bdsRefAltSlashSepLen(bds) < 24)
+        bdsAppendRefAlt(bds, dy);
+    else
+        tooLong = TRUE;
+    }
+int freqSourceIx = getFreqSourceIx(tdb);
+if (cartUsualBooleanClosestToHome(cart, tdb, FALSE, "label.majMin", FALSE))
+    {
+    if (bdsMajMinSlashSepLen(bds, freqSourceIx) < 24)
+        bdsAppendMajMin(bds, dy, freqSourceIx);
+    else
+        tooLong = TRUE;
+    }
+if (cartUsualBooleanClosestToHome(cart, tdb, FALSE, "label.maf", FALSE))
+    {
+    bdsAppendMaf(bds, freqSourceIx, dy);
+    }
+// If the alleles are too long, print the class as a backup.
+if (tooLong)
+    dyStringPrintf(dy, " %s", bigDbSnpClassToString(bds->class));
+if (cartUsualBooleanClosestToHome(cart, tdb, FALSE, "label.func", FALSE))
+    bdsAppendFunc(bds, dy);
+return dyStringCannibalize(&dy);
+}
+
+static char *bdsMouseOver(struct bigDbSnp *bds)
+/* Simulate mouseover with options... */
+{
+struct dyString *dy = dyStringCreate("%s:", bds->name);
+bdsAppendRefAlt(bds, dy);
+bdsAppendFunc(bds, dy);
+return dyStringCannibalize(&dy);
+}
+
+static void paranoidCheckMnvLengths(struct bigDbSnp *bds)
+/* Make sure an mnv variant's alleles are all the same length. */
+{
+if (bds->class != bigDbSnpMnv)
+    errAbort("paranoidCheckMnvLengths: expected %s class=bigDbSnpMnv=%d, got %d",
+             bds->name, bigDbSnpMnv, bds->class);
+int refLen = strlen(bds->ref);
+if (refLen != bds->chromEnd - bds->chromStart)
+    errAbort("paranoidCheckMnvLengths: %s: length of ref '%s' is %d but chromEnd - chromStart is %d",
+             bds->name, bds->ref, refLen, bds->chromEnd - bds->chromStart);
+int aIx;
+for (aIx = 0;  aIx < bds->altCount;  aIx++)
+    if (strlen(bds->alts[aIx]) != refLen)
+        errAbort("paranoidCheckMnvLengths: %s: length of ref '%s' is %d but alt[%d] '%s' is %d",
+                 bds->name, bds->ref, refLen, aIx, bds->alts[aIx], (int)strlen(bds->alts[aIx]));
+}
+
+static boolean alleleBasesMatch(struct bigDbSnp *bds, int i)
+/* Return TRUE if all alternate alleles match ref at base i. */
+{
+assert(i >= 0);
+boolean allMatch = TRUE;
+int aIx;
+for (aIx = 0;  aIx < bds->altCount;  aIx++)
+    {
+    if (bds->alts[aIx][i] != bds->ref[i])
+        {
+        allMatch = FALSE;
+        break;
+        }
+    }
+return allMatch;
+}
+
+static boolean filterMaf(struct bigDbSnp *bds, int freqSourceIx, double minMaf)
+/* Return TRUE if bds passes minimum Minor Allele Frequency filter. */
+{
+if (freqSourceIx < 0)
+    return TRUE;
+if (bds->freqSourceCount > freqSourceIx)
+    {
+    double maf = bds->minorAlleleFreq[freqSourceIx];
+    if (maf >= minMaf)
+        return TRUE;
+    }
+return FALSE;
+}
+
+struct linkedFeatures *lfFromBigDbSnp(struct trackDb *tdb, struct bigBedInterval *bb,
+                                      struct bigBedFilter *filters, int freqSourceIx)
+/* Convert one bigDbSnp item to a linkedFeatures for drawing if it passes filter, else NULL. */
+{
+struct linkedFeatures *lf = NULL;
+char startBuf[16], endBuf[16];
+char *bedRow[32];
+bigBedIntervalToRow(bb, chromName, startBuf, endBuf, bedRow, ArraySize(bedRow));
+if (bigBedFilterInterval(bedRow, filters))
+    {
+    struct bigDbSnp *bds = bigDbSnpLoad(bedRow);
+    double minMaf = cartUsualDoubleClosestToHome(cart, tdb, FALSE, "minMaf", 0.0);
+    if (! filterMaf(bds, freqSourceIx, minMaf))
+        return NULL;
+    AllocVar(lf);
+    lf->name = cloneString(bds->name);
+    AllocVar(lf->components);
+    lf->start = lf->components->start = bds->chromStart;
+    lf->tallStart = lf->start + bds->shiftBases;
+    lf->tallEnd = lf ->end = lf->components->end = bds->chromEnd;
+    lf->label = bdsLabel(tdb, bds);
+    lf->mouseOver = bdsMouseOver(bds);
+    lf->extra = (void *)USE_ITEM_RGB;   /* signal for coloring */
+    lf->filterColor = lfColorFromSoTerm(bds->maxFuncImpact);
+    lf->original = bds;
+    // MNVs in dbSNP are usually linked SNVs; if so, use one sf component for each SNV.
+    if (bds->class == bigDbSnpMnv && bds->chromEnd - bds->chromStart > 2)
+        {
+        paranoidCheckMnvLengths(bds);
+        int len = bds->chromEnd - bds->chromStart;
+        struct simpleFeature *sf = lf->components, *sfList = NULL;
+        int i;
+        for (i = 0;  i < len;  i++)
+            {
+            if (alleleBasesMatch(bds, i))
+                {
+                // If in a block, end the block.
+                if (sf != NULL)
+                    {
+                    sf->end = bds->chromStart + i;
+                    slAddHead(&sfList, sf);
+                    sf = NULL;
+                    }
+                }
+            else
+                {
+                // If in a block, continue it, else start a new block.
+                if (sf == NULL)
+                    {
+                    AllocVar(sf);
+                    sf->start = bds->chromStart + i;
+                    sf->end = bds->chromEnd;
+                    }
+                }
+            }
+        if (sf != NULL)
+            slAddHead(&sfList, sf);
+        slReverse(&sfList);
+        lf->components = sfList;
+        }
+    }
+return lf;
+}
+
+static double getMinMaf(struct trackDb *tdb)
+/* If minMaf is out of range, remove it from cart and return 0.0; otherwise return
+ * current value from cart. */
+{
+double minMaf = cartUsualDoubleClosestToHome(cart, tdb, FALSE, "minMaf", 0.0);
+if (minMaf < 0.0 || minMaf > 0.5)
+    {
+    warn("%s: invalid minimum Minor Allele Frequency %0.03f (range is 0.0 - 0.5); "
+         "removing minimum MAF filter from cart.", tdb->shortLabel, minMaf);
+    removeMinMafFromCart(tdb);
+    return 0.0;
+    }
+return minMaf;
+}
+
+static void bigDbSnpLoadItems(struct track *tg)
+/* Convert bigDbSnp items in window to linkedFeatures. */
+{
+struct linkedFeatures *lfList = NULL;
+double minMaf = getMinMaf(tg->tdb);
+// If minMaf is 0, there's no need to filter so set freqSourceIx to -1.
+int freqSourceIx = (minMaf == 0.0) ? -1 : getFreqSourceIx(tg->tdb);
+char *maxItemStr = trackDbSetting(tg->tdb, "maxItems");
+int maxItems = isNotEmpty(maxItemStr) ? atoi(maxItemStr) : 250000;
+bigBedAddLinkedFeaturesFromExt(tg, chromName, winStart, winEnd, freqSourceIx, 0, FALSE, 4, &lfList,
+                               maxItems);
+slReverse(&lfList);
+slSort(&lfList, linkedFeaturesCmp);
+tg->items = lfList;
+}
+
+static boolean bdsIsIndel(struct bigDbSnp *bds, int *retMinAltLen, int *retMaxAltLen)
+/* Set *ret{Min,Max}AltLen to the {least,greatest} alt allele length.
+ * Return TRUE if any alt is shorter or longer than ref. */
+{
+int refLen = strlen(bds->ref);
+int minAltLen = refLen, maxAltLen = refLen;
+int i;
+for (i = 0;  i < bds->altCount;  i++)
+    {
+    int altLen = strlen(bds->alts[i]);
+    if (altLen < minAltLen)
+        minAltLen = altLen;
+    if (altLen > maxAltLen)
+        maxAltLen = altLen;
+    }
+if (retMinAltLen)
+    *retMinAltLen = minAltLen;
+if (retMaxAltLen)
+    *retMaxAltLen = maxAltLen;
+return (refLen != minAltLen || refLen != maxAltLen);
+}
+
+static boolean bdsHasInsAndDel(struct bigDbSnp *bds, int *retMaxDel)
+/* If bds variant has both insertion(s) and deletion(s) relative to the reference genome,
+ * then return TRUE.  Set *retMaxDel to the largest number of bases deleted. */
+{
+boolean isBoth = FALSE;
+int minAltLen, maxAltLen;
+if (bdsIsIndel(bds, &minAltLen, &maxAltLen))
+    {
+    int refLen = strlen(bds->ref);
+    isBoth = (minAltLen < refLen && refLen < maxAltLen);
+    if (retMaxDel)
+        *retMaxDel = (refLen - minAltLen);
+    }
+else if (retMaxDel)
+    *retMaxDel = 0;
+return isBoth;
+}
+
+static void bigDbSnpDrawItemAt(struct track *tg, void *item, struct hvGfx *hvg,
+                        int xOff, int y, double scale,
+                        MgFont *font, Color color, enum trackVisibility vis)
+/* Draw one bigDbSnp variant, possibly with thin and/or gray portions to show uncertain placement,
+ * and with block-and-line when an MNV is really two linked SNVs. */
+{
+struct linkedFeatures *lf = item;
+int heightPer = tg->heightPer;
+int x1 = round((double)((int)lf->start-winStart)*scale) + xOff;
+int x2 = round((double)((int)lf->end-winStart)*scale) + xOff;
+int w = x2-x1;
+if (w > 1 && lf->tallStart > lf->start)
+    {
+    // There's a thin region, and enough pixels to draw thin and thick.
+    struct bigDbSnp *bds = (struct bigDbSnp *)lf->original;
+    int maxDel = 0;
+    if (bdsHasInsAndDel(bds, &maxDel))
+        {
+        // There are both inserted and deleted bases, so draw a lighter-colored box to show
+        // deleted bases.  Draw the thin region afterwards, so it appears on top of the box.
+        Color delColor = somewhatLighterColor(hvg, lighterColor(hvg, color));
+        int delStart = lf->end - maxDel;
+        int delStartPx = 0, delEndPx = 0;
+        if (scaledBoxToPixelCoords(delStart, lf->end, scale, xOff, &delStartPx, &delEndPx))
+            {
+            int wDel = delEndPx - delStartPx;
+            hvGfxBox(hvg, delStartPx, y, wDel, heightPer, delColor);
+            }
+        }
+    int thinStartPx = 0, thinEndPx = 0;
+    if (scaledBoxToPixelCoords(lf->start, lf->tallStart, scale, xOff, &thinStartPx, &thinEndPx))
+        {
+        int shortOff = heightPer/4;
+        int shortHeight = heightPer - 2*shortOff;
+        // Make sure we get at least one pixel of thin.
+        int wThin = thinEndPx - thinStartPx;
+        if (wThin == 0)
+            {
+            wThin = 1;
+            thinEndPx += 1;
+            }
+        hvGfxBox(hvg, thinStartPx, y+shortOff, wThin, shortHeight, color);
+        }
+    int thickStartPx = 0, thickEndPx = 0;
+    if (scaledBoxToPixelCoords(lf->tallStart, lf->end, scale, xOff, &thickStartPx, &thickEndPx))
+        {
+        // Use thinEndPx instead of thickStartPx in case thinEndPx was incremented above.
+        int wThick = thickEndPx - thinEndPx;
+        hvGfxBox(hvg, thinEndPx, y, wThick, heightPer, color);
+        }
+    }
+else
+    {
+    struct simpleFeature *sf;
+    if (lf->components->next != NULL)
+        {
+        // horizontal line connector for MNVs in which some bases do not change
+        int x1 = round((double)((int)lf->start-winStart)*scale) + xOff;
+        int x2 = round((double)((int)lf->end-winStart)*scale) + xOff;
+        // If I don't subtract 1 from x2, then it sticks out past the end of the last block when
+        // zoomed in so much that each base is multiple pixels.
+        x2--;
+        int midY = y + (tg->heightPer>>1);
+        int w = x2-x1;
+        if (w > 0)
+            hvGfxLine(hvg, x1, midY, x2, midY, color);
+        }
+    for (sf = lf->components;  sf != NULL;  sf = sf->next)
+        {
+        drawScaledBox(hvg, sf->start, sf->end, scale, xOff, y, heightPer, color);
+        }
+    }
+}
+
+void bigDbSnpMethods(struct track *track)
+/* Special load and draw hooks for type bigDbSnp. */
+{
+linkedFeaturesMethods(track);
+track->canPack = TRUE;
+track->loadItems = bigDbSnpLoadItems;
+track->drawItemAt = bigDbSnpDrawItemAt;
+track->itemName = bigLfItemName;
+}