da33d70c10ddaae6c3290cb900d7d0cc2b6ee01b
hiram
  Tue Mar 17 13:57:24 2026 -0700
allow calculation of GC percent on the fly with code help from claude refs #35958

diff --git src/hg/hgTracks/bigWigTrack.c src/hg/hgTracks/bigWigTrack.c
index 4e43b950aa4..cd6139f3c73 100644
--- src/hg/hgTracks/bigWigTrack.c
+++ src/hg/hgTracks/bigWigTrack.c
@@ -341,26 +341,181 @@
 	    }
 	hFreeConn(&conn);
 	}
     }
 bigWigLoadPreDraw(tg, winStart, winEnd, insideWidth);
 }
 
 void mathWigMethods(struct track *track, struct trackDb *tdb, 
 	int wordCount, char *words[])
 /* Set up bigWig methods. */
 {
 bigWigMethods(track, tdb, wordCount, words);
 track->loadItems = mathWigLoadItems;
 }
 
+static struct preDrawContainer *gc5BaseOnTheFlyLoadPreDraw(struct track *tg,
+    int seqStart, int seqEnd, int width)
+/* Compute GC percent directly from genome sequence in winSize-base
+ * non-overlapping windows.  Values are 0-100 to match gc5BaseBw type
+ * bigWig 0 100 range.  wigTrack.c windowing/smoothing handles averaging
+ * when multiple windows fall on the same pixel.
+ */
+{
+char *winSizeString = trackDbSetting(tg->tdb, "calcWinSize");
+int winSize = (int)sqlLongLong(winSizeString);
+
+if (tg->preDrawContainer)
+    return tg->preDrawContainer;
+
+struct preDrawContainer *pre = tg->preDrawContainer = initPreDrawContainer(width);
+struct preDrawElement *preDraw = pre->preDraw;
+int preDrawZero = pre->preDrawZero;
+int preDrawSize = pre->preDrawSize;
+
+struct dnaSeq *seq = hChromSeq(database, chromName, seqStart, seqEnd);
+if (seq == NULL)
+    return pre;
+
+int seqLen = seqEnd - seqStart;
+/* pixelsPerBase is based on the visible window, not the extended fetch range,
+ * so that pixel coordinates are correct relative to the display. */
+double pixelsPerBase = (double)width / (winEnd - winStart);
+int span = winSize;
+int pos;
+
+/* Align to the winSize-base chromosome boundary, not the window boundary.
+ * startOffset is the number of bases into the fetched sequence where
+ * the first chromosome-aligned winSize-base window begins. */
+int startOffset = (span - (seqStart % span)) % span;
+
+for (pos = startOffset; pos + span <= seqLen; pos += span)
+    {
+    int gcCount = 0, validBases = 0;
+    int i;
+    for (i = pos; i < pos + span; i++)
+        {
+        char b = seq->dna[i];
+        if (b == 'g' || b == 'c')  { gcCount++;  validBases++; }
+        else if (b != 'n')           validBases++;
+        }
+    if (validBases == 0)
+        continue;
+
+    double gcPct = 100.0 * gcCount / validBases;   /* 0 - 100 */
+
+    /* Map this span to pixel coordinates relative to winStart.
+     * Data outside the visible window lands in the preDraw margin slots
+     * (negative or >= width), which is correct for smoothing at edges.
+     * Fill every pixel covered by this 5-base span. */
+    int chromPos = seqStart + pos;
+    int x1 = (int)((chromPos - winStart) * pixelsPerBase);
+    int x2 = (int)((chromPos + span - winStart) * pixelsPerBase);
+    int xi;
+    for (xi = x1; xi <= x2; ++xi)
+        {
+        int xCoord = preDrawZero + xi;
+        if (xCoord >= 0 && xCoord < preDrawSize)
+            {
+            preDraw[xCoord].count++;
+            if (gcPct > preDraw[xCoord].max) preDraw[xCoord].max = gcPct;
+            if (gcPct < preDraw[xCoord].min) preDraw[xCoord].min = gcPct;
+            preDraw[xCoord].sumData    += gcPct;
+            preDraw[xCoord].sumSquares += gcPct * gcPct;
+            }
+        }
+    }
+
+dnaSeqFree(&seq);
+// if (measureTiming)
+//     measureTime("GC5 on the fly calculation");
+return pre;
+}
+
+static void gc5BaseOnTheFlyLoadItems(struct track *tg)
+/* Compute GC percent from genome sequence; called in the loadItems phase
+ * just as bigWigLoadItems calls bigWigLoadPreDraw to fill preDrawContainer.
+ * Fetch sequence that covers the preDraw smoothing margins on each side so
+ * the smoothing/averaging machinery has data at the window edges. */
+{
+tg->items = NULL;
+tg->mapsSelf = TRUE;
+/* Extend the fetch range by wiggleSmoothingMax pixels worth of bases on
+ * each side, rounded up to the nearest 5-base span. */
+double basesPerPixel = (double)(winEnd - winStart) / insideWidth;
+int marginBases = ((int)(wiggleSmoothingMax * basesPerPixel) / 5 + 1) * 5;
+int fetchStart = max(0, winStart - marginBases);
+int fetchEnd   = min(hChromSize(database, chromName), winEnd + marginBases);
+gc5BaseOnTheFlyLoadPreDraw(tg, fetchStart, fetchEnd, insideWidth);
+}
+
 void bigWigMethods(struct track *track, struct trackDb *tdb,
 	int wordCount, char *words[])
 /* Set up bigWig methods. */
 {
 bedGraphMethods(track, tdb, wordCount, words);
 track->loadItems = bigWigLoadItems;
 track->preDrawItems = bigWigPreDrawItems;
 track->preDrawMultiRegion = wigMultiRegionGraphLimits;
 track->drawItems = bigWigDrawItems;
 track->loadPreDraw = bigWigLoadPreDraw;
 }
+
+struct track *gc5BaseOnTheFlyTg(struct cart *cart)
+/* Create an on-the-fly GC percent track computed directly from
+ *     from genome sequence.
+ */
+{
+struct track *tg = trackNew();
+struct trackDb *tdb = trackDbNew();
+char longLabel[1024];
+safef(longLabel, sizeof(longLabel), "GC FLY Percent in %s-Base Windows", gcOnFlyWinSize(cart));
+
+/* Fill in trackDb fields needed by wigCartOptionsNew and bigWigMethods. */
+tdb->track      = cloneString(GC_ON_FLY_TRACK_NAME);
+tdb->table      = cloneString(GC_ON_FLY_TRACK_NAME);
+tdb->type       = cloneString("bigWig 0 100");
+tdb->shortLabel = cloneString(GC_ON_FLY_TRACK_LABEL);
+tdb->longLabel  = cloneString(longLabel);
+tdb->grp        = cloneString("map");
+tdb->canPack    = 0;
+tdb->visibility = tvFull;
+
+/* Add wig display settings to match what gc5BaseBw trackDb would have. */
+trackDbAddSetting(tdb, "autoScale",         "Off");
+// trackDbAddSetting(tdb, "viewLimits",        "30:70");
+trackDbAddSetting(tdb, "viewLimits",        "0:100");
+// trackDbAddSetting(tdb, "maxHeightPixels",   "128:36:16");
+trackDbAddSetting(tdb, "maxHeightPixels",   "128:128:128");
+trackDbAddSetting(tdb, "graphTypeDefault",  "Bar");
+trackDbAddSetting(tdb, "gridDefault",       "OFF");
+trackDbAddSetting(tdb, "windowingFunction", "Mean");
+trackDbAddSetting(tdb, "color",             "0,0,0");
+trackDbAddSetting(tdb, "altColor",          "128,128,128");
+trackDbAddSetting(tdb, "gcComputeOnTheFly", "on");
+trackDbAddSetting(tdb, "gcOnTheFlyMaxBases", "500000");
+trackDbAddSetting(tdb, "gcFallbackBigWig", "/gbdb/ce1x/bbi/gc5BaseBw/gc5Base.bw");
+trackDbAddSetting(tdb, "calcWinSize", gcOnFlyWinSize(cart));
+trackDbPolish(tdb);
+
+/* Set up bigWig display methods (drawItems, preDrawItems, etc.) */
+char *words[] = {"bigWig", "0", "100"};
+bigWigMethods(tg, tdb, 3, words);
+
+/* Override data loading with on-the-fly sequence computation. */
+tg->loadItems   = gc5BaseOnTheFlyLoadItems;
+tg->loadPreDraw = gc5BaseOnTheFlyLoadPreDraw;
+
+/* Fill in track struct fields. */
+tg->tdb              = tdb;
+tg->track            = tdb->track;
+tg->table            = tdb->table;
+tg->visibility       = tdb->visibility;
+tg->shortLabel       = tdb->shortLabel;
+tg->longLabel        = tdb->longLabel;
+tg->priority         = tdb->priority;
+tg->defaultPriority  = tg->priority;
+tg->groupName        = tdb->grp;
+tg->defaultGroupName = cloneString(tg->groupName);
+tg->hasUi            = TRUE;
+return tg;
+}