0529f8de2a72011dfcea8f9a1221b6d227341743
braney
  Tue Mar 10 17:34:58 2026 -0700
hgc MAF click: use mafFrag to show only selected species with inserts removed

Change the hgc MAF detail page to use hgMafFrag/hgBigMafFrag instead of
displaying raw MAF blocks. This stitches alignments into a single continuous
display in reference coordinates, filtering to only selected species and
removing insertion columns where the reference has gaps.

Key changes:
- mafClick.c: rewrite mafOrAxtClick2 to build species orderList from
trackDb settings (speciesOrder, speciesGroup, speciesUseFile) respecting
speciesDefaultOff and cart on/off state via cartUsualBooleanClosestToHome,
matching the hgTracks species selection logic in newSpeciesItems().
Add mafStripRefGaps() to remove insertion columns from mafFrag output.
Show spaces instead of dots for matching bases in diff mode.
- hgMaf.h/hgMaf.c: add hgMafFragFromMafList() public wrapper for
pre-loaded mafLists (AXT/custom mafFile support). Change hgMafFragHelper
to skip species not in orderList instead of calling errAbort. Track per-species
source coordinates (src, start, end, srcSize, strand) in struct oneOrg
so browser/DNA links work correctly in mafFrag output.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

diff --git src/hg/hgc/mafClick.c src/hg/hgc/mafClick.c
index dba605cdb72..05679b5be12 100644
--- src/hg/hgc/mafClick.c
+++ src/hg/hgc/mafClick.c
@@ -23,31 +23,31 @@
 
 #define ADDEXONCAPITAL
 
 /* Javascript to help make a selection from a drop-down
  * go back to the server. */
 static char *autoSubmit = "document.gpForm.submit();";
 
 static void blueCapWrite(FILE *f, char *s, int size, char *r)
 /* Write capital letters in blue. */
 {
 boolean isBlue = FALSE;
 int i;
 for (i=0; i<size; ++i)
     {
     if (r!=NULL && s[i]==r[i])
-	fprintf(f, ".");
+	fprintf(f, " ");
     else
 	{
 	char c = s[i];
 	if (isupper(c))
             {
             if (!isBlue)
                 {
                 fprintf(f, "<span style='color:#0000FF;'>");
                 isBlue = TRUE;
                 }
             }
 	else if (islower(c))
             {
             if (isBlue)
                 {
@@ -442,30 +442,75 @@
 }
 
 static void capMafOnTrack(struct mafAli *maf, char *track, boolean onlyCds)
 /* Capitalize parts of maf that correspond to exons according
  * to given gene prediction track.  */
 {
 char dbOnly[64];
 char *chrom;
 struct mafComp *mc = maf->components;
 strncpy(dbOnly, mc->src, sizeof(dbOnly));
 chrom = chopPrefix(dbOnly);
 capAliTextOnTrack(maf, dbOnly, chrom, track, onlyCds);
 }
 #endif
 
+static void mafStripRefGaps(struct mafAli *maf)
+/* Remove every column where the reference (first component) text is '-', compacting
+ * each component's text in place and shrinking maf->textSize to match.
+ * NOTE(review): component start/size fields are not adjusted here - confirm callers cope. */
+{
+struct mafComp *mc;
+struct mafComp *ref = maf->components;
+int textSize = maf->textSize;
+if (ref == NULL || ref->text == NULL || textSize <= 0)
+    return;  /* nothing to strip; also avoids a zero-byte needMem request */
+
+/* Flag columns to keep (ref not a gap); assumes needMem zero-fills the array - TODO confirm */
+bool *keep = needMem(textSize * sizeof(*keep));
+int newSize = 0;
+int ii;
+for (ii = 0; ii < textSize; ii++)
+    {
+    if (ref->text[ii] != '-')
+        {
+        keep[ii] = TRUE;
+        newSize++;
+        }
+    }
+
+if (newSize == textSize)
+    {
+    freeMem(keep);
+    return;  /* reference has no gap columns, so every text is already compact */
+    }
+
+/* Compact each component's text in place; jj never passes ii, so reads stay ahead of writes */
+for (mc = maf->components; mc != NULL; mc = mc->next)
+    {
+    if (mc->text == NULL)
+        continue;
+    int jj = 0;
+    for (ii = 0; ii < textSize; ii++)
+        if (keep[ii])
+            mc->text[jj++] = mc->text[ii];
+    mc->text[jj] = '\0';
+    }
+maf->textSize = newSize;
+freeMem(keep);
+}
+
 static struct mafAli *mafOrAxtLoadInRegion2(struct sqlConnection *conn,struct sqlConnection *conn2,
                                             struct trackDb *tdb, char *chrom, int start, int end,
                                             char *axtOtherDb, char *file)
 {
 if (axtOtherDb != NULL)
     {
     struct hash *qSizeHash = hChromSizeHash(axtOtherDb);
     struct mafAli *mafList = axtLoadAsMafInRegion(conn, tdb->table,
             chrom, start, end, database, axtOtherDb, hChromSize(database, chrom), qSizeHash);
     hashFree(&qSizeHash);
     return mafList;
     }
 else
     return mafLoadInRegion2(conn, conn2, tdb->table, chrom,
 	start, end, file);
@@ -497,182 +542,166 @@
 
 static void mafOrAxtClick2(struct sqlConnection *conn, struct sqlConnection *conn2, struct trackDb *tdb, char *axtOtherDb, char *fileName)
 /* Display details for MAF or AXT tracks. */
 {
 if (issueBotWarning)
     {
     char *ip = getenv("REMOTE_ADDR");
     botDelayMessage(ip, botDelayMillis);
     }
 if (winEnd - winStart > 30000)
     {
     printf("Zoom so that window is 30,000 bases or less to see alignments and conservation statistics\n");
     }
 else
     {
-    struct mafAli *mafList = NULL, *maf, *subList = NULL;
-    int aliIx = 0, realCount = 0;
-    char dbChrom[64];
+    struct mafAli *maf = NULL;
     char option[128];
     char *capTrack;
     struct consWiggle *consWig, *consWiggles;
     struct hash *speciesOffHash = NULL;
     char *speciesOrder = NULL;
     char *speciesTarget = trackDbSetting(tdb, SPECIES_TARGET_VAR);
     char buffer[1024];
     int useTarg = FALSE;
     int useIrowChains = FALSE;
     struct hash *labelHash = mafGetLabelHash(tdb);
+    struct slName *orderList = NULL;
 
     safef(option, sizeof(option), "%s.%s", tdb->track, MAF_CHAIN_VAR);
     if (cartCgiUsualBoolean(cart, option, FALSE) &&
 	trackDbSetting(tdb, "irows") != NULL)
 	    useIrowChains = TRUE;
 
     safef(buffer, sizeof(buffer), "%s.vis",tdb->track);
     if (useIrowChains)
 	{
 	if (!cartVarExists(cart, buffer) && (speciesTarget != NULL))
 	    useTarg = TRUE;
 	else
 	    {
 	    char *val;
 
 	    val = cartUsualString(cart, buffer, "useCheck");
             useTarg = sameString("useTarg",val);
             }
         }
 
-    if (sameString(tdb->type, "bigMaf"))
-        {
-        char *fileName = trackDbSetting(tdb, "bigDataUrl");
-        struct bbiFile *bbi =  bigBedFileOpenAlias(fileName, chromAliasFindAliases);
-        mafList = bigMafLoadInRegion(bbi, seqName, winStart, winEnd);
-        }
-    else
-        mafList = mafOrAxtLoadInRegion2(conn,conn2, tdb, seqName, winStart, winEnd,
-                                        axtOtherDb, fileName);
-    safef(dbChrom, sizeof(dbChrom), "%s.%s", hubConnectSkipHubPrefix(database), seqName);
+    /* Determine species order from trackDb settings, matching hgTracks logic */
+    char *speciesGroup = trackDbSetting(tdb, SPECIES_GROUP_VAR);
+    char *speciesUseFile = trackDbSetting(tdb, SPECIES_USE_FILE);
+    speciesOrder = trackDbSetting(tdb, SPECIES_ORDER_VAR);
 
+    /* Check cart override for speciesOrder */
     safef(option, sizeof(option), "%s.speciesOrder", tdb->track);
-    speciesOrder = cartUsualString(cart, option, NULL);
-    if (speciesOrder == NULL)
-	speciesOrder = trackDbSetting(tdb, "speciesOrder");
+    char *cartOrder = cartUsualString(cart, option, NULL);
+    if (cartOrder != NULL)
+	speciesOrder = cartOrder;
 
-    int speciesCt = 0;
-    char *species[2048];
-    struct mafComp **newOrder;
-    if (speciesOrder)
-        {
-        // chop up speciesOrder string and store it away, checking for errors along the way
-        speciesCt = chopLine(cloneString(speciesOrder), species);
-        newOrder = needMem((speciesCt + 1) * sizeof (struct mafComp *));
+    if (speciesUseFile)
+	speciesOrder = cartGetOrderFromFile(database, cart, speciesUseFile);
 
-        int ii;
-        struct hash *nameHash = newHash(5);
-        for(ii=0; ii < speciesCt; ii++)
+    /* Build hash of species that default to off */
+    char *speciesOff = trackDbSetting(tdb, SPECIES_DEFAULT_OFF_VAR);
+    struct hash *defaultOffHash = NULL;
+    if (speciesOff)
         {
-            if (hashLookup(nameHash, species[ii]))
-                errAbort("speciesOrder contains %s more than once.", species[ii]);
-            hashStore(nameHash, species[ii]);
-            }
+        char *offSpecies[2048];
+        int offCt = chopLine(cloneString(speciesOff), offSpecies);
+        defaultOffHash = newHash(5);
+        int ii;
+        for (ii = 0; ii < offCt; ii++)
+            hashAdd(defaultOffHash, offSpecies[ii], NULL);
         }
 
-    for (maf = mafList; maf != NULL; maf = maf->next)
+    /* Build orderList of selected species for mafFrag, matching
+     * newSpeciesItems() logic in wigMafTrack.c */
+    if (speciesOrder || speciesGroup)
         {
-        int mcCount = 0;
-        struct mafComp *mc;
-        struct mafAli *subset;
-        struct mafComp *nextMc;
+        char *groups[1000];
+        char sGroup[2048];
+        int groupCt = 1;
+        int group;
 
-        /* remove empty components and configured off components
-         * from MAF, and ignore
-         * the entire MAF if all components are empty
-         * (solely for gap annotation) */
+        if (speciesGroup)
+            groupCt = chopLine(cloneString(speciesGroup), groups);
 
-        if (!useTarg)
+        /* Add reference database as first in list */
+        slNameAddHead(&orderList, database);
+
+        for (group = 0; group < groupCt; group++)
             {
-            for (mc = maf->components->next; mc != NULL; mc = nextMc)
+            char *species[2048];
+            int speciesCt;
+            if (groupCt != 1 || !speciesOrder)
                 {
-		char buf[64];
-                char *organism;
-		mafSrcDb(mc->src, buf, sizeof buf);
-                organism = hOrganism(buf);
-                if (!organism)
-                    organism = buf;
-		nextMc = mc->next;
-		safef(option, sizeof(option), "%s.%s", tdb->track, buf);
-		if (!cartUsualBoolean(cart, option, TRUE))
+                safef(sGroup, sizeof sGroup, "%s%s",
+                                        SPECIES_GROUP_PREFIX, groups[group]);
+                speciesOrder = trackDbRequiredSetting(tdb, sGroup);
+                }
+            speciesCt = chopLine(cloneString(speciesOrder), species);
+
+            int ii;
+            for (ii = 0; ii < speciesCt; ii++)
+                {
+                boolean defaultOn = (defaultOffHash == NULL
+                    || hashLookup(defaultOffHash, species[ii]) == NULL);
+                if (useTarg || cartUsualBooleanClosestToHome(cart, tdb,
+                                            FALSE, species[ii], defaultOn))
+                    {
+                    slNameAddTail(&orderList, species[ii]);
+                    }
+                else
                     {
                     if (speciesOffHash == NULL)
                         speciesOffHash = newHash(4);
+                    char *organism = hOrganism(species[ii]);
+                    if (!organism)
+                        organism = species[ii];
                     hashStoreName(speciesOffHash, organism);
                     }
-		if (!cartUsualBoolean(cart, option, TRUE))
-		    slRemoveEl(&maf->components, mc);
-		else
-		    mcCount++;
                 }
             }
-        if (mcCount == 0)
-            continue;
-
-	if (speciesCt)
-	    {
-	    struct mafComp *mcThis;
-	    int i;
-
-	    mcCount = 0;
-	    speciesCt = chopLine(cloneString(speciesOrder), species);
-	    newOrder = needMem((speciesCt + 1) * sizeof (struct mafComp *));
-	    newOrder[mcCount++] = maf->components;
-
-	    for (i = 0; i < speciesCt; i++)
-		{
-		if ((mcThis = mafMayFindCompSpecies(maf, species[i], '.')) == NULL)
-		    continue;
-                if (mcThis == maf->components)
-                    errAbort("Reference species (%s) shouldn't be in speciesOrder in trackDb", species[i]);
-		newOrder[mcCount++] = mcThis;
         }
 
-	    maf->components = NULL;
-	    for (i = 0; i < mcCount; i++)
+    /* Load stitched alignment using mafFrag approach */
+    if (sameString(tdb->type, "bigMaf"))
         {
-		newOrder[i]->next = 0;
-		slAddHead(&maf->components, newOrder[i]);
-		}
-
-	    slReverse(&maf->components);
+        char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl");
+        struct bbiFile *bbi = bigBedFileOpenAlias(bigDataUrl, chromAliasFindAliases);
+        maf = hgBigMafFrag(database, bbi, seqName, winStart, winEnd, '+', NULL, orderList);
+        bbiFileClose(&bbi);
         }
-	subset = mafSubsetE(maf, dbChrom, winStart, winEnd, TRUE);
-	if (subset != NULL)
+    else if (axtOtherDb == NULL && fileName == NULL)
         {
-	    /* Reformat MAF if needed so that sequence from current
-	     * database is the first component and on the
-	     * plus strand. */
-	    mafMoveComponentToTop(subset, dbChrom);
-	    if (subset->components->strand == '-')
-		mafFlipStrand(subset);
-	    subset->score = mafScoreMultiz(subset);
-	    slAddHead(&subList, subset);
-	    ++realCount;
+        /* Regular MAF from database */
+        maf = hgMafFrag(database, tdb->table, seqName, winStart, winEnd, '+', NULL, orderList);
         }
+    else
+        {
+        /* AXT or MAF with external file - load blocks, then stitch */
+        struct mafAli *mafList = mafOrAxtLoadInRegion2(conn, conn2, tdb, seqName,
+                                    winStart, winEnd, axtOtherDb, fileName);
+        maf = hgMafFragFromMafList(database, seqName, winStart, winEnd, '+',
+                                    mafList, NULL, orderList);
         }
-    slReverse(&subList);
-    mafAliFreeList(&mafList);
-    if (subList != NULL)
+
+    /* Remove insertion columns (where reference has gaps) */
+    if (maf != NULL)
+        mafStripRefGaps(maf);
+
+    if (maf != NULL)
 	{
 	char *showVarName = "hgc.showMultiBase";
 	char *showVarVal = cartUsualString(cart, showVarName, "all");
 	boolean onlyDiff = sameWord(showVarVal, "diff");
 #ifdef ADDEXONCAPITAL
 	char *codeVarName = "hgc.multiCapCoding";
 	char *codeVarVal = cartUsualString(cart, codeVarName, "coding");
 	boolean onlyCds = sameWord(codeVarVal, "coding");
 #endif
         /* add links for conservation score statistics */
         consWiggles = wigMafWiggles(database, tdb);
         int wigCount = slCount(consWiggles);
         if (wigCount == 1)
             {
             conservationStatsLink(tdb, "Conservation score statistics", consWiggles->table);
@@ -774,50 +803,48 @@
 	puts("for aligned species, click on 'D' to get DNA for aligned species.<BR>");
 
 	printf("<TT><PRE>");
 
         /* notify if species removed from alignment */
         if (speciesOffHash)
             {
             char *species;
             struct hashCookie hc = hashFirst(speciesOffHash);
             puts("<B>Components not displayed:</B> ");
             while ((species = hashNextName(&hc)) != NULL)
                 printf("%s ", species);
             puts("<BR>");
             }
 
-
-	for (maf = subList; maf != NULL; maf = maf->next)
-	    {
 	mafLowerCase(maf);
 #ifdef ADDEXONCAPITAL
 	if (capTrack != NULL)
 	    capMafOnTrack(maf, capTrack, onlyCds);
 #endif
-            printf("<B>Alignment block %d of %d in window, %d - %d, %d bps </B>\n",
-                   ++aliIx,realCount,maf->components->start + 1,
-                   maf->components->start + maf->components->size, maf->components->size);
-            mafPrettyOut(stdout, maf, 70,onlyDiff, aliIx, labelHash);
-            }
-	mafAliFreeList(&subList);
+        printf("<B>Alignment %d - %d, %d bps </B>\n",
+               maf->components->start + 1,
+               maf->components->start + maf->components->size,
+               maf->components->size);
+        mafPrettyOut(stdout, maf, 70, onlyDiff, 1, labelHash);
+	mafAliFree(&maf);
 	}
     else
 	{
         printf("No multiple alignment in browser window");
 	}
     printf("</PRE></TT>");
+    slNameFreeList(&orderList);
     }
 }
 
 static void mafOrAxtClick(struct sqlConnection *conn, struct trackDb *tdb, char *axtOtherDb)
 {
 struct sqlConnection *conn2 = NULL;
 if (!(isHubTrack(tdb->track) || trackHubDatabase(database)))
     conn2 = hAllocConn(database);
 // MAF file location is optionally in trackDb
 char *mafFile = hashFindVal(tdb->settingsHash, "mafFile");
 
 mafOrAxtClick2(conn, conn2, tdb, axtOtherDb, mafFile);
 
 hFreeConn(&conn2);
 }