715e2acf2dd9ef6106cc42ce79c724e1e52ad62e braney Thu Mar 12 09:14:28 2026 -0700 NoDots MAF alignment display for hgc mafClick, with i-row preservation and mafFrag -noDots flag hgMaf.c: add hgMafFragHelperNoDots and public wrappers (hgMafFragNoDots, hgBigMafFragNoDots, hgMafFragFromMafListNoDots) that return a list of maf blocks containing only species with actual sequence — no dot-filled rows. Blocks are broken when the species set changes; gaps between same-species blocks are filled with native sequence for the reference and dashes for others. Preserve i-row data (leftStatus/rightStatus/leftLen/rightLen) through the NoDots path so insert annotations appear in emitted blocks. hgMaf.h: declare the new NoDots public functions. mafClick.c: use NoDots path when mafClickMafFrag is enabled. Fix block numbering (aliIx was never incremented in useMafFrag path). Use full textSize for NoDots line width. Use dots instead of spaces in diff mode for both paths. Fix species label width computation to check labelHash consistently so long assembly names don't misalign sequences. Strip ref gap columns where no other species has sequence. mafFrag: add -noDots option to invoke hgMafFragNoDots from the command line, with 4 new tests (noDots, noDotsRev, noDotsOutName, noDotsLarger). refs #21477 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> diff --git src/hg/hgc/mafClick.c src/hg/hgc/mafClick.c index 950171b68d2..183dc44fc03 100644 --- src/hg/hgc/mafClick.c +++ src/hg/hgc/mafClick.c @@ -25,33 +25,30 @@ #define ADDEXONCAPITAL /* Javascript to help make a selection from a drop-down * go back to the server. */ static char *autoSubmit = "document.gpForm.submit();"; static void blueCapWrite(FILE *f, char *s, int size, char *r) /* Write capital letters in blue. */ { boolean isBlue = FALSE; int i; for (i=0; i<size; ++i) { if (r!=NULL && s[i]==r[i]) { - if (cfgOptionBooleanDefault("mafClickMafFrag", FALSE)) - fprintf(f, " "); - else fprintf(f, "."); } else { char c = s[i]; if (isupper(c)) { if (!isBlue) { fprintf(f, "<span style='color:#0000FF;'>"); isBlue = TRUE; } } else if (islower(c)) { @@ -107,33 +104,35 @@ startChars = sizeChars = srcSizeChars = 0; for (mc = maf->components; mc != NULL; mc = mc->next) { /* Figure out length of source (species) field. */ /*if (mc->size != 0)*/ { char dbOnly[128]; int len; char *org; memset(dbOnly, 0, sizeof(dbOnly)); safef(dbOnly, sizeof(dbOnly), "%s", mc->src); chopPrefix(dbOnly); + if ((labelHash == NULL) || ((org = hashFindVal(labelHash, dbOnly)) == NULL)) + { if ((org = hOrganism(dbOnly)) == NULL) - len = strlen(dbOnly); - else + org = dbOnly; + } len = strlen(org); if (srcChars < len) srcChars = len; len = digitsBaseTen(mc->start); if (startChars < len) startChars = len; len = digitsBaseTen(mc->size); if (sizeChars < len) sizeChars = len; len = digitsBaseTen(mc->srcSize); if (srcSizeChars < len) srcSizeChars = len; if (mc->text && (mc->rightStatus == MAF_INSERT_STATUS) && (masterMc->start + masterMc->size < winEnd)) @@ -448,58 +447,71 @@ } static void capMafOnTrack(struct mafAli *maf, char *track, boolean onlyCds) /* Capitalize parts of maf that correspond to exons according * to given gene prediction track. */ { char dbOnly[64]; char *chrom; struct mafComp *mc = maf->components; strncpy(dbOnly, mc->src, sizeof(dbOnly)); chrom = chopPrefix(dbOnly); capAliTextOnTrack(maf, dbOnly, chrom, track, onlyCds); } #endif -static void mafStripRefGaps(struct mafAli *maf) -/* Remove columns where the reference (first component) has a gap character. - * These are insertions in non-reference species that should be collapsed - * when displaying in reference coordinates. */ +static void mafStripEmptyRefGaps(struct mafAli *maf) +/* Remove columns where the reference (first component) has a gap character + * AND no other component has actual sequence in that column. Columns where + * at least one non-reference species has a base are kept. */ { struct mafComp *mc; struct mafComp *ref = maf->components; if (ref == NULL || ref->text == NULL) return; int textSize = maf->textSize; -/* Build boolean array of columns to keep (where ref is not a gap) */ +/* Build boolean array of columns to keep */ bool *keep = needMem(textSize); int newSize = 0; int ii; for (ii = 0; ii < textSize; ii++) { if (ref->text[ii] != '-') { keep[ii] = TRUE; newSize++; } + else + { + /* Reference has gap — check if any other species has sequence */ + for (mc = ref->next; mc != NULL; mc = mc->next) + { + if (mc->text != NULL && isalpha(mc->text[ii])) + { + keep[ii] = TRUE; + newSize++; + break; + } + } + } } if (newSize == textSize) { freeMem(keep); - return; /* nothing to strip */ + return; } /* Compact all component texts in place */ for (mc = maf->components; mc != NULL; mc = mc->next) { if (mc->text == NULL) continue; int jj = 0; for (ii = 0; ii < textSize; ii++) if (keep[ii]) mc->text[jj++] = mc->text[ii]; mc->text[jj] = '\0'; } maf->textSize = newSize; freeMem(keep); @@ -664,60 +676,59 @@ slNameAddTail(&orderList, species[ii]); } else { if (speciesOffHash == NULL) speciesOffHash = newHash(4); char *organism = hOrganism(species[ii]); if (!organism) organism = species[ii]; hashStoreName(speciesOffHash, organism); } } } } - /* Load stitched alignment using mafFrag approach */ + /* Load alignment using mafFragNoDots approach — returns a list of + * maf blocks, each containing only assemblies with actual sequence. */ if (sameString(tdb->type, "bigMaf")) { char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl"); struct bbiFile *bbi = bigBedFileOpenAlias(bigDataUrl, chromAliasFindAliases); - maf = hgBigMafFrag(database, bbi, seqName, winStart, winEnd, '+', NULL, orderList); + subList = hgBigMafFragNoDots(database, bbi, seqName, winStart, winEnd, '+', NULL, orderList); bbiFileClose(&bbi); } else if (axtOtherDb == NULL && fileName == NULL) { /* Regular MAF from database */ - maf = hgMafFrag(database, tdb->table, seqName, winStart, winEnd, '+', NULL, orderList); + subList = hgMafFragNoDots(database, tdb->table, seqName, winStart, winEnd, '+', NULL, orderList); } else { /* AXT or MAF with external file - load blocks, then stitch */ mafList = mafOrAxtLoadInRegion2(conn, conn2, tdb, seqName, winStart, winEnd, axtOtherDb, fileName); - maf = hgMafFragFromMafList(database, seqName, winStart, winEnd, '+', + subList = hgMafFragFromMafListNoDots(database, seqName, winStart, winEnd, '+', mafList, NULL, orderList); - mafList = NULL; /* consumed by hgMafFragFromMafList */ + mafList = NULL; /* consumed by hgMafFragFromMafListNoDots */ } - /* Remove insertion columns (where reference has gaps) */ - if (maf != NULL) - mafStripRefGaps(maf); + /* Strip ref gap columns where no other species has sequence */ + for (maf = subList; maf != NULL; maf = maf->next) + mafStripEmptyRefGaps(maf); - if (maf != NULL) - slAddHead(&subList, maf); - realCount = (subList != NULL) ? 1 : 0; + realCount = slCount(subList); } else { /* Original block-by-block code path */ if (sameString(tdb->type, "bigMaf")) { char *bigDataUrl = trackDbSetting(tdb, "bigDataUrl"); struct bbiFile *bbi = bigBedFileOpenAlias(bigDataUrl, chromAliasFindAliases); mafList = bigMafLoadInRegion(bbi, seqName, winStart, winEnd); } else mafList = mafOrAxtLoadInRegion2(conn, conn2, tdb, seqName, winStart, winEnd, axtOtherDb, fileName); safef(dbChrom, sizeof(dbChrom), "%s.%s", hubConnectSkipHubPrefix(database), seqName); @@ -942,40 +953,41 @@ char *species; struct hashCookie hc = hashFirst(speciesOffHash); puts("<B>Components not displayed:</B> "); while ((species = hashNextName(&hc)) != NULL) printf("%s ", species); puts("<BR>"); } for (maf = subList; maf != NULL; maf = maf->next) { mafLowerCase(maf); #ifdef ADDEXONCAPITAL if (capTrack != NULL) capMafOnTrack(maf, capTrack, onlyCds); #endif + ++aliIx; if (useMafFrag) printf("<B>Alignment %d - %d, %d bps </B>\n", maf->components->start + 1, maf->components->start + maf->components->size, maf->components->size); else printf("<B>Alignment block %d of %d in window, %d - %d, %d bps </B>\n", - ++aliIx,realCount,maf->components->start + 1, + aliIx,realCount,maf->components->start + 1, maf->components->start + maf->components->size, maf->components->size); - mafPrettyOut(stdout, maf, 70, onlyDiff, aliIx, labelHash); + mafPrettyOut(stdout, maf, useMafFrag ? maf->textSize : 70, onlyDiff, aliIx, labelHash); } mafAliFreeList(&subList); } else { printf("No multiple alignment in browser window"); } printf("</PRE></TT>"); slNameFreeList(&orderList); } } static void mafOrAxtClick(struct sqlConnection *conn, struct trackDb *tdb, char *axtOtherDb) { struct sqlConnection *conn2 = NULL;