715e2acf2dd9ef6106cc42ce79c724e1e52ad62e braney Thu Mar 12 09:14:28 2026 -0700 NoDots MAF alignment display for hgc mafClick, with i-row preservation and mafFrag -noDots flag hgMaf.c: add hgMafFragHelperNoDots and public wrappers (hgMafFragNoDots, hgBigMafFragNoDots, hgMafFragFromMafListNoDots) that return a list of maf blocks containing only species with actual sequence — no dot-filled rows. Blocks are broken when the species set changes; gaps between same-species blocks are filled with native sequence for the reference and dashes for others. Preserve i-row data (leftStatus/rightStatus/leftLen/rightLen) through the NoDots path so insert annotations appear in emitted blocks. hgMaf.h: declare the new NoDots public functions. mafClick.c: use NoDots path when mafClickMafFrag is enabled. Fix block numbering (aliIx was never incremented in useMafFrag path). Use full textSize for NoDots line width. Use dots instead of spaces in diff mode for both paths. Fix species label width computation to check labelHash consistently so long assembly names don't misalign sequences. Strip ref gap columns where no other species has sequence. mafFrag: add -noDots option to invoke hgMafFragNoDots from the command line, with 4 new tests (noDots, noDotsRev, noDotsOutName, noDotsLarger). refs #21477 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> diff --git src/hg/inc/hgMaf.h src/hg/inc/hgMaf.h index 9c2d7cf8b93..ada43160c5e 100644 --- src/hg/inc/hgMaf.h +++ src/hg/inc/hgMaf.h @@ -1,159 +1,199 @@ /* hgMaf.h - Stuff to load up mafs from the browser database. * Also, items for maf track display */ /* Copyright (C) 2012 The Regents of the University of California * See kent/LICENSE or http://genome.ucsc.edu/license/ for licensing information. 
*/ #ifndef HGMAF_H #define HGMAF_H #include "trackDb.h" #include "cart.h" #include "bbiFile.h" /* Track settings and variables */ #define SPECIES_TREE_VAR "speciesTree" #define SPECIES_ORDER_VAR "speciesOrder" #define SPECIES_GROUP_VAR "speciesGroups" #define SPECIES_TARGET_VAR "speciesTarget" #define SPECIES_DEFAULT_OFF_VAR "speciesDefaultOff" #define SPECIES_GROUP_PREFIX "sGroup_" #define SPECIES_HTML_TARGET "sT" #define SPECIES_CODON_DEFAULT "speciesCodonDefault" #define SPECIES_USE_FILE "speciesUseFile" #define SPECIES_LABELS "speciesLabels" #define PAIRWISE_VAR "pairwise" #define PAIRWISE_HEIGHT "pairwiseHeight" #define SUMMARY_VAR "summary" #define BASE_COLORS_VAR "baseColors" #define BASE_COLORS_OFFSET_VAR "baseColorsOffset" #define CONS_WIGGLE "wiggle" #define ITEM_FIRST_CHAR_CASE "itemFirstCharCase" #define DEFAULT_CONS_LABEL "Conservation" #define gsidSubjList "gsidTable.gsidSubjList" #define gsidSeqList "gsidTable.gsidSeqList" #define gisaidSubjList "gisaidTable.gisaidSubjList" #define gisaidSeqList "gisaidTable.gisaidSeqList" struct mafAli *mafLoadInRegion2(struct sqlConnection *conn, struct sqlConnection *conn2, char *table, char *chrom, int start, int end, char *file); /* Return list of alignments in region. */ struct mafAli *mafLoadInRegion(struct sqlConnection *conn, char *table, char *chrom, int start, int end); /* Return list of alignments in region. */ struct mafAli *axtLoadAsMafInRegion(struct sqlConnection *conn, char *table, char *chrom, int start, int end, char *tPrefix, char *qPrefix, int tSize, struct hash *qSizeHash); /* Return list of alignments in region from axt external file as a maf. 
*/ struct mafBaseProbs // the probability of each nucleotide being in a certain column { double aProb, cProb, gProb, tProb; }; struct mafBaseProbs *hgBigMafProbs( char *database, /* Database, must already have hSetDb to this */ struct bbiFile *bbi, char *chrom, /* Chromosome (in database genome) */ int start, int end, /* start/end in chromosome */ char strand /* Chromosome strand. */ ); /* calculate the probability of each nucleotide in each column of a bigMaf. */ struct mafBaseProbs *hgMafProbs( char *database, /* Database, must already have hSetDb to this */ char *track, /* Name of MAF track */ char *chrom, /* Chromosome (in database genome) */ int start, int end, /* start/end in chromosome */ char strand /* Chromosome strand. */ ); /* calculate the probability of each nucleotide in each column of a maf. */ struct mafAli *hgBigMafFrag( char *database, /* Database, must already have hSetDb to this */ struct bbiFile *bbi, char *chrom, /* Chromosome (in database genome) */ int start, int end, /* start/end in chromosome */ char strand, /* Chromosome strand. */ char *outName, /* Optional name to use in first component */ struct slName *orderList /* Optional order of organisms. */ ); /* hgBigMafFrag - Extract maf sequences for a region from a bigMaf and call hgMafFragHelper. */ struct mafAli *hgMafFrag( char *database, /* Database, must already have hSetDb to this */ char *track, /* Name of MAF track */ char *chrom, /* Chromosome (in database genome) */ int start, int end, /* start/end in chromosome */ char strand, /* Chromosome strand. */ char *outName, /* Optional name to use in first component */ struct slName *orderList /* Optional order of organisms. */ ); /* hgMafFrag - Extract maf sequences for a region from database. * This creates a somewhat unusual MAF that extends from start * to end whether or not there are actually alignments. Where * there are no alignments (or alignments missing a species) * a . character fills in. 
The score is always zero, and * the sources just indicate the species. You can mafFree this * as normal. */ struct mafAli *hgMafFragFromMafList( char *database, /* Database, must already have hSetDb to this */ char *chrom, /* Chromosome (in database genome) */ int start, int end, /* start/end in chromosome */ char strand, /* Chromosome strand. */ struct mafAli *mafList, /* Pre-loaded list of maf alignments */ char *outName, /* Optional name to use in first component */ struct slName *orderList /* Optional order of organisms. */ ); /* Extract maf sequences for a region from a pre-loaded mafList. * Same behavior as hgMafFrag but takes mafList directly instead * of loading from database. Caller should not free mafList * afterwards (it is consumed). */ +struct mafAli *hgBigMafFragNoDots( + char *database, /* Database, must already have hSetDb to this */ + struct bbiFile *bbi, + char *chrom, /* Chromosome (in database genome) */ + int start, int end, /* start/end in chromosome */ + char strand, /* Chromosome strand. */ + char *outName, /* Optional name to use in first component */ + struct slName *orderList /* Optional order of organisms. */ + ); +/* hgBigMafFragNoDots - Extract maf sequences for a region from a bigMaf. + * Returns a list of maf blocks with no dots - each block only contains + * assemblies that have actual sequence. */ + +struct mafAli *hgMafFragNoDots( + char *database, /* Database, must already have hSetDb to this */ + char *track, /* Name of MAF track */ + char *chrom, /* Chromosome (in database genome) */ + int start, int end, /* start/end in chromosome */ + char strand, /* Chromosome strand. */ + char *outName, /* Optional name to use in first component */ + struct slName *orderList /* Optional order of organisms. */ + ); +/* hgMafFragNoDots - Extract maf sequences for a region from database. + * Returns a list of maf blocks with no dots - each block only contains + * assemblies that have actual sequence. 
*/ + +struct mafAli *hgMafFragFromMafListNoDots( + char *database, /* Database, must already have hSetDb to this */ + char *chrom, /* Chromosome (in database genome) */ + int start, int end, /* start/end in chromosome */ + char strand, /* Chromosome strand. */ + struct mafAli *mafList, /* Pre-loaded list of maf alignments */ + char *outName, /* Optional name to use in first component */ + struct slName *orderList /* Optional order of organisms. */ + ); +/* Extract maf sequences for a region from a pre-loaded mafList. + * Returns a list of maf blocks with no dots - each block only contains + * assemblies that have actual sequence. Caller should not free mafList + * afterwards (it is consumed). */ + int mafCmp(const void *va, const void *vb); /* Compare to sort based on start of first component. */ struct consWiggle { struct consWiggle *next; /* Next in list */ char *table; /* phastCons table */ char *leftLabel; /* Left label for hgTracks */ char *uiLabel; /* Label to print on trackUi */ }; struct consWiggle *wigMafWiggles(char *db, struct trackDb *tdb); /* get conservation wiggle table names and labels from trackDb setting, ignoring those where table doesn't exist */ char *wigMafWiggleVar(char *prefix, struct consWiggle *wig,char **suffix); // Return name of cart variable (and optionally the suffix) for this cons wiggle struct wigMafSpecies { struct wigMafSpecies *next; char *name; int group; boolean on; }; struct wigMafSpecies * wigMafSpeciesTable(struct cart *cart, struct trackDb *tdb, char *name, char *db) ; char **wigMafGetSpecies(struct cart *cart, struct trackDb *tdb, char *prefix, char *db, struct wigMafSpecies **list, int *groupCt); struct consWiggle *consWiggleFind(char *db,struct trackDb *parent,char *table); /* Return conservation wig if it is found in the parent. */ struct mafAli *bigMafLoadInRegion( struct bbiFile *bbi, char *chrom, int start, int end); /* Read in MAF blocks from bigBed. 
*/ struct hash *mafGetLabelHash(struct trackDb *tdb); /* Get mapping of sequence name to label. */ #endif//ndef HGMAF_H