50d5df2a88178ea49caa2fd1b41ce28af84f219f
braney
  Mon May 8 12:36:08 2023 -0700
support bigMaf for sequence logos

diff --git src/hg/lib/hgMaf.c src/hg/lib/hgMaf.c
index 00667e7..1ac3302 100644
--- src/hg/lib/hgMaf.c
+++ src/hg/lib/hgMaf.c
@@ -179,43 +179,37 @@
 	dyStringAppendMultiC(org->dy, '.', fillSize);
     }
 }
 
 static int countAlpha(char *s)
 /* Count up number of alphabetic characters in string */
 {
 int count = 0;
 char c;
 while ((c = *s++) != 0)
     if (isalpha(c))
         ++count;
 return count;
 }
 
-struct mafBaseProbs *hgMafProbs(
-	char *database,     /* Database, must already have hSetDb to this */
-	char *track,        /* Name of MAF track */
-	char *chrom,        /* Chromosome (in database genome) */
-	int start, int end, /* start/end in chromosome */
-        char strand         /* strand */
+struct mafBaseProbs *hgMafProbsHelper(
+        int size,
+        struct mafAli *maf
         )
 /* calculate the probability of each nucleotide in each column of a maf. */
 {
-struct mafAli *maf = hgMafFrag(database, track, chrom, start, end, '+', NULL, NULL);
-
 struct mafBaseProbs *probs;
-int size = end - start;
 
 AllocArray(probs, size);
 int count = 0;
 unsigned letterBox[256]; 
 
 int ii;
 for(ii=0; ii < maf->textSize; ii++)
     {
     struct mafComp *comp = maf->components;
     char ch = comp->text[ii];
 
     if (ch == '-')
         continue;
 
     memset(letterBox, 0, sizeof letterBox);
@@ -229,51 +223,79 @@
     double total = 0;
     total += letterBox['a'];
     total += letterBox['c'];
     total += letterBox['g'];
     total += letterBox['t'];
     probs[count].aProb = letterBox['a'] / total;
     probs[count].cProb = letterBox['c'] / total;
     probs[count].gProb = letterBox['g'] / total;
     probs[count].tProb = letterBox['t'] / total;
     count++;
     }   
 
 return probs;
 }
 
-struct mafAli *hgMafFrag(
+struct mafBaseProbs *hgBigMafProbs(
+	char *database,     /* Database, must already have hSetDb to this */
+        struct bbiFile *bbi,
+	char *chrom,        /* Chromosome (in database genome) */
+	int start, int end, /* start/end in chromosome */
+        char strand         /* strand */
+        )
+/* calculate the probability of each nucleotide in each column of a bigMaf. */
+{
+struct mafAli *maf = hgBigMafFrag(database, bbi, chrom, start, end, '+', NULL, NULL);
+
+return hgMafProbsHelper(end - start, maf);
+}
+
+struct mafBaseProbs *hgMafProbs(
 	char *database,     /* Database, must already have hSetDb to this */
 	char *track,        /* Name of MAF track */
 	char *chrom,        /* Chromosome (in database genome) */
 	int start, int end, /* start/end in chromosome */
+        char strand         /* strand */
+        )
+/* calculate the probability of each nucleotide in each column of a maf. */
+{
+struct mafAli *maf = hgMafFrag(database, track, chrom, start, end, '+', NULL, NULL);
+
+return hgMafProbsHelper(end - start, maf);
+}
+
+static struct mafAli *hgMafFragHelper(
+	char *database,     /* Database, must already have hSetDb to this */
+	//char *track,        /* Name of MAF track */
+	char *chrom,        /* Chromosome (in database genome) */
+	int start, int end, /* start/end in chromosome */
 	char strand,        /* Chromosome strand. */
+        struct mafAli *mafList,
 	char *outName,      /* Optional name to use in first component */
 	struct slName *orderList /* Optional order of organisms. */
 	)
-/* mafFrag- Extract maf sequences for a region from database.
+/* hgMafFragHelper- Extract maf sequences for a region from database.
  * This creates a somewhat unusual MAF that extends from start
  * to end whether or not there are actually alignments.  Where
  * there are no alignments (or alignments missing a species)
  * a . character fills in.   The score is always zero, and
  * the sources just indicate the species.  You can mafFree this
  * as normal. */
 {
 int chromSize = hChromSize(database, chrom);
-struct sqlConnection *conn = hAllocConn(database);
 struct dnaSeq *native = hChromSeq(database, chrom, start, end);
-struct mafAli *maf, *mafList = mafLoadInRegion(conn, track, chrom, start, end);
+struct mafAli *maf;
 char masterSrc[128];
 struct hash *orgHash = newHash(10);
 struct oneOrg *orgList = NULL, *org, *nativeOrg = NULL;
 int curPos = start, symCount = 0;
 struct slName *name;
 int order = 0;
 
 /* Check that the mafs are really copacetic, the particular
  * subtype we think is in the database that this (relatively)
  * simple code can handle. */
 safef(masterSrc, sizeof(masterSrc), "%s.%s", database, chrom);
 mafCheckFirstComponentSrc(mafList, masterSrc);
 mafCheckFirstComponentStrand(mafList, '+');
 slSort(&mafList, mafCmp);
 
@@ -449,34 +471,68 @@
 	int size = countAlpha(org->dy->string);
 	mc->src = cloneString(org->name);
 	mc->srcSize = size;
 	mc->strand = '+';
 	mc->start = 0;
 	mc->size = size;
 	}
     mc->text = cloneString(org->dy->string);
     dyStringFree(&org->dy);
     slAddHead(&maf->components, mc);
     }
 slReverse(&maf->components);
 
 slFreeList(&orgList);
 freeHash(&orgHash);
-hFreeConn(&conn);
 return maf;
 }
 
+struct mafAli *hgBigMafFrag(
+	char *database,     /* Database, must already have hSetDb to this */
+        struct bbiFile *bbi,
+	char *chrom,        /* Chromosome (in database genome) */
+	int start, int end, /* start/end in chromosome */
+	char strand,        /* Chromosome strand. */
+	char *outName,      /* Optional name to use in first component */
+	struct slName *orderList /* Optional order of organisms. */
+	)
+/* hgBigMafFrag - Extract maf sequences for a region from a bigMaf and call hgMafFragHelper. */
+{
+struct mafAli *mafList = bigMafLoadInRegion( bbi,  chrom, start, end);
+return hgMafFragHelper(database, chrom, start, end, strand, mafList, outName, orderList);
+}
+
+struct mafAli *hgMafFrag(
+	char *database,     /* Database, must already have hSetDb to this */
+	char *track,        /* Name of MAF track */
+	char *chrom,        /* Chromosome (in database genome) */
+	int start, int end, /* start/end in chromosome */
+	char strand,        /* Chromosome strand. */
+	char *outName,      /* Optional name to use in first component */
+	struct slName *orderList /* Optional order of organisms. */
+	)
+/* hgMafFrag- Extract maf sequences for a region from database and call hgMafFragHelper. */
+{
+struct sqlConnection *conn = hAllocConn(database);
+struct mafAli  *mafList = mafLoadInRegion(conn, track, chrom, start, end);
+struct mafAli *ret = hgMafFragHelper(database, chrom, start, end, strand, mafList, outName, orderList);
+
+hFreeConn(&conn);
+
+return ret;
+}
+
 struct consWiggle *wigMafWiggles(char *db, struct trackDb *tdb)
 /* get conservation wiggle table names and labels from trackDb setting,
    ignoring those where table doesn't exist */
 {
 char *fields[20];
 int fieldCt;
 int i;
 char *wigTable, *wigLabel;
 struct consWiggle *wig, *wigList = NULL;
 char *setting = trackDbSetting(tdb, CONS_WIGGLE);
 if (!setting)
     return NULL;
 fieldCt = chopLine(cloneString(setting), fields);
 for (i = 0; i < fieldCt; i += 3)
     {