0a01f6a29963ba42555b02782532a5702b649f29
braney
  Fri Mar 10 10:18:36 2023 -0800
first cut at drawing base probability logos in wiggle tracks calculated
at run time using an associated MAF
related

diff --git src/hg/lib/hgMaf.c src/hg/lib/hgMaf.c
index f6a6a75..00667e7 100644
--- src/hg/lib/hgMaf.c
+++ src/hg/lib/hgMaf.c
@@ -179,30 +179,80 @@
 	dyStringAppendMultiC(org->dy, '.', fillSize);
     }
 }
 
 static int countAlpha(char *s)
 /* Count up number of alphabetic characters in string */
 {
 int count = 0;
 char c;
 while ((c = *s++) != 0)
     if (isalpha(c))
         ++count;
 return count;
 }
 
+struct mafBaseProbs *hgMafProbs(
+	char *database,     /* Database, must already have hSetDb to this */
+	char *track,        /* Name of MAF track */
+	char *chrom,        /* Chromosome (in database genome) */
+	int start, int end, /* start/end in chromosome */
+        char strand         /* strand */
+        )
+/* calculate the probability of each nucleotide in each column of a maf. */
+{
+struct mafAli *maf = hgMafFrag(database, track, chrom, start, end, '+', NULL, NULL);
+
+struct mafBaseProbs *probs;
+int size = end - start;
+
+AllocArray(probs, size);
+int count = 0;
+unsigned letterBox[256]; 
+
+int ii;
+for(ii=0; ii < maf->textSize; ii++)
+    {
+    struct mafComp *comp = maf->components;
+    char ch = comp->text[ii];
+
+    if (ch == '-')
+        continue;
+
+    memset(letterBox, 0, sizeof letterBox);
+    for(; comp; comp = comp->next)
+        {
+        if (comp->text == NULL)
+            continue;
+        letterBox[tolower(comp->text[ii])]++;
+        }
+
+    double total = 0;
+    total += letterBox['a'];
+    total += letterBox['c'];
+    total += letterBox['g'];
+    total += letterBox['t'];
+    probs[count].aProb = letterBox['a'] / total;
+    probs[count].cProb = letterBox['c'] / total;
+    probs[count].gProb = letterBox['g'] / total;
+    probs[count].tProb = letterBox['t'] / total;
+    count++;
+    }   
+
+return probs;
+}
+
 struct mafAli *hgMafFrag(
 	char *database,     /* Database, must already have hSetDb to this */
 	char *track,        /* Name of MAF track */
 	char *chrom,        /* Chromosome (in database genome) */
 	int start, int end, /* start/end in chromosome */
 	char strand,        /* Chromosome strand. */
 	char *outName,      /* Optional name to use in first component */
 	struct slName *orderList /* Optional order of organisms. */
 	)
 /* mafFrag- Extract maf sequences for a region from database.
  * This creates a somewhat unusual MAF that extends from start
  * to end whether or not there are actually alignments.  Where
  * there are no alignments (or alignments missing a species)
  * a . character fills in.   The score is always zero, and
  * the sources just indicate the species.  You can mafFree this