0a01f6a29963ba42555b02782532a5702b649f29 braney Fri Mar 10 10:18:36 2023 -0800 first cut at drawing base probability logos in wiggle tracks calculated at run time using an associated MAF related diff --git src/hg/lib/hgMaf.c src/hg/lib/hgMaf.c index f6a6a75..00667e7 100644 --- src/hg/lib/hgMaf.c +++ src/hg/lib/hgMaf.c @@ -179,30 +179,80 @@ dyStringAppendMultiC(org->dy, '.', fillSize); } } static int countAlpha(char *s) /* Count up number of alphabetic characters in string */ { int count = 0; char c; while ((c = *s++) != 0) if (isalpha(c)) ++count; return count; } +struct mafBaseProbs *hgMafProbs( + char *database, /* Database, must already have hSetDb to this */ + char *track, /* Name of MAF track */ + char *chrom, /* Chromosome (in database genome) */ + int start, int end, /* start/end in chromosome */ + char strand /* strand */ + ) +/* calculate the probability of each nucleotide in each column of a maf. */ +{ +struct mafAli *maf = hgMafFrag(database, track, chrom, start, end, '+', NULL, NULL); + +struct mafBaseProbs *probs; +int size = end - start; + +AllocArray(probs, size); +int count = 0; +unsigned letterBox[256]; + +int ii; +for(ii=0; ii < maf->textSize; ii++) + { + struct mafComp *comp = maf->components; + char ch = comp->text[ii]; + + if (ch == '-') + continue; + + memset(letterBox, 0, sizeof letterBox); + for(; comp; comp = comp->next) + { + if (comp->text == NULL) + continue; + letterBox[tolower(comp->text[ii])]++; + } + + double total = 0; + total += letterBox['a']; + total += letterBox['c']; + total += letterBox['g']; + total += letterBox['t']; + probs[count].aProb = letterBox['a'] / total; + probs[count].cProb = letterBox['c'] / total; + probs[count].gProb = letterBox['g'] / total; + probs[count].tProb = letterBox['t'] / total; + count++; + } + +return probs; +} + struct mafAli *hgMafFrag( char *database, /* Database, must already have hSetDb to this */ char *track, /* Name of MAF track */ char *chrom, /* Chromosome (in database genome) */ int start, int end, /* start/end in chromosome */ char strand, /* Chromosome strand. */ char *outName, /* Optional name to use in first component */ struct slName *orderList /* Optional order of organisms. */ ) /* mafFrag- Extract maf sequences for a region from database. * This creates a somewhat unusual MAF that extends from start * to end whether or not there are actually alignments. Where * there are no alignments (or alignments missing a species) * a . character fills in. The score is always zero, and * the sources just indicate the species. You can mafFree this