afbe9e91a8f837a63978e78e9765ba3e039491ad larrym Mon Jul 5 20:14:58 2010 -0700 add dnaMotifBitScoreWithMarkovBg diff --git src/lib/dnaMotif.c src/lib/dnaMotif.c index 42b41bc..6cbff62 100644 --- src/lib/dnaMotif.c +++ src/lib/dnaMotif.c @@ -170,6 +170,42 @@ return p; } +static float dnaMotifSequenceProbLog(struct dnaMotif *motif, DNA *dna) +{ +float p = 0; +int i; +for (i=0; i<motif->columnCount; ++i) + { + switch (dna[i]) + { + case 'a': + case 'A': + p += motif->aProb[i]; + break; + case 'c': + case 'C': + p += motif->cProb[i]; + break; + case 'g': + case 'G': + p += motif->gProb[i]; + break; + case 't': + case 'T': + p += motif->tProb[i]; + break; + case 0: + warn("dna shorter than motif"); + internalErr(); + break; + default: + p += logBase2(0.25); + break; + } + } +return p; +} + char dnaMotifBestStrand(struct dnaMotif *motif, DNA *dna) /* Figure out which strand of DNA is better for probabalistic motif. */ { @@ -194,6 +230,42 @@ } +double dnaMotifBitScoreWithMarkovBg(struct dnaMotif *motif, DNA *dna, double mark2[5][5][5]) +/* Return logBase2-odds score of dna given a probabalistic motif using a 2nd-order markov model for the background. + motif and markd2 must be in log2 format. + Seq must contain an extra two bases at the front (i.e. we start scoring from dna + 2). */ +{ +double p = dnaMotifSequenceProbLog(motif, dna + 2); +double q = 0; +int i, index; +// XXXX assert somehow that length(dna) == motif->columnCount + 2? +dnaUtilOpen(); +for(i = 0, index = 2; i < motif->columnCount; i++, index++) + { + double tmp = mark2[ntVal5[(int) dna[index - 2]] + 1][ntVal5[(int) dna[index - 1]] + 1][ntVal5[(int) dna[index]] + 1]; +#if 0 + char buf[4]; + safencpy(buf, sizeof(buf), dna + index - 2, 3); + fprintf(stderr, "%s: tmp: %.6f\n", buf, tmp); +#endif + q += tmp; + } +return p - q; +} + + +void dnaMotifMakeLog2(struct dnaMotif *motif) +{ +int i; +for (i = 0; i < motif->columnCount; i++) + { + motif->aProb[i] = logBase2(motif->aProb[i]); + motif->cProb[i] = logBase2(motif->cProb[i]); + motif->gProb[i] = logBase2(motif->gProb[i]); + motif->tProb[i] = logBase2(motif->tProb[i]); + } +} + void dnaMotifNormalize(struct dnaMotif *motif) /* Make all columns of motif sum to one. */ {