f81b7f500a81f07abeacdab89886d0c82cef19e0 hiram Thu Jul 18 15:54:08 2019 -0700 broken code to discover GAPS in sequence refs #23734 diff --git src/utils/findMotif/findMotif.c src/utils/findMotif/findMotif.c index a23cf2b..cece5ac 100644 --- src/utils/findMotif/findMotif.c +++ src/utils/findMotif/findMotif.c @@ -70,30 +70,31 @@ * #define U_BASE_VAL 0 3 * #define C_BASE_VAL 1 1 * #define A_BASE_VAL 2 0 * #define G_BASE_VAL 3 2 * These do not work for the XOR comparison bit count, * using the ones below instead */ static int dnaUtilBases[4]; /* translation values from dnautil.h */ #define A_BASE 0 #define C_BASE 1 #define G_BASE 2 #define T_BASE 3 #define U_BASE 3 +#define N_BASE 4 static char bases[4]; /* for two-bits to ascii conversion */ static void initBases() /* set up the translation matrix for two-bits to ascii */ { bases[A_BASE] = 'A'; bases[C_BASE] = 'C'; bases[G_BASE] = 'G'; bases[T_BASE] = 'T'; dnaUtilBases[A_BASE_VAL] = A_BASE; dnaUtilBases[C_BASE_VAL] = C_BASE; dnaUtilBases[G_BASE_VAL] = G_BASE; dnaUtilBases[T_BASE_VAL] = T_BASE; } @@ -158,30 +159,34 @@ boolean inGap = FALSE; unsigned long long gapCount = 0; mask = 3; for (i=1; i < motifLen; ++i ) mask = (mask << 2) | 3; verbose(3, "#\tsequence: %s size: %d, motifMask: %#llx\n", seq->name, seq->size, mask); verbose(3, "#\tmotif numerical value: %llu (%#llx)\n", posNeedle, posNeedle); /* Need "chrom" */ dna = seq->dna; for (i=0; i < seq->size; ++i) { ++chromPosition; + int nVal = ntVal[(int)dna[i]]; + if (nVal < 0) + val = N_BASE; + else val = dnaUtilBases[ntVal[(int)dna[i]]]; switch (val) { case T_BASE: case C_BASE: case A_BASE: case G_BASE: incomingVal = mask & ((incomingVal << 2) | val); if (! incomingLength) { if (inGap && (((long long int)chromPosition - (long long int)enterGap) > 0)) { ++gapCount; verbose(3,