ad3a176457ad1d21a0fedc47f349ec3484e751f1 angie Fri Feb 9 16:27:51 2018 -0800 Cleaning up some old ugliness about the size parameter to isAllDna and isAllNt. hgc's printSnpAlignment code that parsed snpNNN.fa was using lineSize as length but lineSize is length+1. Then isAllDna was written with "i<size-1" as the loop test instead of "i < size". I didn't fix that properly when I separated out isAllNt from isAllDna. Later, I (re?)discovered that isAllNt needed length+1 as its size and just added some FIXME comments. Thanks Brian R for prodding me to actually fix it. refs #20895 diff --git src/lib/iupac.c src/lib/iupac.c index 9289d51..1fe9d44 100644 --- src/lib/iupac.c +++ src/lib/iupac.c @@ -1,216 +1,203 @@ /* iupac - routines to help cope with IUPAC ambiguity codes in DNA sequence. */ /* Copyright (C) 2012 The Regents of the University of California * See README in this or parent directory for licensing information. */ #include "common.h" #include "dnautil.h" #include "iupac.h" boolean iupacMatchLower(char iupac, char dna) /* See if iupac ambiguity code matches dna character where * both are lower case */ { switch (iupac) { case 'a': return dna == 'a'; case 'c': return dna == 'c'; case 'g': return dna == 'g'; case 't': case 'u': return dna == 't'; case 'r': return dna == 'a' || dna == 'g'; case 'y': return dna == 'c' || dna == 't'; case 's': return dna == 'g' || dna == 'c'; case 'w': return dna == 'a' || dna == 't'; case 'k': return dna == 'g' || dna == 't'; case 'm': return dna == 'a' || dna == 'c'; case 'b': return dna == 'c' || dna == 'g' || dna == 't'; case 'd': return dna == 'a' || dna == 'g' || dna == 't'; case 'h': return dna == 'a' || dna == 'c' || dna == 't'; case 'v': return dna == 'a' || dna == 'c' || dna == 'g'; case 'n': return TRUE; default: errAbort("Unrecognized IUPAC code '%c'", iupac); return FALSE; // Not actually used but prevent compiler complaints } } boolean iupacMatch(char iupac, char dna) /* See if iupac ambiguity code matches dna character */ { return iupacMatchLower(tolower(iupac), tolower(dna)); } boolean isIupacLower(char c) /* See if iupac c is a legal (lower case) iupac char */ { switch (c) { case 'a': case 'c': case 'g': case 't': case 'u': case 'r': case 'y': case 's': case 'w': case 'k': case 'm': case 'b': case 'd': case 'h': case 'v': case 'n': return TRUE; default: return FALSE; } } boolean isIupac(char c) /* See if iupac c is a legal iupac char */ { return isIupacLower(tolower(c)); } void iupacFilter(char *in, char *out) /* Filter out non-DNA non-UIPAC ambiguity code characters and change to lower case. */ { char c; while ((c = *in++) != 0) { c = tolower(c); if (isIupacLower(c)) *out++ = c; } *out++ = 0; } boolean anyIupac(char *s) /* Return TRUE if there are any IUPAC ambiguity codes in s */ { dnaUtilOpen(); -int c; +char c; while ((c = *s++) != 0) { - switch (c) - { - case 'r': - case 'y': - case 's': - case 'w': - case 'k': - case 'm': - case 'b': - case 'd': - case 'h': - case 'v': - case 'n': + if (isIupacAmbiguous(c)) return TRUE; } - } return FALSE; } char iupacComplementBaseLower(char iupac) /* Return IUPAC complement for a single base */ { switch (iupac) { case 'a': return 't'; case 'c': return 'g'; case 'g': return 'c'; case 't': case 'u': return 'a'; case 'r': return 'y'; case 'y': return 'r'; case 's': return 's'; case 'w': return 'w'; case 'k': return 'm'; case 'm': return 'k'; case 'b': return 'v'; case 'd': return 'h'; case 'h': return 'd'; case 'v': return 'b'; case 'n': return 'n'; default: errAbort("Unrecognized IUPAC code '%c'", iupac); return 0; // Just to keep compiler from complaining, control won't reach here. } } void iupacComplementLower(char *iupac, int iuSize) /* Return IUPAC complement many bases. Assumes iupac is lower case. */ { int i; for (i=0; i<iuSize; ++i) iupac[i] = iupacComplementBaseLower(iupac[i]); } void iupacReverseComplement(char *iu, int iuSize) /* Reverse complement a string containing DNA and IUPAC codes. Result will be always * lower case. */ { toLowerN(iu, iuSize); reverseBytes(iu, iuSize); iupacComplementLower(iu, iuSize); } boolean iupacMatchStart(char *iupacPrefix, char *dnaString) /* Return TRUE if start of DNA is compatible with iupac */ { char iupac; while ((iupac = *iupacPrefix++) != 0) { if (!iupacMatch(iupac, *dnaString++)) return FALSE; } return TRUE; } char *iupacIn(char *needle, char *haystack) /* Return first place in haystack (DNA) that matches needle that may contain IUPAC codes. */ { int needleSize = strlen(needle); int haySize = strlen(haystack); char *endOfHay = haystack + haySize - needleSize; char *h; for (h = haystack; h<=endOfHay; ++h) { if (iupacMatchStart(needle, h)) return h; } return NULL; }