src/lib/iupac.c ad3a176457ad1d21a0fedc47f349ec3484e751f1

ad3a176457ad1d21a0fedc47f349ec3484e751f1
angie
  Fri Feb 9 16:27:51 2018 -0800
Cleaning up some old ugliness about the size parameter to isAllDna and isAllNt.
hgc's printSnpAlignment code that parsed snpNNN.fa was using lineSize as length
but lineSize is length+1.  Then isAllDna was written with "i<size-1" as the
loop test instead of "i < size".  I didn't fix that properly when I separated
out isAllNt from isAllDna.
Later, I (re?)discovered that isAllNt needed length+1 as its size and just
added some FIXME comments.  Thanks Brian R for prodding me to actually fix it.
refs #20895

diff --git src/lib/iupac.c src/lib/iupac.c
index 9289d51..1fe9d44 100644
--- src/lib/iupac.c
+++ src/lib/iupac.c
@@ -1,216 +1,203 @@
 /* iupac - routines to help cope with IUPAC ambiguity codes in DNA sequence. */
 
 /* Copyright (C) 2012 The Regents of the University of California 
  * See README in this or parent directory for licensing information. */
 
 #include "common.h"
 #include "dnautil.h"
 #include "iupac.h"
 
 boolean iupacMatchLower(char iupac, char dna)
 /* See if iupac ambiguity code matches dna character where
  * both are lower case */
 {
 switch (iupac)
     {
     case 'a':
         return dna == 'a';
     case 'c':
         return dna == 'c';
     case 'g':
         return dna == 'g';
     case 't':
     case 'u':
         return dna == 't';
     case 'r':
         return dna == 'a' || dna == 'g';
     case 'y':
         return dna == 'c' || dna == 't';
     case 's':
         return dna == 'g' || dna == 'c';
     case 'w':
         return dna == 'a' || dna == 't';
     case 'k':
         return dna == 'g' || dna == 't';
     case 'm':
         return dna == 'a' || dna == 'c';
     case 'b':
         return dna == 'c' || dna == 'g' || dna == 't';
     case 'd':
         return dna == 'a' || dna == 'g' || dna == 't';
     case 'h':
         return dna == 'a' || dna == 'c' || dna == 't';
     case 'v':
         return dna == 'a' || dna == 'c' || dna == 'g';
     case 'n':
         return TRUE;
     default:
         errAbort("Unrecognized IUPAC code '%c'", iupac);
 	return FALSE;   // Not actually used but prevent compiler complaints
     }
 }
 
 boolean iupacMatch(char iupac, char dna)
 /* See if iupac ambiguity code matches dna character */
 {
 return iupacMatchLower(tolower(iupac), tolower(dna));
 }
 
 boolean isIupacLower(char c)
 /* See if iupac c is a legal (lower case) iupac char */
 {
 switch (c)
     {
     case 'a':
     case 'c':
     case 'g':
     case 't':
     case 'u':
     case 'r':
     case 'y':
     case 's':
     case 'w':
     case 'k':
     case 'm':
     case 'b':
     case 'd':
     case 'h':
     case 'v':
     case 'n':
         return TRUE;
     default:
 	return FALSE;
     }
 }
 
 boolean isIupac(char c)
 /* See if iupac c is a legal iupac char */
 {
 return isIupacLower(tolower(c));
 }
 
 void iupacFilter(char *in, char *out)
 /* Filter out non-DNA non-UIPAC ambiguity code characters and change to lower case. */
 {
 char c;
 while ((c = *in++) != 0)
     {
     c = tolower(c);
     if (isIupacLower(c))
        *out++ = c;
     }
 *out++ = 0;
 }
 
 boolean anyIupac(char *s)
 /* Return TRUE if there are any IUPAC ambiguity codes in s */
 {
 dnaUtilOpen();
-int c;
+char c;
 while ((c = *s++) != 0)
     {
-    switch (c)
-	{
-	case 'r':
-	case 'y':
-	case 's':
-	case 'w':
-	case 'k':
-	case 'm':
-	case 'b':
-	case 'd':
-	case 'h':
-	case 'v':
-	case 'n':
+    if (isIupacAmbiguous(c))
         return TRUE;
     }
-    }
 return FALSE;
 }
 
 char iupacComplementBaseLower(char iupac)
 /* Return IUPAC complement for a single base */
 {
 switch (iupac)
     {
     case 'a':
         return 't';
     case 'c':
         return 'g';
     case 'g':
         return 'c';
     case 't':
     case 'u':
         return 'a';
     case 'r':
 	return 'y';
     case 'y':
 	return 'r';
     case 's':
 	return 's';
     case 'w':
 	return 'w';
     case 'k':
 	return 'm';
     case 'm':
 	return 'k';
     case 'b':
 	return 'v';
     case 'd':
 	return 'h';
     case 'h':
 	return 'd';
     case 'v':
 	return 'b';
     case 'n':
 	return 'n';
     default:
         errAbort("Unrecognized IUPAC code '%c'", iupac);
 	return 0;   // Just to keep compiler from complaining, control won't reach here.
     }
 }
 
 void iupacComplementLower(char *iupac, int iuSize)
 /* Return IUPAC complement many bases. Assumes iupac is lower case. */
 {
 int i;
 for (i=0; i<iuSize; ++i)
     iupac[i] = iupacComplementBaseLower(iupac[i]);
 }
 
 void iupacReverseComplement(char *iu, int iuSize)
 /* Reverse complement a string containing DNA and IUPAC codes. Result will be always
  * lower case. */
 {
 toLowerN(iu, iuSize);
 reverseBytes(iu, iuSize);
 iupacComplementLower(iu, iuSize);
 }
 
 boolean iupacMatchStart(char *iupacPrefix, char *dnaString)
 /* Return TRUE if start of DNA is compatible with iupac */
 {
 char iupac;
 while ((iupac = *iupacPrefix++) != 0)
     {
     if (!iupacMatch(iupac, *dnaString++))
         return FALSE;
     }
 return TRUE;
 }
 
 char *iupacIn(char *needle, char *haystack)
 /* Return first place in haystack (DNA) that matches needle that may contain IUPAC codes. */
 {
 int needleSize = strlen(needle);
 int haySize = strlen(haystack);
 char *endOfHay = haystack + haySize - needleSize;
 char *h;
 for (h = haystack; h<=endOfHay; ++h)
     {
     if (iupacMatchStart(needle, h))
         return h;
     }
 return NULL;
 }