ad3a176457ad1d21a0fedc47f349ec3484e751f1
angie
  Fri Feb 9 16:27:51 2018 -0800
Cleaning up some old ugliness about the size parameter to isAllDna and isAllNt.
hgc's printSnpAlignment code that parsed snpNNN.fa was using lineSize as length
but lineSize is length+1.  Then isAllDna was written with "i<size-1" as the
loop test instead of "i < size".  I didn't fix that properly when I separated
out isAllNt from isAllDna.
Later, I (re?)discovered that isAllNt needed length+1 as its size and just
added some FIXME comments.  Thanks Brian R for prodding me to actually fix it.
refs #20895

diff --git src/lib/vcf.c src/lib/vcf.c
index f980d12..246faaf 100644
--- src/lib/vcf.c
+++ src/lib/vcf.c
@@ -764,43 +764,41 @@
 }
 
 static boolean allelesHavePaddingBase(char **alleles, int alleleCount)
 /* Examine alleles to see if they either a) all start with the same base or
  * b) include a symbolic or 0-length allele.  In either of those cases, there
  * must be an initial padding base that we'll need to trim from non-symbolic
  * alleles. */
 {
 if (sameString(alleles[0], "-"))
     return FALSE;
 else if (noAltAllele(alleles, alleleCount))
     // Don't trim assertion of no change (ref == alt)
     return FALSE;
 boolean hasPaddingBase = TRUE;
 char firstBase = '\0';
-if (isAllNt(alleles[0], strlen(alleles[0])
-            +1)) //#*** FIXME isAllNt ignores last base in string!!! always TRUE for len=1
+if (isAllNt(alleles[0], strlen(alleles[0])))
     firstBase = alleles[0][0];
 int i;
 for (i = 1;  i < alleleCount;  i++)
     {
     if (sameString(alleles[i], "-"))
         {
         hasPaddingBase = FALSE;
         break;
         }
-    else if (isAllNt(alleles[i], strlen(alleles[i])
-                     +1)) //#*** FIXME isAllNt ignores last base in string!!! always TRUE for len=1
+    else if (isAllNt(alleles[i], strlen(alleles[i])))
 	{
 	if (firstBase == '\0')
 	    firstBase = alleles[i][0];
 	if (alleles[i][0] != firstBase)
 	    // Different first base implies unpadded alleles.
 	    hasPaddingBase = FALSE;
 	}
     else if (sameString(alleles[i], "<X>") || sameString(alleles[i], "<*>"))
         {
         // Special case for samtools mpileup "<X>" or gVCF "<*>" (no alternate allele observed) --
         // being symbolic doesn't make this an indel and ref base is not necessarily padded.
         hasPaddingBase = FALSE;
         }
     else
 	{
@@ -824,32 +822,31 @@
  * record in hgc -- so return the original chromStart. */
 {
 unsigned int chromStartOrig = rec->chromStart;
 struct vcfFile *vcff = rec->file;
 if (rec->alleleCount > 1)
     {
     boolean hasPaddingBase = allelesHavePaddingBase(rec->alleles, rec->alleleCount);
     if (hasPaddingBase)
 	{
 	rec->chromStart++;
 	int i;
 	for (i = 0;  i < rec->alleleCount;  i++)
 	    {
 	    if (rec->alleles[i][1] == '\0')
 		rec->alleles[i] = vcfFilePooledStr(vcff, "-");
-	    else if (isAllNt(rec->alleles[i], strlen(rec->alleles[i])
-                             +1)) //#*** FIXME isAllNt ignores last base in string!!! always TRUE for len=1
+	    else if (isAllNt(rec->alleles[i], strlen(rec->alleles[i])))
 		rec->alleles[i] = vcfFilePooledStr(vcff, rec->alleles[i]+1);
 	    else // don't trim first character of symbolic allele
 		rec->alleles[i] = vcfFilePooledStr(vcff, rec->alleles[i]);
 	    }
 	}
     }
 return chromStartOrig;
 }
 
 static boolean allEndsGEStartsAndIdentical(char **starts, char **ends, int count)
 /* Given two arrays with <count> elements, return true if all strings in ends[] are
  * greater than or equal to the corresponding strings in starts[], and all ends[]
  * have the same char. */
 {
 int i;
@@ -863,32 +860,31 @@
 }
 
 static int countIdenticalBasesRight(char **alleles, int alCount)
 /* Return the number of bases that are identical at the end of each allele (usually 0). */
 {
 if (noAltAllele(alleles, alCount))
     // Don't trim assertion of no change (ref == alt)
     return 0;
 char *alleleEnds[alCount];
 int i;
 for (i = 0;  i < alCount;  i++)
     {
     int alLen = strlen(alleles[i]);
     // If any allele is symbolic, don't try to trim.
     if (sameString(alleles[i], "-") ||
-        !isAllNt(alleles[i], alLen
-                 +1)) //#*** FIXME isAllNt ignores last base in string!!! always TRUE for len=1
+        !isAllNt(alleles[i], alLen))
 	return 0;
     alleleEnds[i] = alleles[i] + alLen-1;
     }
 int trimmedBases = 0;
 while (allEndsGEStartsAndIdentical(alleles, alleleEnds, alCount))
     {
     trimmedBases++;
     // Trim identical last base of alleles and move alleleEnds[] items back.
     for (i = 0;  i < alCount;  i++)
 	alleleEnds[i]--;
     }
 return trimmedBases;
 }
 
 unsigned int vcfRecordTrimAllelesRight(struct vcfRecord *rec)
@@ -1480,32 +1476,31 @@
 // VCF reference allele gets its own column:
 char *refAllele = words[3];
 char *altAlleles = words[4];
 // Make a vcfRecord-like allele array (ref in [0], alts after) so we can check for padding base:
 int alCount = 1 + countChars(altAlleles, ',') + 1;
 char *alleles[alCount];
 alleles[0] = refAllele;
 char altAlCopy[strlen(altAlleles)+1];
 safecpy(altAlCopy, sizeof(altAlCopy), altAlleles);
 chopByChar(altAlCopy, ',', &(alleles[1]), alCount-1);
 int i;
 if (allelesHavePaddingBase(alleles, alCount))
     {
     // Skip padding base (unless we have a symbolic allele):
     for (i = 0;  i < alCount;  i++)
-	if (isAllNt(alleles[i], strlen(alleles[i])
-                    +1)) //#*** FIXME isAllNt ignores last base in string!!! always TRUE for len=1
+	if (isAllNt(alleles[i], strlen(alleles[i])))
 	    alleles[i]++;
     }
 // Having dealt with left padding base, now look for identical bases on the right:
 int trimmedBases = countIdenticalBasesRight(alleles, alCount);
 // Build a /-separated allele string, trimming bases on the right if necessary:
 dyStringClear(dy);
 if (noAltAllele(alleles, alCount))
     alCount = 1;
 for (i = 0;  i < alCount;  i++)
     {
     char *allele = alleles[i];
     if (!sameString(allele, "."))
         {
         if (i != 0)
             {