ad3a176457ad1d21a0fedc47f349ec3484e751f1
angie
  Fri Feb 9 16:27:51 2018 -0800
Cleaning up some old ugliness about the size parameter to isAllDna and isAllNt.
hgc's printSnpAlignment code that parsed snpNNN.fa was using lineSize as length
but lineSize is length+1.  Then isAllDna was written with "i<size-1" as the
loop test instead of "i < size".  I didn't fix that properly when I separated
out isAllNt from isAllDna.
Later, I (re?)discovered that isAllNt needed length+1 as its size and just
added some FIXME comments.  Thanks Brian R for prodding me to actually fix it.
refs #20895

diff --git src/hg/hgc/hgc.c src/hg/hgc/hgc.c
index 835a66a..e7c8d5d 100644
--- src/hg/hgc/hgc.c
+++ src/hg/hgc/hgc.c
@@ -245,30 +245,31 @@
 #include "mdb.h"
 #include "yaleGencodeAssoc.h"
 #include "itemDetailsHtml.h"
 #include "trackVersion.h"
 #include "numtsClick.h"
 #include "geneReviewsClick.h"
 #include "bigBed.h"
 #include "bigPsl.h"
 #include "bedTabix.h"
 #include "longRange.h"
 #include "hmmstats.h"
 #include "aveStats.h"
 #include "trix.h"
 #include "bPlusTree.h"
 #include "customFactory.h"
+#include "iupac.h"
 
 static char *rootDir = "hgcData";
 
 #define LINESIZE 70  /* size of lines in comp seq feature */
 
 struct cart *cart;	/* User's settings. */
 char *seqName;		/* Name of sequence we're working on. */
 int winStart, winEnd;   /* Bounds of sequence. */
 char *database;		/* Name of mySQL database. */
 char *organism;		/* Colloquial name of organism. */
 char *genome;		/* common name, e.g. Mouse, Human */
 char *scientificName;	/* Scientific name of organism. */
 
 struct hash *trackHash;	/* A hash of all tracks - trackDb valued */
 
@@ -16216,97 +16217,89 @@
 			  boldStart, boldEnd, tIsRc, tSeq->size, qSeq->size);
     }
 
 axtFree(&axt);
 hPrintf("</PRE>");
 }
 
 void printSnpAlignment(struct trackDb *tdb, struct snp *snp, int version)
 /* Get flanking sequences from table; align and print */
 {
 char *fileName = NULL;
 char *variation = NULL;
 
 char *line;
 struct lineFile *lf = NULL;
-int lineSize;
 static int maxFlank = 1000;
 static int lineWidth = 100;
 
 boolean gotVar = FALSE;
-boolean isNucleotide = TRUE;
 boolean leftFlankTrimmed = FALSE;
 boolean rightFlankTrimmed = FALSE;
 
 struct dyString *seqDbSnp5 = newDyString(512);
 struct dyString *seqDbSnp3 = newDyString(512);
 struct dyString *seqDbSnpTemp = newDyString(512);
 
 char *leftFlank = NULL;
 char *rightFlank = NULL;
 
 struct dnaSeq *dnaSeqDbSnp5 = NULL;
 struct dnaSeq *dnaSeqDbSnpO = NULL;
 struct dnaSeq *dnaSeqDbSnp3 = NULL;
 struct dnaSeq *seqDbSnp = NULL;
 struct dnaSeq *seqNib = NULL;
 
-int spaces = 0;
 int len5 = 0;
 int len3 = 0;
 int start = 0;
 int end = 0;
 int skipCount = 0;
 
 off_t offset = 0;
 
 fileName = getSnpSeqFile(tdb, version);
 if (!fileName)
     return;
 
 offset = getSnpSeqFileOffset(tdb, snp);
 if (offset == -1)
     return;
 
 lf = lineFileOpen(fileName, TRUE);
 lineFileSeek(lf, offset, SEEK_SET);
 /* skip the header line */
-lineFileNext(lf, &line, &lineSize);
+lineFileNext(lf, &line, NULL);
 if (!startsWith(">rs", line))
     errAbort("Expected FASTA header, got this line:\n%s\nat offset %lld "
 	     "in file %s", line, (long long)offset, fileName);
 
-while (lineFileNext(lf, &line, &lineSize))
+while (lineFileNext(lf, &line, NULL))
     {
-    spaces = countChars(line, ' ');
     stripString(line, " ");
-    lineSize = lineSize - spaces;
-    if (sameString(line, "N"))
-        isNucleotide = FALSE;
-    else
-        isNucleotide = isAllDna(line, lineSize);
-    if (lineSize > 2 && gotVar)
-        dyStringAppend(seqDbSnp3,line);
-    else if (lineSize > 2 && !gotVar)
-        dyStringAppend(seqDbSnp5,line);
-    else if (lineSize == 2 && !isNucleotide)
+    int len = strlen(line);
+    if (len == 0)
+        break;
+    else if (len == 1 && isIupacAmbiguous(line[0]))
         {
 	gotVar = TRUE;
 	variation = cloneString(line);
 	}
-    else if (lineSize == 1)
-        break;
+    else if (gotVar)
+        dyStringAppend(seqDbSnp3, line);
+    else
+        dyStringAppend(seqDbSnp5, line);
     }
 lineFileClose(&lf);
 
 if (variation == NULL)
     {
     printf("<P>Could not parse ambiguous SNP base out of dbSNP "
 	   "sequence, so can't display re-alignment of flanking sequences.\n");
     return;
     }
 
 /* trim */
 /* axtAffine has a limit of 100,000,000 bases for query x target */
 leftFlank = dyStringCannibalize(&seqDbSnp5);
 rightFlank = dyStringCannibalize(&seqDbSnp3);
 len5 = strlen(leftFlank);