6212068bffa294be01c27e5e34f7b51def22f25a
angie
  Tue Apr 12 17:58:51 2022 -0700
Treat '-' bases as 'N' when counting differing bases between ref and input seq for -maxDiff.

diff --git src/hg/utils/faToVcf/faToVcf.c src/hg/utils/faToVcf/faToVcf.c
index 0ff67ae..e182c11 100644
--- src/hg/utils/faToVcf/faToVcf.c
+++ src/hg/utils/faToVcf/faToVcf.c
@@ -106,42 +106,42 @@
         if (hashLookup(excludedSeqs, seq->name))
             excludeCount++;
         else
             slAddHead(&newList, seq);
         }
     hashFree(&excludedSeqs);
     slReverse(&newList);
     sequences = newList;
     verbose(2, "Excluded %d sequences named in %s (%d sequences remaining including reference)\n",
             excludeCount, excludeFile, slCount(sequences));
     }
 return sequences;
 }
 
 static int countDiffs(struct dnaSeq *ref, struct dnaSeq *seq)
-/* Return the number of bases that differ between ref and seq ignoring 'N'. */
+/* Return the number of bases that differ between ref and seq, ignoring 'N' in either and '-' in seq. */
 {
 if (ref->size != seq->size)
     errAbort("countDiffs: expecting equally sized sequences but %s size %d != %s size %d",
              ref->name, ref->size, seq->name, seq->size);
 int diffs = 0;
 int i;
 for (i = 0;  i < ref->size;  i++)
     {
     char refBase = toupper(ref->dna[i]);
     char seqBase = toupper(seq->dna[i]);
-    if (refBase != 'N' && seqBase != 'N' && seqBase != refBase)
+    if (refBase != 'N' && seqBase != 'N' && seqBase != '-' && seqBase != refBase)
         diffs++;
     }
 return diffs;
 }
 
 static struct dnaSeq *filterMaxDiff(struct dnaSeq *sequences)
 /* If -maxDiff was passed in, remove any sequences with more than that number of differences
  * from the reference (ignoring Ns but not IUPAC ambiguous bases). */
 {
 int maxDiff = optionInt("maxDiff", 0);
 if (maxDiff > 0)
     {
     int excludeCount = 0;
     struct dnaSeq *ref = sequences;
     struct dnaSeq *newList = NULL, *seq, *nextSeq = NULL;