b4339e1d0387e40589c2e1f16727f8f1d882cd1e
angie
  Wed May 19 09:32:32 2021 -0700
faToVcf: when sequences are not all the same size as the first sequence, mention in the error message that non-IUPAC characters are silently ignored, since that is a common cause of size mismatches with sequences that were not processed by UCSC.

diff --git src/hg/utils/faToVcf/faToVcf.c src/hg/utils/faToVcf/faToVcf.c
index f947a86..d2eb9a4 100644
--- src/hg/utils/faToVcf/faToVcf.c
+++ src/hg/utils/faToVcf/faToVcf.c
@@ -103,31 +103,33 @@
  * sequence in the file, then move the reference sequence to the head of the list. */
 {
 verbose(2, "Reading sequences from %s\n", faFile);
 struct dnaSeq *sequences = faReadAllMixed(faFile);
 int seqCount = slCount(sequences);
 verbose(2, "Read %d sequences.\n", seqCount);
 
 if (seqCount < 2)
     errAbort("faToVcf: expecting multiple sequences in %s but found only %d.", faFile, seqCount);
 
 int seqSize = sequences->size;
 struct dnaSeq *seq;
 for (seq = sequences->next;  seq != NULL;  seq = seq->next)
     if (seq->size != seqSize)
         errAbort("faToVcf: first sequence in %s (%s) has size %d, but sequence %s has size %d. "
-                 "All sequences must have the same size.",
+                 "All sequences must have the same size.  "
+                 "(Does the input contain non-IUPAC characters?  Non-IUPAC characters are ignored.  "
+                 "Masked bases are expected to be 'N'.  Gaps are expected to be '-'.)",
                  faFile, sequences->name, seqSize, seq->name, seq->size);
 
 char *refName = optionVal("ref", sequences->name);
 if (differentString(sequences->name, refName))
     {
     verbose(2, "Using %s as reference.\n", refName);
     struct dnaSeq *seq;
     for (seq = sequences;  seq->next != NULL;  seq = seq->next)
         {
         if (sameString(seq->next->name, refName))
             {
             struct dnaSeq *ref = seq->next;
             seq->next = ref->next;
             ref->next = sequences;
             sequences = ref;