b4339e1d0387e40589c2e1f16727f8f1d882cd1e angie Wed May 19 09:32:32 2021 -0700 faToVcf: when sequences aren't the expected size, mention that non-IUPAC chars are silently ignored since that's a common cause with non-UCSC-processed seqs. diff --git src/hg/utils/faToVcf/faToVcf.c src/hg/utils/faToVcf/faToVcf.c index f947a86..d2eb9a4 100644 --- src/hg/utils/faToVcf/faToVcf.c +++ src/hg/utils/faToVcf/faToVcf.c @@ -103,31 +103,33 @@ * sequence in the file, then move the reference sequence to the head of the list. */ { verbose(2, "Reading sequences from %s\n", faFile); struct dnaSeq *sequences = faReadAllMixed(faFile); int seqCount = slCount(sequences); verbose(2, "Read %d sequences.\n", seqCount); if (seqCount < 2) errAbort("faToVcf: expecting multiple sequences in %s but found only %d.", faFile, seqCount); int seqSize = sequences->size; struct dnaSeq *seq; for (seq = sequences->next; seq != NULL; seq = seq->next) if (seq->size != seqSize) errAbort("faToVcf: first sequence in %s (%s) has size %d, but sequence %s has size %d. " - "All sequences must have the same size.", + "All sequences must have the same size. " + "(Does the input contain non-IUPAC characters? Non-IUPAC characters are ignored. " + "Masked bases are expected to be 'N'. Gaps are expected to be '-'.)", faFile, sequences->name, seqSize, seq->name, seq->size); char *refName = optionVal("ref", sequences->name); if (differentString(sequences->name, refName)) { verbose(2, "Using %s as reference.\n", refName); struct dnaSeq *seq; for (seq = sequences; seq->next != NULL; seq = seq->next) { if (sameString(seq->next->name, refName)) { struct dnaSeq *ref = seq->next; seq->next = ref->next; ref->next = sequences; sequences = ref;