e38dd775f9552b3d050b6910c0989be5aba715f1 angie Fri Jan 19 12:54:23 2018 -0800 'Variants' with no alternate allele may come in two forms: alt == '.' or alt == ref. In either case, filter down to ref only. diff --git src/lib/vcf.c src/lib/vcf.c index 183bb82..f980d12 100644 --- src/lib/vcf.c +++ src/lib/vcf.c @@ -743,38 +743,49 @@ return wordCount; } struct vcfRecord *vcfNextRecord(struct vcfFile *vcff) /* Parse the words in the next line from vcff into a vcfRecord. Return NULL at end of file. * Note: this does not store record in vcff->records! */ { char *words[VCF_MAX_COLUMNS]; int wordCount; if ((wordCount = lineFileChopTab(vcff->lf, words)) <= 0) return NULL; wordCount = checkWordCount(vcff, words, wordCount); return vcfRecordFromRow(vcff, words); } +static boolean noAltAllele(char **alleles, int alleleCount) +/* Return true if there is no alternate allele (missing value ".") or the given alternate allele + * is the same as the reference allele. */ +{ +return (alleleCount == 2 && + (sameString(alleles[0], alleles[1]) || sameString(".", alleles[1]))); +} + static boolean allelesHavePaddingBase(char **alleles, int alleleCount) /* Examine alleles to see if they either a) all start with the same base or * b) include a symbolic or 0-length allele. In either of those cases, there * must be an initial padding base that we'll need to trim from non-symbolic * alleles. */ { if (sameString(alleles[0], "-")) return FALSE; +else if (noAltAllele(alleles, alleleCount)) + // Don't trim assertion of no change (ref == alt) + return FALSE; boolean hasPaddingBase = TRUE; char firstBase = '\0'; if (isAllNt(alleles[0], strlen(alleles[0]) +1)) //#*** FIXME isAllNt ignores last base in string!!! always TRUE for len=1 firstBase = alleles[0][0]; int i; for (i = 1; i < alleleCount; i++) { if (sameString(alleles[i], "-")) { hasPaddingBase = FALSE; break; } else if (isAllNt(alleles[i], strlen(alleles[i]) +1)) //#*** FIXME isAllNt ignores last base in string!!! always TRUE for len=1 @@ -842,30 +853,33 @@ * have the same char. */ { int i; char refEnd = ends[0][0]; for (i = 0; i < count; i++) { if (ends[i] < starts[i] || ends[i][0] != refEnd) return FALSE; } return TRUE; } static int countIdenticalBasesRight(char **alleles, int alCount) /* Return the number of bases that are identical at the end of each allele (usually 0). */ { +if (noAltAllele(alleles, alCount)) + // Don't trim assertion of no change (ref == alt) + return 0; char *alleleEnds[alCount]; int i; for (i = 0; i < alCount; i++) { int alLen = strlen(alleles[i]); // If any allele is symbolic, don't try to trim. if (sameString(alleles[i], "-") || !isAllNt(alleles[i], alLen +1)) //#*** FIXME isAllNt ignores last base in string!!! always TRUE for len=1 return 0; alleleEnds[i] = alleles[i] + alLen-1; } int trimmedBases = 0; while (allEndsGEStartsAndIdentical(alleles, alleleEnds, alCount)) { @@ -1474,37 +1488,43 @@ safecpy(altAlCopy, sizeof(altAlCopy), altAlleles); chopByChar(altAlCopy, ',', &(alleles[1]), alCount-1); int i; if (allelesHavePaddingBase(alleles, alCount)) { // Skip padding base (unless we have a symbolic allele): for (i = 0; i < alCount; i++) if (isAllNt(alleles[i], strlen(alleles[i]) +1)) //#*** FIXME isAllNt ignores last base in string!!! always TRUE for len=1 alleles[i]++; } // Having dealt with left padding base, now look for identical bases on the right: int trimmedBases = countIdenticalBasesRight(alleles, alCount); // Build a /-separated allele string, trimming bases on the right if necessary: dyStringClear(dy); +if (noAltAllele(alleles, alCount)) + alCount = 1; for (i = 0; i < alCount; i++) { char *allele = alleles[i]; if (!sameString(allele, ".")) { - if (i > 0) + if (i != 0) + { + if (sameString(alleles[0], allele)) + continue; dyStringAppendC(dy, '/'); + } if (allele[trimmedBases] == '\0') dyStringAppendC(dy, '-'); else dyStringAppendN(dy, allele, strlen(allele)-trimmedBases); } } return dy->string; } static void vcfWriteWordArrayWithSep(FILE *f, int count, char **words, char sep) /* Write words joined by sep to f (or, if count is zero, "."). */ { if (count < 1) fputc('.', f); else