38caeb2a3929a04b5e1cb518dd74e60eba6a9ccf angie Wed Feb 3 19:10:41 2021 -0800 Don't hardcode VCF_MAX_COLUMNS -- count the number of actual columns in header. diff --git src/lib/vcf.c src/lib/vcf.c index 246fac0..cbb98d5 100644 --- src/lib/vcf.c +++ src/lib/vcf.c @@ -390,45 +390,41 @@ * fixed; make sure the names of fixed columns are as expected. */ { if (! sameString(exp1, words[ix])) { if (exp2 == NULL) vcfFileErr(vcff, "Expected column %d's name in header to be \"%s\" but got \"%s\"", ix+1, exp1, words[ix]); else if (! sameString(exp2, words[ix])) vcfFileErr(vcff, "Expected column %d's name in header to be \"%s\" or \"%s\" " "but got \"%s\"", ix+1, exp1, exp2, words[ix]); } } #define expectColumnName(vcff, exp, words, ix) expectColumnName2(vcff, exp, NULL, words, ix) -// There might be a whole lot of genotype columns... -#define VCF_MAX_COLUMNS 256 * 1024 #define VCF_MIN_COLUMNS 8 char *vcfDefaultHeader = "#CHROM POS ID REF ALT QUAL FILTER INFO"; /* Default header if we have none. */ static void parseColumnHeaderRow(struct vcfFile *vcff, char *line) /* Make sure column names are as we expect, and store genotype sample IDs if any are given. */ { -char *words[VCF_MAX_COLUMNS]; -int wordCount = chopLine(line+1, words); -if (wordCount >= VCF_MAX_COLUMNS) - vcfFileErr(vcff, "header contains at least %d columns; " - "VCF_MAX_COLUMNS may need to be increased in vcf.c!", VCF_MAX_COLUMNS); +int wordCount = chopLine(line+1, NULL); +char *words[wordCount]; +chopLine(line+1, words); if (wordCount < VCF_MIN_COLUMNS) errAbort("VCF header missing at least one of the required VCF fields"); expectColumnName(vcff, "CHROM", words, 0); expectColumnName(vcff, "POS", words, 1); expectColumnName(vcff, "ID", words, 2); expectColumnName(vcff, "REF", words, 3); expectColumnName(vcff, "ALT", words, 4); expectColumnName2(vcff, "QUAL", "PROB", words, 5); expectColumnName(vcff, "FILTER", words, 6); expectColumnName(vcff, "INFO", words, 7); if (wordCount > VCF_MIN_COLUMNS) { expectColumnName(vcff, "FORMAT", words, 8); if (wordCount < 10) vcfFileErr(vcff, "FORMAT column is given, but no sample IDs for genotype columns...?"); @@ -739,31 +735,31 @@ // causing the wordCount to be too high by 1: { int expected = 8; if (vcff->genotypeCount > 0) expected = 9 + vcff->genotypeCount; if (wordCount == expected+1 && words[expected][0] == '\0') wordCount--; lineFileExpectWords(vcff->lf, expected, wordCount); return wordCount; } struct vcfRecord *vcfNextRecord(struct vcfFile *vcff) /* Parse the words in the next line from vcff into a vcfRecord. Return NULL at end of file. * Note: this does not store record in vcff->records! */ { -char *words[VCF_MAX_COLUMNS]; +char *words[10 + vcff->genotypeCount]; int wordCount; if ((wordCount = lineFileChopTab(vcff->lf, words)) <= 0) return NULL; wordCount = checkWordCount(vcff, words, wordCount); return vcfRecordFromRow(vcff, words); } static boolean noAltAllele(char **alleles, int alleleCount) /* Return true if there is no alternate allele (missing value ".") or the given alternate allele * is the same as the reference allele. */ { return (alleleCount == 2 && (sameString(alleles[0], alleles[1]) || sameString(".", alleles[1]))); } @@ -986,31 +982,31 @@ { struct lineFile *lf = NULL; if (startsWith("http://", fileOrUrl) || startsWith("ftp://", fileOrUrl) || startsWith("https://", fileOrUrl)) lf = netLineFileOpen(fileOrUrl); else lf = lineFileMayOpen(fileOrUrl, TRUE); struct vcfFile *vcff = vcfFileHeaderFromLineFile(lf, maxErr); if (vcff && chrom != NULL) { char *line = NULL; while (lineFileNextReal(vcff->lf, &line)) { char lineCopy[strlen(line)+1]; safecpy(lineCopy, sizeof(lineCopy), line); - char *words[VCF_MAX_COLUMNS]; + char *words[10 + vcff->genotypeCount]; int wordCount = chopTabs(lineCopy, words); wordCount = checkWordCount(vcff, words, wordCount); struct vcfRecord *record = vcfRecordFromRow(vcff, words); if (chromsMatch(chrom, record->chrom)) { if (record->chromEnd < start) continue; else { lineFileReuse(vcff->lf); break; } } } }