4cab7f311cadce7428c1d7e1c4dfdcb0d3a43033 angie Wed Jul 27 10:32:19 2011 -0700 Feature #3710 (vcfTabix UI options): Added controls for selection of centervariant for haplotype clustering/sorting. Also, some hgc improvements: made the Genotype Details table into a collapsible section; better display of QUAL and FILTER column values, which also involved improving the representation of those columns in src/inc/vcf.h. diff --git src/lib/vcf.c src/lib/vcf.c index 65bc467..75300f1 100644 --- src/lib/vcf.c +++ src/lib/vcf.c @@ -414,30 +414,49 @@ if (vcff->majorVersion == 0) vcfFileErr(vcff, "missing ##fileformat= header line? Assuming 4.1."); if ((vcff->majorVersion != 4 || (vcff->minorVersion != 0 && vcff->minorVersion != 1)) && (vcff->majorVersion != 3)) vcfFileErr(vcff, "VCFv%d.%d not supported -- only v3.*, v4.0 or v4.1", vcff->majorVersion, vcff->minorVersion); // Next, one header line beginning with single "#" that names the columns: if (line == NULL) // EOF after metadata return vcff; parseColumnHeaderRow(vcff, line); return vcff; } +static void parseFilterColumn(struct vcfFile *vcff, struct vcfRecord *record, char *filterStr) +/* Split filterStr, the ;-separated FILTER column value, into record->filterCount and the record->filters array; each code is stored with vcfFilePooledStr, and a temp copy (not filterStr) is what gets chopped. */ +{ +// We don't want to modify something allocated with vcfFilePooledStr because that uses +// hash element names for storage! So don't make a vcfFilePooledStr copy of filterStr and +// chop that; instead, chop a temp string and pool the words separately. 
+static struct dyString *tmp = NULL; +if (tmp == NULL) + tmp = dyStringNew(0); +dyStringClear(tmp); +dyStringAppend(tmp, filterStr); +record->filterCount = countChars(filterStr, ';') + 1; +record->filters = vcfFileAlloc(vcff, record->filterCount * sizeof(char **)); +(void)chopByChar(tmp->string, ';', record->filters, record->filterCount); +int i; +for (i = 0; i < record->filterCount; i++) + record->filters[i] = vcfFilePooledStr(vcff, record->filters[i]); +} + struct vcfInfoDef *vcfInfoDefForKey(struct vcfFile *vcff, const char *key) /* Return infoDef for key, or NULL if it wasn't specified in the header or VCF spec. */ { struct vcfInfoDef *def; // I expect there to be fairly few definitions (less than a dozen) so // I'm just doing a linear search not hash: for (def = vcff->infoDefs; def != NULL; def = def->next) { if (sameString(key, def->key)) return def; } for (def = vcfSpecInfoDefs; def != NULL; def = def->next) { if (sameString(key, def->key)) return def; @@ -551,32 +570,32 @@ char *words[VCF_MAX_COLUMNS]; int wordCount; while ((wordCount = lineFileChop(vcff->lf, words)) > 0) { lineFileExpectWords(vcff->lf, expected, wordCount); struct vcfRecord *record; AllocVar(record); record->file = vcff; record->chrom = vcfFilePooledStr(vcff, words[0]); record->chromStart = lineFileNeedNum(vcff->lf, words, 1) - 1; // chromEnd may be modified by parseInfoColumn, if INFO column includes END. record->chromEnd = record->chromStart + 1; record->name = vcfFilePooledStr(vcff, words[2]); record->ref = vcfFilePooledStr(vcff, words[3]); record->alt = vcfFilePooledStr(vcff, words[4]); - record->qual = atof(words[5]); //#*** qual can be "." 
so we need to represent that - record->filter = vcfFilePooledStr(vcff, words[6]); + record->qual = vcfFilePooledStr(vcff, words[5]); + parseFilterColumn(vcff, record, words[6]); parseInfoColumn(vcff, record, words[7]); if (vcff->genotypeCount > 0) { record->format = vcfFilePooledStr(vcff, words[8]); record->genotypeUnparsedStrings = vcfFileAlloc(vcff, vcff->genotypeCount * sizeof(char *)); int i; // Don't bother actually parsing all these until & unless we need the info: for (i = 0; i < vcff->genotypeCount; i++) record->genotypeUnparsedStrings[i] = vcfFileCloneStr(vcff, words[9+i]); } slAddHead(&(vcff->records), record); } slReverse(&(vcff->records)); lineFileClose(&(vcff->lf));