src/lib/vcf.c 4cab7f311cadce7428c1d7e1c4dfdcb0d3a43033

4cab7f311cadce7428c1d7e1c4dfdcb0d3a43033
angie
  Wed Jul 27 10:32:19 2011 -0700
Feature #3710 (vcfTabix UI options): Added controls for selection of centervariant for haplotype clustering/sorting.  Also, some hgc improvements:
made the Genotype Details table into a collapsible section; better display
of QUAL and FILTER column values, which also involved improving the
representation of those columns in src/inc/vcf.h.

diff --git src/lib/vcf.c src/lib/vcf.c
index 65bc467..75300f1 100644
--- src/lib/vcf.c
+++ src/lib/vcf.c
@@ -414,30 +414,49 @@
 if (vcff->majorVersion == 0)
     vcfFileErr(vcff, "missing ##fileformat= header line?  Assuming 4.1.");
 if ((vcff->majorVersion != 4 || (vcff->minorVersion != 0 && vcff->minorVersion != 1)) &&
     (vcff->majorVersion != 3))
     vcfFileErr(vcff, "VCFv%d.%d not supported -- only v3.*, v4.0 or v4.1",
 	       vcff->majorVersion, vcff->minorVersion);
 // Next, one header line beginning with single "#" that names the columns:
 if (line == NULL)
     // EOF after metadata
     return vcff;
 parseColumnHeaderRow(vcff, line);
 return vcff;
 }
 
 
+static void parseFilterColumn(struct vcfFile *vcff, struct vcfRecord *record, char *filterStr)
+/* Transform ;-separated filter codes into count + string array. */
+{
+// We don't want to modify something allocated with vcfFilePooledStr because that uses
+// hash element names for storage!  So don't make a vcfFilePooledStr copy of filterStr and
+// chop that; instead, chop a temp string and pool the words separately.
+static struct dyString *tmp = NULL;
+if (tmp == NULL)
+    tmp = dyStringNew(0);
+dyStringClear(tmp);
+dyStringAppend(tmp, filterStr);
+record->filterCount = countChars(filterStr, ';') + 1;
+record->filters = vcfFileAlloc(vcff, record->filterCount * sizeof(char **));
+(void)chopByChar(tmp->string, ';', record->filters, record->filterCount);
+int i;
+for (i = 0;  i < record->filterCount;  i++)
+    record->filters[i] = vcfFilePooledStr(vcff, record->filters[i]);
+}
+
 struct vcfInfoDef *vcfInfoDefForKey(struct vcfFile *vcff, const char *key)
 /* Return infoDef for key, or NULL if it wasn't specified in the header or VCF spec. */
 {
 struct vcfInfoDef *def;
 // I expect there to be fairly few definitions (less than a dozen) so
 // I'm just doing a linear search not hash:
 for (def = vcff->infoDefs;  def != NULL;  def = def->next)
     {
     if (sameString(key, def->key))
 	return def;
     }
 for (def = vcfSpecInfoDefs;  def != NULL;  def = def->next)
     {
     if (sameString(key, def->key))
 	return def;
@@ -551,32 +570,32 @@
 char *words[VCF_MAX_COLUMNS];
 int wordCount;
 while ((wordCount = lineFileChop(vcff->lf, words)) > 0)
     {
     lineFileExpectWords(vcff->lf, expected, wordCount);
     struct vcfRecord *record;
     AllocVar(record);
     record->file = vcff;
     record->chrom = vcfFilePooledStr(vcff, words[0]);
     record->chromStart = lineFileNeedNum(vcff->lf, words, 1) - 1;
     // chromEnd may be modified by parseInfoColumn, if INFO column includes END.
     record->chromEnd = record->chromStart + 1;
     record->name = vcfFilePooledStr(vcff, words[2]);
     record->ref = vcfFilePooledStr(vcff, words[3]);
     record->alt = vcfFilePooledStr(vcff, words[4]);
-    record->qual = atof(words[5]); //#*** qual can be "." so we need to represent that
-    record->filter = vcfFilePooledStr(vcff, words[6]);
+    record->qual = vcfFilePooledStr(vcff, words[5]);
+    parseFilterColumn(vcff, record, words[6]);
     parseInfoColumn(vcff, record, words[7]);
     if (vcff->genotypeCount > 0)
 	{
 	record->format = vcfFilePooledStr(vcff, words[8]);
 	record->genotypeUnparsedStrings = vcfFileAlloc(vcff,
 						       vcff->genotypeCount * sizeof(char *));
 	int i;
 	// Don't bother actually parsing all these until & unless we need the info:
 	for (i = 0;  i < vcff->genotypeCount;  i++)
 	    record->genotypeUnparsedStrings[i] = vcfFileCloneStr(vcff, words[9+i]);
 	}
     slAddHead(&(vcff->records), record);
     }
 slReverse(&(vcff->records));
 lineFileClose(&(vcff->lf));