e0989a0ac7819024d67e99a2a7a02993bf4f3aa9
angie
  Mon Dec 12 11:14:07 2011 -0800
Feature #3707 (VCF in hgTables): Brooke pointed out that the header line that shows the selected fields should appear just before the data
rows (note 25).  I had put it up top to invalidate the VCF header, but
that was silly.  Instead, put an informational comment up top explaining
that the output is not valid VCF, and put the selected-columns header
line where it belongs.

diff --git src/hg/hgTables/vcf.c src/hg/hgTables/vcf.c
index 511523e..83c3ce2 100644
--- src/hg/hgTables/vcf.c
+++ src/hg/hgTables/vcf.c
@@ -203,41 +203,34 @@
     {
     /* if we know the field for identifiers, save it away */
     if ((idField != NULL) && sameString(idField, bb->name))
 	idFieldNum = i;
     hashAddInt(fieldHash, bb->name, i);
     }
 
 /* Create an array of column indexes corresponding to the selected field list. */
 int *columnArray;
 AllocArray(columnArray, fieldCount);
 for (i=0; i<fieldCount; ++i)
     {
     columnArray[i] = hashIntVal(fieldHash, fieldArray[i]);
     }
 
-/* Output row of labels if we are outputting only selected columns.
- * We will include original VCF header below, and adding a comment line
- * at the top invalidates the VCF. */
+// If we are outputting a subset of fields, invalidate the VCF header.
 boolean allFields = (fieldCount == VCFDATALINE_NUM_COLS);
 if (!allFields)
-    {
-    fprintf(f, "#%s", fieldArray[0]);
-    for (i=1; i<fieldCount; ++i)
-	fprintf(f, "\t%s", fieldArray[i]);
-    fprintf(f, "\n");
-    }
+    fprintf(f, "# Only selected columns are included below; output is not valid VCF.\n");
 
 struct asObject *as = vcfAsObj();
 struct asFilter *filter = NULL;
 if (anyFilter())
     filter = asFilterFromCart(cart, db, table, as);
 
 /* Loop through outputting each region */
 struct region *region, *regionList = getRegions();
 int maxOut = bigFileMaxOutput();
 // Include the header, absolutely necessary for VCF parsing.
 boolean printedHeader = FALSE;
 // Temporary storage for row-ification:
 struct dyString *dyAlt = newDyString(1024);
 struct dyString *dyFilter = newDyString(1024);
 struct dyString *dyInfo = newDyString(1024);
@@ -247,30 +240,37 @@
     {
     char *fileName = vcfFileName(table, conn, region->chrom);
     struct vcfFile *vcff = vcfTabixFileMayOpen(fileName, region->chrom, region->start, region->end,
 					       100, maxOut);
     if (vcff == NULL)
 	noWarnAbort();
     // If we are outputting all fields, but this VCF has no genotype info, omit the
     // genotype columns from output:
     if (allFields && vcff->genotypeCount == 0)
 	fieldCount = VCFDATALINE_NUM_COLS - 2;
     if (!printedHeader)
 	{
 	fprintf(f, "%s", vcff->headerString);
 	if (filter)
 	    fprintf(f, "# Filtering on %d columns\n", slCount(filter->columnList));
+	if (!allFields)
+	    {
+	    fprintf(f, "#%s", fieldArray[0]);
+	    for (i=1; i<fieldCount; ++i)
+		fprintf(f, "\t%s", fieldArray[i]);
+	    fprintf(f, "\n");
+	    }
 	printedHeader = TRUE;
 	}
     char *row[VCFDATALINE_NUM_COLS];
     char numBuf[VCF_NUM_BUF_SIZE];
     for (rec = vcff->records;  rec != NULL && (maxOut > 0);  rec = rec->next)
         {
 	vcfRecordToRow(rec, region->chrom, numBuf, dyAlt, dyFilter, dyInfo, dyGt, row);
 	if (asFilterOnRow(filter, row))
 	    {
 	    /* if we're looking for identifiers, check if this matches */
 	    if ((idHash != NULL) && (hashLookup(idHash, row[idFieldNum]) == NULL))
 		continue;
 	    // All fields output: after asFilter'ing, preserve original VCF chrom
 	    if (allFields && !sameString(rec->chrom, region->chrom))
 		row[0] = rec->chrom;