3448d26e6e6e90cf35747019e2e036ef260362d4
angie
  Fri Apr 3 13:12:00 2015 -0700
When VCF ##INFO description and data seem to have the same number of
|-separated columns (and at least 3 apparent columns), display them
as separate tables below the big table of INFO tags and settings
because they can be quite large; for example, VEP annotations may
have >40 |-separated columns for each alt allele & transcript.
refs #14379

diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c
index c1d23ac..51dce08 100644
--- src/hg/hgc/vcfClick.c
+++ src/hg/hgc/vcfClick.c
@@ -86,66 +86,175 @@
 {
 if (rec->filterCount == 0 || sameString(rec->filters[0], "."))
     printf("<B>Filter:</B> "NA"<BR>\n");
 else if (rec->filterCount == 1 && sameString(rec->filters[0], "PASS"))
     printf("<B>Filter:</B> PASS<BR>\n");
 else
     {
     printf("<B>Filter failures:</B> ");
     printf("<font style='font-weight: bold; color: #FF0000;'>\n");
     struct vcfFile *vcff = rec->file;
     printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs, FALSE);
     printf("</font>\n");
     }
 }
 
+// One column name in an ##INFO description that lists tabular columns: a non-empty run of
+// letters, digits, '_', '.' or '-' (no spaces, so space-padded separators do not match):
+#define COL_DESC_WORD_REGEX "[A-Za-z_0-9.-]+"
+// Two or more column names joined by literal '|' characters, e.g. "Allele|Gene|Feature":
+#define COL_DESC_REGEX COL_DESC_WORD_REGEX"(\\|"COL_DESC_WORD_REGEX")+"
+
+// Fewest |-separated columns a description must have before it is treated as tabular:
+#define MIN_COLUMN_COUNT 3
+
+static boolean looksTabular(const struct vcfInfoDef *def, struct vcfInfoElement *el)
+/* Return TRUE if def is string-typed, its description has a |-separated column list of at least
+ * MIN_COLUMN_COUNT columns, and any non-empty value of el has as many |-separated parts. */
+{
+if (!def || def->type != vcfInfoString || isEmpty(def->description))
+    return FALSE;  // no definition, non-string type, or no description to take columns from
+if (regexMatch(def->description, COL_DESC_REGEX))
+    {
+    int descColCount = countChars(def->description, '|') + 1;  // NOTE(review): whole description is passed, not just the regex match
+    if (descColCount >= MIN_COLUMN_COUNT)
+        {
+        int j;
+        for (j = 0;  j < el->count;  j++)
+            {
+            char *val = el->values[j].datString;
+            if (isEmpty(val))
+                continue;  // skip empty values and keep looking
+            int elColCount = countChars(val, '|') + 1;
+            if (elColCount == descColCount)
+                return TRUE;  // one matching value is enough; later values are not examined
+            }
+        }
+    }
+return FALSE;
+}
+
+static void printTabularHeaderRow(const struct vcfInfoDef *def)
+/* Print a <TR> of <TH> cells, one per |-separated column name found in def->description;
+ * errAborts if the description does not match, so call only when looksTabular returns TRUE. */
+{
+regmatch_t substrArr[PATH_LEN];  // NOTE(review): PATH_LEN is used only as an array capacity here
+if (regexMatchSubstr(def->description, COL_DESC_REGEX, substrArr, ArraySize(substrArr)))
+    {
+    puts("<TR>");
+    // Copy just the span of def->description recorded in substrArr[0] (the regex match),
+    // then chop the copy by '|' and print each piece as a header cell:
+    int matchSize = substrArr[0].rm_eo - substrArr[0].rm_so;
+    char copy[matchSize+1];  // stack buffer sized to the match plus one extra byte
+    safencpy(copy, sizeof(copy), def->description + substrArr[0].rm_so, matchSize);
+    // Turn '_' into ' ' so description words can wrap inside headers, saving some space
+    subChar(copy, '_', ' ');
+    char *words[PATH_LEN];
+    int descColCount = chopByChar(copy, '|', words, ArraySize(words));
+    int i;
+    for (i = 0;  i < descColCount; i++)
+        printf("<TH class='withThinBorder'>%s</TH>", words[i]);
+    puts("</TR>");
+    }
+else
+    errAbort("printTabularHeaderRow: code bug, if looksTabular returns true then "
+             "regex should work here");
+}
+
+static void printTabularData(struct vcfInfoElement *el)
+/* Print one <TR> per value in el with one <TD> per |-separated part (no cells if value is empty). */
+{
+int j;
+for (j = 0;  j < el->count;  j++)
+    {
+    puts("<TR>");
+    char *val = el->values[j].datString;
+    if (!isEmpty(val))
+        {
+        int len = strlen(val);
+        char copy[len+1];  // scratch copy on the stack; chopByChar is handed this, not val
+        safencpy(copy, sizeof(copy), val, len);
+        char *words[PATH_LEN];
+        int colCount = chopByChar(copy, '|', words, ArraySize(words));  // not checked against the header's column count
+        int k;
+        for (k = 0;  k < colCount;  k++)
+            printf("<TD class='withThinBorder'>%s</TD>", words[k]);  // NOTE(review): printed without HTML escaping
+        }
+    puts("</TR>");
+    }
+}
+
+
 static void vcfInfoDetails(struct vcfRecord *rec)
 /* Expand info keys to descriptions, then print out keys and values. */
 {
 if (rec->infoCount == 0)
     return;
 struct vcfFile *vcff = rec->file;
 puts("<B>INFO column annotations:</B><BR>");
 puts("<TABLE border=0 cellspacing=0 cellpadding=2>");
 int i;
 for (i = 0;  i < rec->infoCount;  i++)
     {
     struct vcfInfoElement *el = &(rec->infoElements[i]);
     const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key);
-    printf("<TR valign='top'><TD align=\"right\"><B>%s:</B></TD><TD style=width:15%%;'>",
+    printf("<TR valign='top'><TD align=\"right\"><B>%s:</B></TD><TD>",
            el->key);
     int j;
     enum vcfInfoType type = def ? def->type : vcfInfoString;
     if (type == vcfInfoFlag && el->count == 0)
 	printf("Yes"); // no values, so we can't call vcfPrintDatum...
     // However, if this is older VCF, type vcfInfoFlag might have a value.
+    if (looksTabular(def, el))  // looksTabular returns FALSE for a NULL def
+        {
+        // Values go in a separate table printed by the second loop further down; leave a pointer here
+        printf("<em>see below</em>");
+        }
+    else
+        {
         for (j = 0;  j < el->count;  j++)
             {
             if (j > 0)
                 printf(", ");
             if (el->missingData[j])
                 printf(".");
             else
                 vcfPrintDatum(stdout, el->values[j], type);
             }
+        }
     if (def != NULL)
 	printf("&nbsp;&nbsp;</TD><TD>%s", def->description);
     else
 	printf("</TD><TD>");
     printf("</TD></TR>\n");
     }
 puts("</TABLE>");
+// Second pass over the INFO elements: print each tabular one as its own table after the main table
+for (i = 0;  i < rec->infoCount;  i++)
+    {
+    struct vcfInfoElement *el = &(rec->infoElements[i]);
+    const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key);
+    if (looksTabular(def, el))  // same test as in the first loop, so the "see below" cells line up
+        {
+        puts("<BR>");
+        printf("<B>%s</B>: %s<BR>\n", el->key, def->description);
+        puts("<TABLE class='withThinBorder'>");
+        printTabularHeaderRow(def);
+        printTabularData(el);
+        puts("</TABLE>");
+        }
+    }
 }
 
 static void vcfGenotypeTable(struct vcfRecord *rec, char *track, char **displayAls)
 /* Put the table containing details about each genotype into a collapsible section. */
 {
 static struct dyString *tmp1 = NULL;
 if (tmp1 == NULL)
     tmp1 = dyStringNew(0);
 jsBeginCollapsibleSection(cart, track, "genotypes", "Detailed genotypes", FALSE);
 dyStringClear(tmp1);
 dyStringAppend(tmp1, rec->format);
 struct vcfFile *vcff = rec->file;
 enum vcfInfoType formatTypes[256];
 char *formatKeys[256];
 int formatCount = chopString(tmp1->string, ":", formatKeys, ArraySize(formatKeys));