3448d26e6e6e90cf35747019e2e036ef260362d4
angie
Fri Apr 3 13:12:00 2015 -0700
When VCF ##INFO description and data seem to have the same number of
|-separated columns (and at least 3 apparent columns), display them
as separate tables below the big table of INFO tags and settings
because they can be quite large; for example, VEP annotations may
have >40 |-separated columns for each alt allele & transcript.
refs #14379
diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c
index c1d23ac..51dce08 100644
--- src/hg/hgc/vcfClick.c
+++ src/hg/hgc/vcfClick.c
@@ -86,66 +86,175 @@
{
if (rec->filterCount == 0 || sameString(rec->filters[0], "."))
printf("Filter: "NA"
\n");
else if (rec->filterCount == 1 && sameString(rec->filters[0], "PASS"))
printf("Filter: PASS
\n");
else
{
printf("Filter failures: ");
printf("\n");
struct vcfFile *vcff = rec->file;
printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs, FALSE);
printf("\n");
}
}
+// Characters allowed in each |-separated word of an ##INFO description that specifies
+// tabular contents (letters, digits, '_', '.' and '-'):
+#define COL_DESC_WORD_REGEX "[A-Za-z_0-9.-]+"
+// Series of two or more such words joined by '|' (i.e. at least one '|' is required):
+#define COL_DESC_REGEX COL_DESC_WORD_REGEX"(\\|"COL_DESC_WORD_REGEX")+"
+
+// Minimum number of |-separated values for interpreting descriptions and values as tabular:
+#define MIN_COLUMN_COUNT 3
+
+static boolean looksTabular(const struct vcfInfoDef *def, struct vcfInfoElement *el)
+/* Return TRUE if def is string-typed, def->description seems to list >= MIN_COLUMN_COUNT
+ * |-separated columns, and any non-empty value of el has the same number of |-separated parts. */
+{
+if (!def || def->type != vcfInfoString || isEmpty(def->description))
+ return FALSE;
+if (regexMatch(def->description, COL_DESC_REGEX))
+ {
+ int descColCount = countChars(def->description, '|') + 1; // counted over the whole description
+ if (descColCount >= MIN_COLUMN_COUNT)
+ {
+ int j;
+ for (j = 0; j < el->count; j++)
+ {
+ char *val = el->values[j].datString;
+ if (isEmpty(val))
+ continue; // skip empty values; they cannot confirm or refute the column count
+ int elColCount = countChars(val, '|') + 1;
+ if (elColCount == descColCount)
+ return TRUE; // one matching value is enough; mismatching values are just skipped
+ }
+ }
+ }
+return FALSE;
+}
+
+static void printTabularHeaderRow(const struct vcfInfoDef *def)
+/* Parse the column header parts out of def->description and print as table header row;
+ * call this only when looksTabular returns TRUE. */
+{
+regmatch_t substrArr[PATH_LEN];
+if (regexMatchSubstr(def->description, COL_DESC_REGEX, substrArr, ArraySize(substrArr)))
+ {
+ puts("
%s: | ", + printf(" | ||
%s: | ", el->key); int j; enum vcfInfoType type = def ? def->type : vcfInfoString; if (type == vcfInfoFlag && el->count == 0) printf("Yes"); // no values, so we can't call vcfPrintDatum... // However, if this is older VCF, type vcfInfoFlag might have a value. + if (looksTabular(def, el)) + { + // Make a special display below + printf("see below"); + } + else + { for (j = 0; j < el->count; j++) { if (j > 0) printf(", "); if (el->missingData[j]) printf("."); else vcfPrintDatum(stdout, el->values[j], type); } + } if (def != NULL) printf(" | %s", def->description); else printf(" | "); printf(" |