3448d26e6e6e90cf35747019e2e036ef260362d4 angie Fri Apr 3 13:12:00 2015 -0700 When VCF ##INFO description and data seem to have the same number of |-separated columns (and at least 3 apparent columns), display them as separate tables below the big table of INFO tags and settings because they can be quite large; for example, VEP annotations may have >40 |-separated columns for each alt allele & transcript. refs #14379 diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c index c1d23ac..51dce08 100644 --- src/hg/hgc/vcfClick.c +++ src/hg/hgc/vcfClick.c @@ -86,66 +86,175 @@ { if (rec->filterCount == 0 || sameString(rec->filters[0], ".")) printf("Filter: "NA"
\n"); else if (rec->filterCount == 1 && sameString(rec->filters[0], "PASS")) printf("Filter: PASS
\n"); else { printf("Filter failures: "); printf("\n"); struct vcfFile *vcff = rec->file; printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs, FALSE); printf("\n"); } } +// Characters we expect to see in |-separated parts of an ##INFO description that specifies +// tabular contents: +#define COL_DESC_WORD_REGEX "[A-Za-z_0-9.-]+" +// Series of |-separated words: +#define COL_DESC_REGEX COL_DESC_WORD_REGEX"(\\|"COL_DESC_WORD_REGEX")+" + +// Minimum number of |-separated values for interpreting descriptions and values as tabular: +#define MIN_COLUMN_COUNT 3 + +static boolean looksTabular(const struct vcfInfoDef *def, struct vcfInfoElement *el) +/* Return TRUE if def->description seems to contain a |-separated description of columns + * and el's first non-empty string value has the same number of |-separated parts. */ +{ +if (!def || def->type != vcfInfoString || isEmpty(def->description)) + return FALSE; +if (regexMatch(def->description, COL_DESC_REGEX)) + { + int descColCount = countChars(def->description, '|') + 1; + if (descColCount >= MIN_COLUMN_COUNT) + { + int j; + for (j = 0; j < el->count; j++) + { + char *val = el->values[j].datString; + if (isEmpty(val)) + continue; + int elColCount = countChars(val, '|') + 1; + if (elColCount == descColCount) + return TRUE; + } + } + } +return FALSE; +} + +static void printTabularHeaderRow(const struct vcfInfoDef *def) +/* Parse the column header parts out of def->description and print as table header row; + * call this only when looksTabular returns TRUE. */ +{ +regmatch_t substrArr[PATH_LEN]; +if (regexMatchSubstr(def->description, COL_DESC_REGEX, substrArr, ArraySize(substrArr))) + { + puts(""); + // Make a copy of the part of def->description that matches the regex, + // then chop by '|' and print out header column tags: + int matchSize = substrArr[0].rm_eo - substrArr[0].rm_so; + char copy[matchSize+1]; + safencpy(copy, sizeof(copy), def->description + substrArr[0].rm_so, matchSize); + // Turn '_' into ' ' so description words can wrap inside headers, saving some space + subChar(copy, '_', ' '); + char *words[PATH_LEN]; + int descColCount = chopByChar(copy, '|', words, ArraySize(words)); + int i; + for (i = 0; i < descColCount; i++) + printf("%s", words[i]); + puts(""); + } +else + errAbort("printTabularHeaderRow: code bug, if looksTabular returns true then " + "regex should work here"); +} + +static void printTabularData(struct vcfInfoElement *el) +/* Print a row for each value in el, separating columns by '|'. */ +{ +int j; +for (j = 0; j < el->count; j++) + { + puts(""); + char *val = el->values[j].datString; + if (!isEmpty(val)) + { + int len = strlen(val); + char copy[len+1]; + safencpy(copy, sizeof(copy), val, len); + char *words[PATH_LEN]; + int colCount = chopByChar(copy, '|', words, ArraySize(words)); + int k; + for (k = 0; k < colCount; k++) + printf("%s", words[k]); + } + puts(""); + } +} + + static void vcfInfoDetails(struct vcfRecord *rec) /* Expand info keys to descriptions, then print out keys and values. */ { if (rec->infoCount == 0) return; struct vcfFile *vcff = rec->file; puts("INFO column annotations:
"); puts(""); int i; for (i = 0; i < rec->infoCount; i++) { struct vcfInfoElement *el = &(rec->infoElements[i]); const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key); - printf("\n"); } puts("
%s:", + printf("
%s:", el->key); int j; enum vcfInfoType type = def ? def->type : vcfInfoString; if (type == vcfInfoFlag && el->count == 0) printf("Yes"); // no values, so we can't call vcfPrintDatum... // However, if this is older VCF, type vcfInfoFlag might have a value. + if (looksTabular(def, el)) + { + // Make a special display below + printf("see below"); + } + else + { for (j = 0; j < el->count; j++) { if (j > 0) printf(", "); if (el->missingData[j]) printf("."); else vcfPrintDatum(stdout, el->values[j], type); } + } if (def != NULL) printf("  %s", def->description); else printf(""); printf("
"); +// Now show the tabular fields, if any +for (i = 0; i < rec->infoCount; i++) + { + struct vcfInfoElement *el = &(rec->infoElements[i]); + const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key); + if (looksTabular(def, el)) + { + puts("
"); + printf("%s: %s
\n", el->key, def->description); + puts(""); + printTabularHeaderRow(def); + printTabularData(el); + puts("
"); + } + } } static void vcfGenotypeTable(struct vcfRecord *rec, char *track, char **displayAls) /* Put the table containing details about each genotype into a collapsible section. */ { static struct dyString *tmp1 = NULL; if (tmp1 == NULL) tmp1 = dyStringNew(0); jsBeginCollapsibleSection(cart, track, "genotypes", "Detailed genotypes", FALSE); dyStringClear(tmp1); dyStringAppend(tmp1, rec->format); struct vcfFile *vcff = rec->file; enum vcfInfoType formatTypes[256]; char *formatKeys[256]; int formatCount = chopString(tmp1->string, ":", formatKeys, ArraySize(formatKeys));