3448d26e6e6e90cf35747019e2e036ef260362d4 angie Fri Apr 3 13:12:00 2015 -0700 When VCF ##INFO description and data seem to have the same number of |-separated columns (and at least 3 apparent columns), display them as separate tables below the big table of INFO tags and settings because they can be quite large; for example, VEP annotations may have >40 |-separated columns for each alt allele & transcript. refs #14379 diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c index c1d23ac..51dce08 100644 --- src/hg/hgc/vcfClick.c +++ src/hg/hgc/vcfClick.c @@ -86,66 +86,175 @@ { if (rec->filterCount == 0 || sameString(rec->filters[0], ".")) printf("<B>Filter:</B> "NA"<BR>\n"); else if (rec->filterCount == 1 && sameString(rec->filters[0], "PASS")) printf("<B>Filter:</B> PASS<BR>\n"); else { printf("<B>Filter failures:</B> "); printf("<font style='font-weight: bold; color: #FF0000;'>\n"); struct vcfFile *vcff = rec->file; printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs, FALSE); printf("</font>\n"); } } +// Characters we expect to see in |-separated parts of an ##INFO description that specifies +// tabular contents: +#define COL_DESC_WORD_REGEX "[A-Za-z_0-9.-]+" +// Series of |-separated words: +#define COL_DESC_REGEX COL_DESC_WORD_REGEX"(\\|"COL_DESC_WORD_REGEX")+" + +// Minimum number of |-separated values for interpreting descriptions and values as tabular: +#define MIN_COLUMN_COUNT 3 + +static boolean looksTabular(const struct vcfInfoDef *def, struct vcfInfoElement *el) +/* Return TRUE if def->description seems to contain a |-separated description of columns + * and el's first non-empty string value has the same number of |-separated parts. */ +{ +if (!def || def->type != vcfInfoString || isEmpty(def->description)) + return FALSE; +if (regexMatch(def->description, COL_DESC_REGEX)) + { + int descColCount = countChars(def->description, '|') + 1; + if (descColCount >= MIN_COLUMN_COUNT) + { + int j; + for (j = 0; j < el->count; j++) + { + char *val = el->values[j].datString; + if (isEmpty(val)) + continue; + int elColCount = countChars(val, '|') + 1; + if (elColCount == descColCount) + return TRUE; + } + } + } +return FALSE; +} + +static void printTabularHeaderRow(const struct vcfInfoDef *def) +/* Parse the column header parts out of def->description and print as table header row; + * call this only when looksTabular returns TRUE. */ +{ +regmatch_t substrArr[PATH_LEN]; +if (regexMatchSubstr(def->description, COL_DESC_REGEX, substrArr, ArraySize(substrArr))) + { + puts("<TR>"); + // Make a copy of the part of def->description that matches the regex, + // then chop by '|' and print out header column tags: + int matchSize = substrArr[0].rm_eo - substrArr[0].rm_so; + char copy[matchSize+1]; + safencpy(copy, sizeof(copy), def->description + substrArr[0].rm_so, matchSize); + // Turn '_' into ' ' so description words can wrap inside headers, saving some space + subChar(copy, '_', ' '); + char *words[PATH_LEN]; + int descColCount = chopByChar(copy, '|', words, ArraySize(words)); + int i; + for (i = 0; i < descColCount; i++) + printf("<TH class='withThinBorder'>%s</TH>", words[i]); + puts("</TR>"); + } +else + errAbort("printTabularHeaderRow: code bug, if looksTabular returns true then " + "regex should work here"); +} + +static void printTabularData(struct vcfInfoElement *el) +/* Print a row for each value in el, separating columns by '|'. */ +{ +int j; +for (j = 0; j < el->count; j++) + { + puts("<TR>"); + char *val = el->values[j].datString; + if (!isEmpty(val)) + { + int len = strlen(val); + char copy[len+1]; + safencpy(copy, sizeof(copy), val, len); + char *words[PATH_LEN]; + int colCount = chopByChar(copy, '|', words, ArraySize(words)); + int k; + for (k = 0; k < colCount; k++) + printf("<TD class='withThinBorder'>%s</TD>", words[k]); + } + puts("</TR>"); + } +} + + static void vcfInfoDetails(struct vcfRecord *rec) /* Expand info keys to descriptions, then print out keys and values. */ { if (rec->infoCount == 0) return; struct vcfFile *vcff = rec->file; puts("<B>INFO column annotations:</B><BR>"); puts("<TABLE border=0 cellspacing=0 cellpadding=2>"); int i; for (i = 0; i < rec->infoCount; i++) { struct vcfInfoElement *el = &(rec->infoElements[i]); const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key); - printf("<TR valign='top'><TD align=\"right\"><B>%s:</B></TD><TD style=width:15%%;'>", + printf("<TR valign='top'><TD align=\"right\"><B>%s:</B></TD><TD>", el->key); int j; enum vcfInfoType type = def ? def->type : vcfInfoString; if (type == vcfInfoFlag && el->count == 0) printf("Yes"); // no values, so we can't call vcfPrintDatum... // However, if this is older VCF, type vcfInfoFlag might have a value. + if (looksTabular(def, el)) + { + // Make a special display below + printf("<em>see below</em>"); + } + else + { for (j = 0; j < el->count; j++) { if (j > 0) printf(", "); if (el->missingData[j]) printf("."); else vcfPrintDatum(stdout, el->values[j], type); } + } if (def != NULL) printf(" </TD><TD>%s", def->description); else printf("</TD><TD>"); printf("</TD></TR>\n"); } puts("</TABLE>"); +// Now show the tabular fields, if any +for (i = 0; i < rec->infoCount; i++) + { + struct vcfInfoElement *el = &(rec->infoElements[i]); + const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key); + if (looksTabular(def, el)) + { + puts("<BR>"); + printf("<B>%s</B>: %s<BR>\n", el->key, def->description); + puts("<TABLE class='withThinBorder'>"); + printTabularHeaderRow(def); + printTabularData(el); + puts("</TABLE>"); + } + } } static void vcfGenotypeTable(struct vcfRecord *rec, char *track, char **displayAls) /* Put the table containing details about each genotype into a collapsible section. */ { static struct dyString *tmp1 = NULL; if (tmp1 == NULL) tmp1 = dyStringNew(0); jsBeginCollapsibleSection(cart, track, "genotypes", "Detailed genotypes", FALSE); dyStringClear(tmp1); dyStringAppend(tmp1, rec->format); struct vcfFile *vcff = rec->file; enum vcfInfoType formatTypes[256]; char *formatKeys[256]; int formatCount = chopString(tmp1->string, ":", formatKeys, ArraySize(formatKeys));