a3e03473bbde188141b61d0c3db48c85d2ec6aa3 chmalee Tue Sep 22 17:38:13 2020 -0700 Basic vcf to bed converter, with a -fields argument for what VCF INFO tags to include in the bigBed itself, refs #25010 diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c index a7ad5f3..1fb648f 100644 --- src/hg/hgc/vcfClick.c +++ src/hg/hgc/vcfClick.c @@ -87,64 +87,31 @@ { if (rec->filterCount == 0 || sameString(rec->filters[0], ".")) printf("<B>Filter:</B> "NA"<BR>\n"); else if (rec->filterCount == 1 && sameString(rec->filters[0], "PASS")) printf("<B>Filter:</B> PASS<BR>\n"); else { printf("<B>Filter failures:</B> "); printf("<font style='font-weight: bold; color: #FF0000;'>\n"); struct vcfFile *vcff = rec->file; printKeysWithDescriptions(vcff, rec->filterCount, rec->filters, vcff->filterDefs, FALSE); printf("</font>\n"); } } -// Characters we expect to see in |-separated parts of an ##INFO description that specifies -// tabular contents: -#define COL_DESC_WORD_REGEX "[A-Za-z_0-9.-]+" -// Series of |-separated words: -#define COL_DESC_REGEX COL_DESC_WORD_REGEX"(\\|"COL_DESC_WORD_REGEX")+" -// Minimum number of |-separated values for interpreting descriptions and values as tabular: -#define MIN_COLUMN_COUNT 3 - -static boolean looksTabular(const struct vcfInfoDef *def, struct vcfInfoElement *el) -/* Return TRUE if def->description seems to contain a |-separated description of columns - * and el's first non-empty string value has the same number of |-separated parts. */ -{ -if (!def || def->type != vcfInfoString || isEmpty(def->description)) - return FALSE; -if (regexMatch(def->description, COL_DESC_REGEX)) - { - int descColCount = countChars(def->description, '|') + 1; - if (descColCount >= MIN_COLUMN_COUNT) - { - int j; - for (j = 0; j < el->count; j++) - { - char *val = el->values[j].datString; - if (isEmpty(val)) - continue; - int elColCount = countChars(val, '|') + 1; - if (elColCount == descColCount) - return TRUE; - } - } - } -return FALSE; -} static void printTabularHeaderRow(const struct vcfInfoDef *def) /* Parse the column header parts out of def->description and print as table header row; * call this only when looksTabular returns TRUE. */ { regmatch_t substrArr[PATH_LEN]; if (regexMatchSubstr(def->description, COL_DESC_REGEX, substrArr, ArraySize(substrArr))) { puts("<TR>"); // Make a copy of the part of def->description that matches the regex, // then chop by '|' and print out header column tags: int matchSize = substrArr[0].rm_eo - substrArr[0].rm_so; char copy[matchSize+1]; safencpy(copy, sizeof(copy), def->description + substrArr[0].rm_so, matchSize); // Turn '_' into ' ' so description words can wrap inside headers, saving some space