a3e03473bbde188141b61d0c3db48c85d2ec6aa3 chmalee Tue Sep 22 17:38:13 2020 -0700 Basic vcf to bed converter, with a -fields argument for what VCF INFO tags to include in the bigBed itself, refs #25010 diff --git src/lib/vcf.c src/lib/vcf.c index 5b17054..b9df010 100644 --- src/lib/vcf.c +++ src/lib/vcf.c @@ -1727,15 +1727,41 @@ void vcfRecordWriteNoGt(FILE *f, struct vcfRecord *rec) /* Write the first 8 columns of VCF rec to f. Genotype data will be ignored if present. */ { fprintf(f, "%s\t%d\t%s\t%s\t", rec->chrom, rec->chromStart+1, rec->name, rec->alleles[0]); // Alternate alleles start at [1] vcfWriteWordArrayWithSep(f, rec->alleleCount-1, &(rec->alleles[1]), ','); fputc('\t', f); fputs(rec->qual, f); fputc('\t', f); vcfWriteWordArrayWithSep(f, rec->filterCount, rec->filters, ';'); fputc('\t', f); vcfWriteInfo(f, rec); fputc('\n', f); } + +boolean looksTabular(const struct vcfInfoDef *def, const struct vcfInfoElement *el) +/* Return TRUE if def->description seems to contain a |-separated description of columns + * and el's first non-empty string value has the same number of |-separated parts. */ +{ +if (!def || def->type != vcfInfoString || isEmpty(def->description)) + return FALSE; +if (regexMatch(def->description, COL_DESC_REGEX)) + { + int descColCount = countChars(def->description, '|') + 1; + if (descColCount >= MIN_COLUMN_COUNT) + { + int j; + for (j = 0; j < el->count; j++) + { + char *val = el->values[j].datString; + if (isEmpty(val)) + continue; + int elColCount = countChars(val, '|') + 1; + if (elColCount == descColCount) + return TRUE; + } + } + } +return FALSE; +}