a3e03473bbde188141b61d0c3db48c85d2ec6aa3
chmalee
Tue Sep 22 17:38:13 2020 -0700
Basic vcf to bed converter, with a -fields argument for what VCF INFO tags to include in the bigBed itself, refs #25010

diff --git src/inc/vcf.h src/inc/vcf.h
index 7450ad6..bad2d5d 100644
--- src/inc/vcf.h
+++ src/inc/vcf.h
@@ -334,16 +334,29 @@
 /* Allocate memory for a string from vcff's shared string pool. */
 
 #define VCF_NUM_COLS 10
 
 struct asObject *vcfAsObj();
 // Return asObject describing fields of VCF
 
 char *vcfGetSlashSepAllelesFromWords(char **words, struct dyString *dy);
 /* Overwrite dy with a /-separated allele string from VCF words,
  * skipping the extra initial base that VCF requires for indel alleles if necessary.
  * Return dy->string for convenience. */
 
 void vcfRecordWriteNoGt(FILE *f, struct vcfRecord *rec);
 /* Write the first 8 columns of VCF rec to f. Genotype data will be ignored if present. */
 
+// Characters we expect to see in |-separated parts of an ##INFO description that specifies
+// tabular contents:
+#define COL_DESC_WORD_REGEX "[A-Za-z_0-9.-]+"
+// Series of |-separated words:
+#define COL_DESC_REGEX COL_DESC_WORD_REGEX"(\\|"COL_DESC_WORD_REGEX")+"
+
+// Minimum number of |-separated values for interpreting descriptions and values as tabular:
+#define MIN_COLUMN_COUNT 3
+
+boolean looksTabular(const struct vcfInfoDef *def, const struct vcfInfoElement *el);
+/* Return TRUE if def->description seems to contain a |-separated description of columns
+ * and el's first non-empty string value has the same number of |-separated parts. */
+
 #endif // vcf_h