a3e03473bbde188141b61d0c3db48c85d2ec6aa3
chmalee
  Tue Sep 22 17:38:13 2020 -0700
Basic vcf to bed converter, with a -fields argument for what VCF INFO tags to include in the bigBed itself, refs #25010

diff --git src/inc/vcf.h src/inc/vcf.h
index 7450ad6..bad2d5d 100644
--- src/inc/vcf.h
+++ src/inc/vcf.h
@@ -334,16 +334,29 @@
 /* Allocate memory for a string from vcff's shared string pool. */
 
 #define VCF_NUM_COLS 10
 
 struct asObject *vcfAsObj();
 // Return asObject describing fields of VCF
 
 char *vcfGetSlashSepAllelesFromWords(char **words, struct dyString *dy);
 /* Overwrite dy with a /-separated allele string from VCF words,
  * skipping the extra initial base that VCF requires for indel alleles if necessary.
  * Return dy->string for convenience. */
 
 void vcfRecordWriteNoGt(FILE *f, struct vcfRecord *rec);
 /* Write the first 8 columns of VCF rec to f.  Genotype data will be ignored if present. */
 
+// Regex for a single word in the |-separated parts of an ##INFO description that specifies
+// tabular contents: one or more ASCII letters, digits, '_', '.' or '-'.
+#define COL_DESC_WORD_REGEX "[A-Za-z_0-9.-]+"
+// Series of two or more words separated by a literal '|' (at least one '|' is required):
+#define COL_DESC_REGEX COL_DESC_WORD_REGEX"(\\|"COL_DESC_WORD_REGEX")+"
+
+// Minimum number of |-separated values for interpreting descriptions and values as tabular:
+#define MIN_COLUMN_COUNT 3
+
+boolean looksTabular(const struct vcfInfoDef *def, const struct vcfInfoElement *el);
+/* Return TRUE if def->description seems to contain a |-separated description of columns
+ * and el's first non-empty string value has the same number of |-separated parts. */
+
 #endif // vcf_h