3f6e034cd0844d8e99a69e56af72e8c4cce98a9c
chmalee
  Tue Jan 30 15:55:18 2024 -0800
Hash INFO keys during header parsing for faster lookups later when loading records

diff --git src/inc/vcf.h src/inc/vcf.h
index 2e5eb5d..e5f210e 100644
--- src/inc/vcf.h
+++ src/inc/vcf.h
@@ -87,30 +87,31 @@
     struct vcfFile *file;	// Pointer back to parent vcfFile
 };
 
 struct vcfFile
 /* Info extracted from a VCF file.  Manages all memory for contents.
  * Clearly borrowing structure from MarkD's gff3File. :) */
 {
     char *fileOrUrl;		// VCF local file path or URL
     char *headerString;		// Complete original header including newlines.
     int majorVersion;		// 4 etc.
     int minorVersion;		// 0, 1 etc.
     struct vcfInfoDef *infoDefs;	// Header's definitions of INFO column components
     struct vcfInfoDef *filterDefs;	// Header's definitions of FILTER column failure codes
     struct vcfInfoDef *altDefs;	// Header's defs of symbolic alternate alleles (e.g. DEL, INS)
     struct vcfInfoDef *gtFormatDefs;	// Header's defs of GENOTYPE compnts. listed in FORMAT col.
+    struct hash *infoDefHash; // Hash of INFO keys (can be hundreds), built while parsing header for fast lookups
     bool allPhased;         // True if all record->genotypes have been phased
     int genotypeCount;		// Number of optional genotype columns described in header
     char **genotypeIds;		// Array of optional genotype column names described in header
     struct vcfRecord *records;	// VCF data rows, sorted by position
     struct hash *byName;		// Hash records by name -- not populated until needed.
     struct hash *pool;		// Used to allocate string values that tend to
 				// be repeated in the files.  hash's localMem is also used to
 				// allocated memory for all other objects (if recordPool null)
     struct lm *reusePool;       // If created with vcfFileMakeReusePool, non-shared record data is
                                 // allocated from this pool. Useful when walking through huge files.
     struct lineFile *lf;	// Used only during parsing
     int maxErr;			// Maximum number of errors before aborting
     int errCnt;			// Error count
 };