294e28961da0eab069c44113ff02dbd6008bb9cf
angie
  Mon Oct 27 22:13:02 2014 -0700
I was under the mistaken impression that VCF header ##INFO and ##FORMAT
definitions were required, but they are only "encouraged."  This change
removes errors about missing descriptions
that were causing hgTables to give up after 101 errors on a track
hub VCF.  While inspecting the code I found that hgc's vcfClick.c
printKeysWithDescriptions had never made use of its infoDefs arg
as intended, so descriptions from ##ALT (symbolic alternate alleles)
and ##FILTER were not displayed -- fixed that.
refs #14231

diff --git src/lib/vcf.c src/lib/vcf.c
index 4b21a6c..cde6810 100644
--- src/lib/vcf.c
+++ src/lib/vcf.c
@@ -215,45 +215,45 @@
 
 char *vcfFilePooledStr(struct vcfFile *vcff, char *str)
 /* Allocate memory for a string from vcff's shared string pool. */
 {
 return hashStoreName(vcff->pool, str);  // Always stored in main pool, not reuse pool
 }
 
 static enum vcfInfoType vcfInfoTypeFromSubstr(struct vcfFile *vcff, char *line, regmatch_t substr)
 /* Translate substring of line into vcfInfoType or complain. */
 {
 char typeWord[16];
 int substrLen = substr.rm_eo - substr.rm_so;
 if (substrLen > sizeof(typeWord) - 1)
     {
     vcfFileErr(vcff, "substring passed to vcfInfoTypeFromSubstr is too long.");
-    return vcfInfoNoType;
+    return vcfInfoString;
     }
 safencpy(typeWord, sizeof(typeWord), line + substr.rm_so, substrLen);
 if (sameString("Integer", typeWord))
     return vcfInfoInteger;
 if (sameString("Float", typeWord))
     return vcfInfoFloat;
 if (sameString("Flag", typeWord))
     return vcfInfoFlag;
 if (sameString("Character", typeWord))
     return vcfInfoCharacter;
 if (sameString("String", typeWord))
     return vcfInfoString;
 vcfFileErr(vcff, "Unrecognized type word \"%s\" in metadata line \"%s\"", typeWord, line);
-return vcfInfoNoType;
+return vcfInfoString;
 }
 
 // Regular expressions to check format and extract information from header lines:
 static const char *fileformatRegex = "^##(file)?format=VCFv([0-9]+)(\\.([0-9]+))?$";
 static const char *infoOrFormatRegex =
     "^##(INFO|FORMAT)="
     "<ID=([A-Za-z0-9_:-]+),"
     "Number=(\\.|A|G|[0-9-]+),"
     "Type=([A-Za-z]+),"
     "Description=\"?(.*)\"?>$";
 static const char *filterOrAltRegex =
     "^##(FILTER|ALT)="
     "<ID=([^,]+),"
     "(Description|Type)=\"?(.*)\"?>$";
 // VCF version 3.3 was different enough to warrant separate regexes:
@@ -549,37 +549,31 @@
 	return def;
     }
 for (def = vcfSpecInfoDefs;  def != NULL;  def = def->next)
     {
     if (sameString(key, def->key))
 	return def;
     }
 return NULL;
 }
 
 static enum vcfInfoType typeForInfoKey(struct vcfFile *vcff, const char *key)
 /* Look up the type of INFO component key, in the definitions from the header,
  * and failing that, from the keys reserved in the spec. */
 {
 struct vcfInfoDef *def = vcfInfoDefForKey(vcff, key);
-if (def == NULL)
-    {
-    vcfFileErr(vcff, "There is no INFO header defining \"%s\"", key);
-    // default to string so we can display value as-is:
-    return vcfInfoString;
-    }
-return def->type;
+return def ? def->type : vcfInfoString;
 }
 
 static int parseInfoValue(struct vcfRecord *record, char *infoKey, enum vcfInfoType type,
 			  char *valStr, union vcfDatum **pData, bool **pMissingData)
 /* Parse a comma-separated list of values into array of union vcfInfoDatum and return count. */
 {
 char *valWords[VCF_MAX_INFO];
 int count = chopCommas(valStr, valWords);
 struct vcfFile *vcff = record->file;
 union vcfDatum *data = vcfFileAlloc(vcff, count * sizeof(union vcfDatum));
 bool *missingData = vcfFileAlloc(vcff, count * sizeof(*missingData));
 int j;
 for (j = 0;  j < count;  j++)
     {
     if (type != vcfInfoString && type != vcfInfoCharacter && sameString(valWords[j], "."))
@@ -1107,37 +1101,31 @@
 	return def;
     }
 for (def = vcfSpecGtFormatDefs;  def != NULL;  def = def->next)
     {
     if (sameString(key, def->key))
 	return def;
     }
 return NULL;
 }
 
 static enum vcfInfoType typeForGtFormat(struct vcfFile *vcff, const char *key)
 /* Look up the type of FORMAT component key, in the definitions from the header,
  * and failing that, from the keys reserved in the spec. */
 {
 struct vcfInfoDef *def = vcfInfoDefForGtKey(vcff, key);
-if (def == NULL)
-    {
-    vcfFileErr(vcff, "There is no FORMAT header defining \"%s\"", key);
-    // default to string so we can display value as-is:
-    return vcfInfoString;
-    }
-return def->type;
+return def ? def->type : vcfInfoString;
 }
 
 #define VCF_MAX_FORMAT VCF_MAX_INFO
 #define VCF_MAX_FORMAT_LEN (VCF_MAX_FORMAT * 4)
 
 void vcfParseGenotypes(struct vcfRecord *record)
 /* Translate record->genotypesUnparsedStrings[] into proper struct vcfGenotype[].
  * This destroys genotypesUnparsedStrings. */
 {
 if (record->genotypeUnparsedStrings == NULL)
     return;
 struct vcfFile *vcff = record->file;
 record->genotypes = vcfFileAlloc(vcff, vcff->genotypeCount * sizeof(struct vcfGenotype));
 char format[VCF_MAX_FORMAT_LEN];
 safecpy(format, sizeof(format), record->format);