294e28961da0eab069c44113ff02dbd6008bb9cf angie Mon Oct 27 22:13:02 2014 -0700 I was under the mistaken impression that VCF header ##INFO and ##FORMAT descriptions were required, but they are only "encouraged." This change removes errors about missing descriptions that were causing hgTables to give up after 101 errors on a track hub VCF. While inspecting the code I found that hgc's vcfClick.c printKeysWithDescriptions had never made use of its infoDefs arg as intended, so descriptions from ##ALT (symbolic alternate alleles) and ##FILTER were not displayed -- fixed that. refs #14231 diff --git src/lib/vcf.c src/lib/vcf.c index 4b21a6c..cde6810 100644 --- src/lib/vcf.c +++ src/lib/vcf.c @@ -215,45 +215,45 @@ char *vcfFilePooledStr(struct vcfFile *vcff, char *str) /* Allocate memory for a string from vcff's shared string pool. */ { return hashStoreName(vcff->pool, str); // Always stored in main pool, not reuse pool } static enum vcfInfoType vcfInfoTypeFromSubstr(struct vcfFile *vcff, char *line, regmatch_t substr) /* Translate substring of line into vcfInfoType or complain. 
*/ { char typeWord[16]; int substrLen = substr.rm_eo - substr.rm_so; if (substrLen > sizeof(typeWord) - 1) { vcfFileErr(vcff, "substring passed to vcfInfoTypeFromSubstr is too long."); - return vcfInfoNoType; + return vcfInfoString; } safencpy(typeWord, sizeof(typeWord), line + substr.rm_so, substrLen); if (sameString("Integer", typeWord)) return vcfInfoInteger; if (sameString("Float", typeWord)) return vcfInfoFloat; if (sameString("Flag", typeWord)) return vcfInfoFlag; if (sameString("Character", typeWord)) return vcfInfoCharacter; if (sameString("String", typeWord)) return vcfInfoString; vcfFileErr(vcff, "Unrecognized type word \"%s\" in metadata line \"%s\"", typeWord, line); -return vcfInfoNoType; +return vcfInfoString; } // Regular expressions to check format and extract information from header lines: static const char *fileformatRegex = "^##(file)?format=VCFv([0-9]+)(\\.([0-9]+))?$"; static const char *infoOrFormatRegex = "^##(INFO|FORMAT)=" "$"; static const char *filterOrAltRegex = "^##(FILTER|ALT)=" "$"; // VCF version 3.3 was different enough to warrant separate regexes: @@ -549,37 +549,31 @@ return def; } for (def = vcfSpecInfoDefs; def != NULL; def = def->next) { if (sameString(key, def->key)) return def; } return NULL; } static enum vcfInfoType typeForInfoKey(struct vcfFile *vcff, const char *key) /* Look up the type of INFO component key, in the definitions from the header, * and failing that, from the keys reserved in the spec. */ { struct vcfInfoDef *def = vcfInfoDefForKey(vcff, key); -if (def == NULL) - { - vcfFileErr(vcff, "There is no INFO header defining \"%s\"", key); - // default to string so we can display value as-is: - return vcfInfoString; - } -return def->type; +return def ? 
def->type : vcfInfoString; } static int parseInfoValue(struct vcfRecord *record, char *infoKey, enum vcfInfoType type, char *valStr, union vcfDatum **pData, bool **pMissingData) /* Parse a comma-separated list of values into array of union vcfInfoDatum and return count. */ { char *valWords[VCF_MAX_INFO]; int count = chopCommas(valStr, valWords); struct vcfFile *vcff = record->file; union vcfDatum *data = vcfFileAlloc(vcff, count * sizeof(union vcfDatum)); bool *missingData = vcfFileAlloc(vcff, count * sizeof(*missingData)); int j; for (j = 0; j < count; j++) { if (type != vcfInfoString && type != vcfInfoCharacter && sameString(valWords[j], ".")) @@ -1107,37 +1101,31 @@ return def; } for (def = vcfSpecGtFormatDefs; def != NULL; def = def->next) { if (sameString(key, def->key)) return def; } return NULL; } static enum vcfInfoType typeForGtFormat(struct vcfFile *vcff, const char *key) /* Look up the type of FORMAT component key, in the definitions from the header, * and failing that, from the keys reserved in the spec. */ { struct vcfInfoDef *def = vcfInfoDefForGtKey(vcff, key); -if (def == NULL) - { - vcfFileErr(vcff, "There is no FORMAT header defining \"%s\"", key); - // default to string so we can display value as-is: - return vcfInfoString; - } -return def->type; +return def ? def->type : vcfInfoString; } #define VCF_MAX_FORMAT VCF_MAX_INFO #define VCF_MAX_FORMAT_LEN (VCF_MAX_FORMAT * 4) void vcfParseGenotypes(struct vcfRecord *record) /* Translate record->genotypesUnparsedStrings[] into proper struct vcfGenotype[]. * This destroys genotypesUnparsedStrings. */ { if (record->genotypeUnparsedStrings == NULL) return; struct vcfFile *vcff = record->file; record->genotypes = vcfFileAlloc(vcff, vcff->genotypeCount * sizeof(struct vcfGenotype)); char format[VCF_MAX_FORMAT_LEN]; safecpy(format, sizeof(format), record->format);