52a177ce176f86c4f3d6afc1f9bdd0c3cc1f8ac9 angie Mon Oct 17 15:14:16 2011 -0700 Removing unwanted double-quote at end of line that is pulled in by loosened regex. diff --git src/lib/vcf.c src/lib/vcf.c index b5202e1..0fe7ef2 100644 --- src/lib/vcf.c +++ src/lib/vcf.c @@ -294,30 +294,33 @@ nonAsciiWorkaround(line); if (regexMatchSubstr(line, infoOrFormatRegex, substrs, ArraySize(substrs))) // substrs[2] is ID/key, substrs[3] is Number, [4] is Type and [5] is Description. { struct vcfInfoDef *def = vcfFileAlloc(vcff, sizeof(struct vcfInfoDef)); def->key = vcfFileCloneSubstr(vcff, line, substrs[2]); char *number = vcfFileCloneSubstr(vcff, line, substrs[3]); if (sameString(number, ".") || sameString(number, "A") || sameString(number, "G")) // A is #alts which varies line-to-line; "G" is #genotypes which we haven't // yet seen. Why is there a G here -- shouldn't such attributes go in the // genotype columns? def->fieldCount = -1; else def->fieldCount = atoi(number); def->type = vcfInfoTypeFromSubstr(vcff, line, substrs[4]); + // greedy regex pulls in end quote, trim if found: + if (line[substrs[5].rm_eo-1] == '"') + line[substrs[5].rm_eo-1] = '\0'; def->description = vcfFileCloneSubstr(vcff, line, substrs[5]); slAddHead((isInfo ? &(vcff->infoDefs) : &(vcff->gtFormatDefs)), def); } else vcfFileErr(vcff, "##%s line does not match expected pattern /%s/: \"%s\"", (isInfo ? "INFO" : "FORMAT"), infoOrFormatRegex, line); } else if (startsWith("##FILTER=", line) || startsWith("##ALT=", line)) { boolean isFilter = startsWith("##FILTER", line); if (regexMatchSubstr(line, filterOrAltRegex, substrs, ArraySize(substrs))) { // substrs[2] is ID/key, substrs[4] is Description. struct vcfInfoDef *def = vcfFileAlloc(vcff, sizeof(struct vcfInfoDef)); def->key = vcfFileCloneSubstr(vcff, line, substrs[2]);