e0d0e327c82dd1fd6acb00b23aa041219b7eb59f
angie
  Fri Jul 1 16:22:30 2011 -0700
#986 (VCF): Loosening regexes for parsing VCF header to accommodate 1000 Genomes pilot release trio indel files.
diff --git src/lib/vcf.c src/lib/vcf.c
index ea175e5..65bc467 100644
--- src/lib/vcf.c
+++ src/lib/vcf.c
@@ -226,38 +226,38 @@
     return vcfInfoFloat;
 if (sameString("Flag", typeWord))
     return vcfInfoFlag;
 if (sameString("Character", typeWord))
     return vcfInfoCharacter;
 if (sameString("String", typeWord))
     return vcfInfoString;
 vcfFileErr(vcff, "Unrecognized type word \"%s\" in metadata line \"%s\"", typeWord, line);
 return vcfInfoNoType;
 }
 
 // Regular expressions to check format and extract information from header lines:
 static const char *fileformatRegex = "^##(file)?format=VCFv([0-9]+)(\\.([0-9]+))?$";
 static const char *infoOrFormatRegex =
     "^##(INFO|FORMAT)="
-    "<ID=([A-Za-z0-9]+),"
-    "Number=(\\.|[0-9]+),"
+    "<ID=([A-Za-z0-9_:-]+),"
+    "Number=(\\.|[0-9-]+),"
     "Type=([A-Za-z]+),"
-    "Description=\"([^\"]+)\">$";
+    "Description=\"?([^\"]+)\"?>$";
 static const char *filterOrAltRegex =
     "^##(FILTER|ALT)="
-    "<ID=([A-Za-z0-9]+),"
-    "Description=\"([^\"]+)\">$";
+    "<ID=([A-Za-z0-9_:-]+),"
+    "(Description|Type)=\"([^\"]+)\">$";
 
 INLINE void nonAsciiWorkaround(char *line)
 // Workaround for annoying 3-byte quote marks included in some 1000 Genomes files:
 {
 (void)strSwapStrs(line, strlen(line)+1, "\342\200\234", "\"");
 (void)strSwapStrs(line, strlen(line)+1, "\342\200\235", "\"");
 }
 
 static void parseMetadataLine(struct vcfFile *vcff, char *line)
 /* Parse a VCF header line beginning with "##" that defines a metadata. */
 {
 char *ptr = line;
 if (ptr == NULL && !startsWith(ptr, "##"))
     errAbort("Bad line passed to parseMetadataLine");
 ptr += 2;