4344e168885850e053dc245826ab42f2b445c98f angie Wed Aug 24 13:11:01 2011 -0700 Feature #2821 (VCF parser): Fun with flawed input. This file's rows have 6 keywords in the format column, but most genotype columns have only 5 pieces of data: ftp://ftp-trace.ncbi.nlm.nih.gov/1000genomes/ftp/release/20100804/supporting/AFR.BI_withr2.20100804.genotypes.vcf.gz This causes many vcfFileErr calls in vcfParseGenotypes. Turned out that vcfFileErr was not stopping after the specified # of errors; and that the warnings are so verbose that I don't think we really need to display them in hgTracks and hgc. So they are now ignored, and we don't segv on missing genotype info. diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c index a26df9d..e39854b 100644 --- src/hg/hgc/vcfClick.c +++ src/hg/hgc/vcfClick.c @@ -101,40 +101,48 @@ for (j = 0; j < el->count; j++) { if (j > 0) printf(", "); vcfPrintDatum(stdout, el->values[j], type); } if (def != NULL) printf("</TD><TD> %s", def->description); else printf("</TD><TD>"); printf("</TD></TR>\n"); } puts("</TABLE>"); } +static void ignoreEm(char *format, va_list args) +/* Ignore warnings from genotype parsing -- when there's one, there + * are usually hundreds more just like it. */ +{ +} + static void vcfGenotypesDetails(struct vcfRecord *rec, char *track) /* Print genotypes in some kind of table... 
*/ { struct vcfFile *vcff = rec->file; if (vcff->genotypeCount == 0) return; static struct dyString *tmp1 = NULL; if (tmp1 == NULL) tmp1 = dyStringNew(0); +pushWarnHandler(ignoreEm); vcfParseGenotypes(rec); +popWarnHandler(); // Tally genotypes and alleles for summary: int refs = 0, alts = 0, refRefs = 0, refAlts = 0, altAlts = 0, gtOther = 0, phasedGts = 0; int i; for (i = 0; i < vcff->genotypeCount; i++) { struct vcfGenotype *gt = &(rec->genotypes[i]); if (gt->isPhased) phasedGts++; if (gt->hapIxA == 0) refs++; else alts++; if (!gt->isHaploid) { if (gt->hapIxB == 0) @@ -189,31 +197,31 @@ if (sameString(formatKeys[i], vcfGtGenotype)) continue; printf("<TH>%s</TH>", formatKeys[i]); } puts("</TR>\n"); for (i = 0; i < vcff->genotypeCount; i++) { struct vcfGenotype *gt = &(rec->genotypes[i]); char *hapA = rec->alleles[gt->hapIxA]; char *hapB = gt->isHaploid ? NA : rec->alleles[gt->hapIxB]; char sep = gt->isPhased ? '|' : '/'; char *phasing = gt->isHaploid ? NA : gt->isPhased ? "Y" : "n"; printf("<TR><TD>%s</TD><TD>%s%c%s</TD><TD>%s</TD>", vcff->genotypeIds[i], hapA, sep, hapB, phasing); int j; - for (j = 0; j < formatCount; j++) + for (j = 0; j < gt->infoCount; j++) { if (sameString(formatKeys[j], vcfGtGenotype)) continue; printf("<TD>"); struct vcfInfoElement *el = &(gt->infoElements[j]); int k; for (k = 0; k < el->count; k++) { if (k > 0) printf(", "); vcfPrintDatum(stdout, el->values[k], formatTypes[j]); } printf("</TD>"); } puts("</TR>"); @@ -261,31 +269,31 @@ void doVcfTabixDetails(struct trackDb *tdb, char *item) /* Show details of an alignment from a VCF file compressed and indexed by tabix. 
*/ { #if (defined USE_TABIX && defined KNETFILE_HOOKS) knetUdcInstall(); if (udcCacheTimeout() < 300) udcSetCacheTimeout(300); #endif//def USE_TABIX && KNETFILE_HOOKS int start = cartInt(cart, "o"); int end = cartInt(cart, "t"); struct sqlConnection *conn = hAllocConnTrack(database, tdb); // TODO: will need to handle per-chrom files like bam, maybe fold bamFileNameFromTable into this:: char *fileOrUrl = bbiNameFromSettingOrTable(tdb, conn, tdb->table); hFreeConn(&conn); -int vcfMaxErr = 100; +int vcfMaxErr = -1; struct vcfFile *vcff = NULL; /* protect against temporary network error */ struct errCatch *errCatch = errCatchNew(); if (errCatchStart(errCatch)) { vcff = vcfTabixFileMayOpen(fileOrUrl, seqName, start, end, vcfMaxErr); } errCatchEnd(errCatch); if (errCatch->gotError) { if (isNotEmpty(errCatch->message->string)) warn("%s", errCatch->message->string); } errCatchFree(&errCatch); if (vcff != NULL)