c843f30cccc3610e26a26142ad0622f29402254b angie Wed Jan 11 09:54:24 2012 -0800 Bug #6529 (VCF internal rep doesn't do missing data): add flags for missingdata values to vcfInfoElement, so we know the difference between "0" and ".". This applies to both INFO column values and genotype (FORMAT) values. Also, vcfGenotype's hapIxA and hapIxB are now signed, with negative values indicating missing data. User reported bug in MLQ #6462. diff --git src/hg/hgc/vcfClick.c src/hg/hgc/vcfClick.c index 4ca7b5e..9b64613 100644 --- src/hg/hgc/vcfClick.c +++ src/hg/hgc/vcfClick.c @@ -90,30 +90,33 @@ { struct vcfInfoElement *el = &(rec->infoElements[i]); const struct vcfInfoDef *def = vcfInfoDefForKey(vcff, el->key); if (def == NULL) continue; printf("%s: ", el->key); int j; enum vcfInfoType type = def->type; if (type == vcfInfoFlag && el->count == 0) printf("Yes"); // no values, so we can't call vcfPrintDatum... // However, if this is older VCF, type vcfInfoFlag might have a value. for (j = 0; j < el->count; j++) { if (j > 0) printf(", "); + if (el->missingData[j]) + printf("."); + else vcfPrintDatum(stdout, el->values[j], type); } if (def != NULL) printf(" %s", def->description); else printf(""); printf("\n"); } puts(""); } static void ignoreEm(char *format, va_list args) /* Ignore warnings from genotype parsing -- when there's one, there * are usually hundreds more just like it. */ { @@ -129,37 +132,37 @@ if (tmp1 == NULL) tmp1 = dyStringNew(0); pushWarnHandler(ignoreEm); vcfParseGenotypes(rec); popWarnHandler(); // Tally genotypes and alleles for summary: int refs = 0, alts = 0, refRefs = 0, refAlts = 0, altAlts = 0, gtOther = 0, phasedGts = 0; int i; for (i = 0; i < vcff->genotypeCount; i++) { struct vcfGenotype *gt = &(rec->genotypes[i]); if (gt->isPhased) phasedGts++; if (gt->hapIxA == 0) refs++; - else + else if (gt->hapIxA > 0) alts++; if (!gt->isHaploid) { if (gt->hapIxB == 0) refs++; - else + else if (gt->hapIxB > 0) alts++; if (gt->hapIxA == 0 && gt->hapIxB == 0) refRefs++; else if (gt->hapIxA == 1 && gt->hapIxB == 1) altAlts++; else if ((gt->hapIxA == 1 && gt->hapIxB == 0) || (gt->hapIxA == 0 && gt->hapIxB == 1)) refAlts++; else gtOther++; } } printf("Genotype count: %d (%d phased)
\n", vcff->genotypeCount, phasedGts); double refAf = (double)refs/(2*vcff->genotypeCount); double altAf = (double)alts/(2*vcff->genotypeCount); @@ -197,48 +200,56 @@ printf("  %s: %s
\n", formatKeys[i], desc); formatTypes[i] = def->type; } hTableStart(); puts("Sample IDGenotypePhased?"); for (i = 0; i < formatCount; i++) { if (sameString(formatKeys[i], vcfGtGenotype)) continue; printf("%s", formatKeys[i]); } puts("\n"); for (i = 0; i < vcff->genotypeCount; i++) { struct vcfGenotype *gt = &(rec->genotypes[i]); - char *hapA = rec->alleles[gt->hapIxA]; - char *hapB = gt->isHaploid ? NA : rec->alleles[gt->hapIxB]; + char *hapA = ".", *hapB = "."; + if (gt->hapIxA >= 0) + hapA = rec->alleles[(unsigned char)gt->hapIxA]; + if (gt->isHaploid) + hapB = NA; + else if (gt->hapIxB >= 0) + hapB = rec->alleles[(unsigned char)gt->hapIxB]; char sep = gt->isPhased ? '|' : '/'; char *phasing = gt->isHaploid ? NA : gt->isPhased ? "Y" : "n"; printf("%s%s%c%s%s", vcff->genotypeIds[i], hapA, sep, hapB, phasing); int j; for (j = 0; j < gt->infoCount; j++) { if (sameString(formatKeys[j], vcfGtGenotype)) continue; printf(""); struct vcfInfoElement *el = &(gt->infoElements[j]); int k; for (k = 0; k < el->count; k++) { if (k > 0) printf(", "); + if (el->missingData[k]) + printf("."); + else vcfPrintDatum(stdout, el->values[k], formatTypes[j]); } printf(""); } puts(""); } hTableEnd(); jsEndCollapsibleSection(); } static void pgSnpCodingDetail(struct vcfRecord *rec) /* Translate rec into pgSnp (with proper chrom name) and call Belinda's * coding effect predictor from pgSnp details. */ { char *genePredTable = "knownGene";