d87ef9ad0e95989404c641381e4671ac95ba5b90 angie Fri Aug 16 15:37:15 2013 -0700 Fix for #11519: annoStreamVcf wasn't chr-ifying VCF seq names (which can be 1, 2, 3 instead of chr1, chr2, chr3) before comparing with the current region chrom (or minChrom). diff --git src/lib/annoStreamVcf.c src/lib/annoStreamVcf.c index a13f9e9..47f902e 100644 --- src/lib/annoStreamVcf.c +++ src/lib/annoStreamVcf.c @@ -64,86 +64,86 @@ if (i > 0) dyStringAppendC(self->dyGt, '\t'); dyStringAppend(self->dyGt, words[9+i]); } self->asWords[9] = self->dyGt->string; } else { self->asWords[8] = ""; self->asWords[9] = ""; } self->record = vcfRecordFromRow(self->vcff, words); return self->asWords; } +static char *getProperChromName(struct annoStreamVcf *self, char *vcfChrom) +/* We tolerate chr-less chrom names in VCF and BAM ("1" for "chr1" etc); to avoid + * confusing the rest of the system, return the chr-ful version if it exists. */ +{ +char *name = hashFindVal(self->chromNameHash, vcfChrom); +if (name == NULL) + { + name = vcfChrom; + struct twoBitFile *tbf = self->streamer.assembly->tbf; + char buf[256]; + if (! twoBitIsSequence(tbf, vcfChrom)) + { + safef(buf, sizeof(buf), "chr%s", vcfChrom); + if (twoBitIsSequence(tbf, buf)) + name = buf; + } + name = lmCloneString(self->chromNameHash->lm, name); + hashAdd(self->chromNameHash, vcfChrom, name); + } +return name; +} + static char **nextRowUnfiltered(struct annoStreamVcf *self, char *minChrom, uint minEnd) /* Get the next VCF record and put the row text into autoSql words. * Return pointer to self->asWords if we get a row, otherwise NULL. 
*/ { struct annoStreamer *sSelf = (struct annoStreamer *)self; char *regionChrom = sSelf->chrom; uint regionStart = sSelf->regionStart; uint regionEnd = sSelf->regionEnd; if (minChrom != NULL) { if (regionChrom == NULL) { regionChrom = minChrom; regionStart = minEnd; regionEnd = annoAssemblySeqSize(sSelf->assembly, minChrom); } else { regionStart = max(regionStart, minEnd); } } char **words = nextRowRaw(self); if (regionChrom != NULL && words != NULL) { - if (self->isTabix && strcmp(words[0], regionChrom) < 0) + if (self->isTabix && strcmp(getProperChromName(self, words[0]), regionChrom) < 0) lineFileSetTabixRegion(self->vcff->lf, regionChrom, regionStart, regionEnd); while (words != NULL && - (strcmp(words[0], regionChrom) < 0 || + (strcmp(getProperChromName(self, words[0]), regionChrom) < 0 || (sameString(words[0], regionChrom) && self->record->chromEnd < regionEnd))) words = nextRowRaw(self); } return words; } -static char *getProperChromName(struct annoStreamVcf *self, char *vcfChrom) -/* We tolerate chr-less chrom names in VCF and BAM ("1" for "chr1" etc); to avoid - * confusing the rest of the system, return the chr-ful version if it exists. */ -{ -char *name = hashFindVal(self->chromNameHash, vcfChrom); -if (name == NULL) - { - name = vcfChrom; - struct twoBitFile *tbf = self->streamer.assembly->tbf; - char buf[256]; - if (! twoBitIsSequence(tbf, vcfChrom)) - { - safef(buf, sizeof(buf), "chr%s", vcfChrom); - if (twoBitIsSequence(tbf, buf)) - name = buf; - } - name = lmCloneString(self->chromNameHash->lm, name); - hashAdd(self->chromNameHash, vcfChrom, name); - } -return name; -} - static struct annoRow *asvNextRow(struct annoStreamer *sSelf, char *minChrom, uint minEnd, struct lm *callerLm) /* Return an annoRow encoding the next VCF record, or NULL if there are no more items. 
*/ { struct annoStreamVcf *self = (struct annoStreamVcf *)sSelf; if (minChrom != NULL && sSelf->chrom != NULL && differentString(minChrom, sSelf->chrom)) errAbort("annoStreamVcf %s: nextRow minChrom='%s' but region chrom='%s'", sSelf->name, minChrom, sSelf->chrom); if (self->maxRecords > 0 && self->recordCount >= self->maxRecords) return NULL; char **words = nextRowUnfiltered(self, minChrom, minEnd); if (words == NULL) return NULL; // Skip past any left-join failures until we get a right-join failure, a passing row, or EOF. boolean rightFail = FALSE;