bc7b808b5abb70ab79b06df228575e257b0b18a4 angie Mon Sep 30 12:06:27 2013 -0700 When reading a VCF file without a tabix index, make sure we stop after regionEnd (if there is one). diff --git src/lib/annoStreamVcf.c src/lib/annoStreamVcf.c index 7496f8e..dd32f9a 100644 --- src/lib/annoStreamVcf.c +++ src/lib/annoStreamVcf.c @@ -98,54 +98,66 @@ name = buf; } name = lmCloneString(self->chromNameHash->lm, name); hashAdd(self->chromNameHash, vcfChrom, name); } return name; } static char **nextRowUnfiltered(struct annoStreamVcf *self, char *minChrom, uint minEnd) /* Get the next VCF record and put the row text into autoSql words. * Return pointer to self->asWords if we get a row, otherwise NULL. */ { struct annoStreamer *sSelf = (struct annoStreamer *)self; char *regionChrom = sSelf->chrom; uint regionStart = sSelf->regionStart; -uint regionEnd = sSelf->regionEnd; if (minChrom != NULL) { if (regionChrom == NULL) { regionChrom = minChrom; regionStart = minEnd; - regionEnd = annoAssemblySeqSize(sSelf->assembly, minChrom); } else { regionStart = max(regionStart, minEnd); } } char **words = nextRowRaw(self); if (regionChrom != NULL && words != NULL) { if (self->isTabix && strcmp(getProperChromName(self, words[0]), regionChrom) < 0) + { + uint regionEnd = sSelf->regionEnd; + if (minChrom != NULL && sSelf->chrom == NULL) + regionEnd = annoAssemblySeqSize(sSelf->assembly, minChrom); lineFileSetTabixRegion(self->vcff->lf, regionChrom, regionStart, regionEnd); + } while (words != NULL && (strcmp(getProperChromName(self, words[0]), regionChrom) < 0 || (sameString(words[0], regionChrom) && self->record->chromEnd < regionStart))) words = nextRowRaw(self); } +// Tabix doesn't give us any rows past end of region, but if not using tabix, +// detect when we're past end of region: +if (words != NULL && !self->isTabix && sSelf->chrom != NULL + && self->record->chromStart > sSelf->regionEnd) + { + words = NULL; + self->record = NULL; + } +if (words != NULL) self->recordCount++; if (words == 
NULL || (self->maxRecords > 0 && self->recordCount >= self->maxRecords)) self->eof = TRUE; return words; } static struct annoRow *nextRowFiltered(struct annoStreamVcf *self, char *minChrom, uint minEnd, struct lm *callerLm) /* Get the next record that passes our filters. */ { char **words = nextRowUnfiltered(self, minChrom, minEnd); if (words == NULL) return NULL; // Skip past any left-join failures until we get a right-join failure, a passing row, or EOF. struct annoStreamer *sSelf = (struct annoStreamer *)self;