src/htslib/synced_bcf_reader.c 7df6e18265341f87a69fba808aa1f92f8ebca841

7df6e18265341f87a69fba808aa1f92f8ebca841
markd
  Wed Apr 15 13:39:42 2026 -0700
move copy of htslib

diff --git src/htslib/synced_bcf_reader.c src/htslib/synced_bcf_reader.c
deleted file mode 100644
index 1e70fc61024..00000000000
--- src/htslib/synced_bcf_reader.c
+++ /dev/null
@@ -1,1284 +0,0 @@
-/*  synced_bcf_reader.c -- stream through multiple VCF files.
-
-    Copyright (C) 2012-2014 Genome Research Ltd.
-
-    Author: Petr Danecek <pd3@sanger.ac.uk>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.  */
-
-#include <config.h>
-
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-#include <limits.h>
-#include <errno.h>
-#include <ctype.h>
-#include <sys/stat.h>
-#include "htslib/synced_bcf_reader.h"
-#include "htslib/kseq.h"
-#include "htslib/khash_str2int.h"
-#include "htslib/bgzf.h"
-
-#define MAX_CSI_COOR 0x7fffffff     // maximum indexable coordinate of .csi
-
-typedef struct
-{
-    uint32_t start, end;
-}
-region1_t;
-
-typedef struct _region_t
-{
-    region1_t *regs;
-    int nregs, mregs, creg;
-}
-region_t;
-
-static void _regions_add(bcf_sr_regions_t *reg, const char *chr, int start, int end);
-static bcf_sr_regions_t *_regions_init_string(const char *str);
-static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec);
-
-char *bcf_sr_strerror(int errnum)
-{
-    switch (errnum)
-    {
-        case open_failed: 
-            return strerror(errno); break;
-        case not_bgzf:
-            return "not compressed with bgzip"; break;
-        case idx_load_failed:
-            return "could not load index"; break;
-        case file_type_error:
-            return "unknown file type"; break;
-        case api_usage_error:
-            return "API usage error"; break;
-        case header_error:
-            return "could not parse header"; break;
-        case no_eof:
-            return "no BGZF EOF marker; file may be truncated"; break;
-        default: return ""; 
-    }
-}
-
-static int *init_filters(bcf_hdr_t *hdr, const char *filters, int *nfilters)
-{
-    kstring_t str = {0,0,0};
-    const char *tmp = filters, *prev = filters;
-    int nout = 0, *out = NULL;
-    while ( 1 )
-    {
-        if ( *tmp==',' || !*tmp )
-        {
-            out = (int*) realloc(out, (nout+1)*sizeof(int));
-            if ( tmp-prev==1 && *prev=='.' )
-            {
-                out[nout] = -1;
-                nout++;
-            }
-            else
-            {
-                str.l = 0;
-                kputsn(prev, tmp-prev, &str);
-                out[nout] = bcf_hdr_id2int(hdr, BCF_DT_ID, str.s);
-                if ( out[nout]>=0 ) nout++;
-            }
-            if ( !*tmp ) break;
-            prev = tmp+1;
-        }
-        tmp++;
-    }
-    if ( str.m ) free(str.s);
-    *nfilters = nout;
-    return out;
-}
-
-int bcf_sr_set_regions(bcf_srs_t *readers, const char *regions, int is_file)
-{
-    assert( !readers->regions );
-    if ( readers->nreaders )
-    {
-        fprintf(stderr,"[%s:%d %s] Error: bcf_sr_set_regions() must be called before bcf_sr_add_reader()\n", __FILE__,__LINE__,__FUNCTION__);
-        return -1;
-    }
-    readers->regions = bcf_sr_regions_init(regions,is_file,0,1,-2);
-    if ( !readers->regions ) return -1;
-    readers->explicit_regs = 1;
-    readers->require_index = 1;
-    return 0;
-}
-int bcf_sr_set_targets(bcf_srs_t *readers, const char *targets, int is_file, int alleles)
-{
-    assert( !readers->targets );
-    if ( targets[0]=='^' )
-    {
-        readers->targets_exclude = 1;
-        targets++;
-    }
-    readers->targets = bcf_sr_regions_init(targets,is_file,0,1,-2);
-    if ( !readers->targets ) return -1;
-    readers->targets_als = alleles;
-    return 0;
-}
-
-int bcf_sr_add_reader(bcf_srs_t *files, const char *fname)
-{
-    htsFile* file_ptr = hts_open(fname, "r");
-    if ( ! file_ptr ) {
-        files->errnum = open_failed;
-        return 0;
-    }
-
-    files->has_line = (int*) realloc(files->has_line, sizeof(int)*(files->nreaders+1));
-    files->has_line[files->nreaders] = 0;
-    files->readers  = (bcf_sr_t*) realloc(files->readers, sizeof(bcf_sr_t)*(files->nreaders+1));
-    bcf_sr_t *reader = &files->readers[files->nreaders++];
-    memset(reader,0,sizeof(bcf_sr_t));
-
-    reader->file = file_ptr;
-
-    files->errnum = 0;
-
-    if ( reader->file->format.compression==bgzf )
-    {
-        BGZF *bgzf = hts_get_bgzfp(reader->file);
-        if ( bgzf && bgzf_check_EOF(bgzf) == 0 ) {
-            files->errnum = no_eof;
-            fprintf(stderr,"[%s] Warning: no BGZF EOF marker; file may be truncated.\n", fname);
-        }
-    }
-
-    if ( files->require_index )
-    {
-        if ( reader->file->format.format==vcf )
-        {
-            if ( reader->file->format.compression!=bgzf )
-            {
-                files->errnum = not_bgzf;
-                return 0;
-            }
-
-            reader->tbx_idx = tbx_index_load(fname);
-            if ( !reader->tbx_idx )
-            {
-                files->errnum = idx_load_failed;
-                return 0;
-            }
-
-            reader->header = bcf_hdr_read(reader->file);
-        }
-        else if ( reader->file->format.format==bcf )
-        {
-            if ( reader->file->format.compression!=bgzf )
-            {
-                files->errnum = not_bgzf;
-                return 0;
-            }
-
-            reader->header = bcf_hdr_read(reader->file);
-
-            reader->bcf_idx = bcf_index_load(fname);
-            if ( !reader->bcf_idx )
-            {
-                files->errnum = idx_load_failed;
-                return 0;
-            }
-        }
-        else
-        {
-            files->errnum = file_type_error;
-            return 0;
-        }
-    }
-    else
-    {
-        if ( reader->file->format.format==bcf || reader->file->format.format==vcf )
-        {
-            reader->header = bcf_hdr_read(reader->file);
-        }
-        else
-        {
-            files->errnum = file_type_error;
-            return 0;
-        }
-        files->streaming = 1;
-    }
-    if ( files->streaming && files->nreaders>1 )
-    {
-        files->errnum = api_usage_error;
-        fprintf(stderr,"[%s:%d %s] Error: %d readers, yet require_index not set\n", __FILE__,__LINE__,__FUNCTION__,files->nreaders);
-        return 0;
-    }
-    if ( files->streaming && files->regions )
-    {
-        files->errnum = api_usage_error;
-        fprintf(stderr,"[%s:%d %s] Error: cannot tabix-jump in streaming mode\n", __FILE__,__LINE__,__FUNCTION__);
-        return 0;
-    }
-    if ( !reader->header )
-    {
-        files->errnum = header_error;
-        return 0;
-    }
-
-    reader->fname = strdup(fname);
-    if ( files->apply_filters )
-        reader->filter_ids = init_filters(reader->header, files->apply_filters, &reader->nfilter_ids);
-
-    // Update list of chromosomes
-    if ( !files->explicit_regs && !files->streaming )
-    {
-        int n,i;
-        const char **names = reader->tbx_idx ? tbx_seqnames(reader->tbx_idx, &n) : bcf_hdr_seqnames(reader->header, &n);
-        for (i=0; i<n; i++)
-        {
-            if ( !files->regions )
-                files->regions = _regions_init_string(names[i]);
-            else
-                _regions_add(files->regions, names[i], -1, -1);
-        }
-        free(names);
-    }
-
-    return 1;
-}
-
-bcf_srs_t *bcf_sr_init(void)
-{
-    bcf_srs_t *files = (bcf_srs_t*) calloc(1,sizeof(bcf_srs_t));
-    return files;
-}
-
-static void bcf_sr_destroy1(bcf_sr_t *reader)
-{
-    free(reader->fname);
-    if ( reader->tbx_idx ) tbx_destroy(reader->tbx_idx);
-    if ( reader->bcf_idx ) hts_idx_destroy(reader->bcf_idx);
-    bcf_hdr_destroy(reader->header);
-    hts_close(reader->file);
-    if ( reader->itr ) tbx_itr_destroy(reader->itr);
-    int j;
-    for (j=0; j<reader->mbuffer; j++)
-        bcf_destroy1(reader->buffer[j]);
-    free(reader->buffer);
-    free(reader->samples);
-    free(reader->filter_ids);
-}
-void bcf_sr_destroy(bcf_srs_t *files)
-{
-    int i;
-    for (i=0; i<files->nreaders; i++)
-        bcf_sr_destroy1(&files->readers[i]);
-    free(files->has_line);
-    free(files->readers);
-    for (i=0; i<files->n_smpl; i++) free(files->samples[i]);
-    free(files->samples);
-    if (files->targets) bcf_sr_regions_destroy(files->targets);
-    if (files->regions) bcf_sr_regions_destroy(files->regions);
-    if ( files->tmps.m ) free(files->tmps.s);
-    free(files);
-}
-
-void bcf_sr_remove_reader(bcf_srs_t *files, int i)
-{
-    assert( !files->samples );  // not ready for this yet
-    bcf_sr_destroy1(&files->readers[i]);
-    if ( i+1 < files->nreaders )
-    {
-        memmove(&files->readers[i], &files->readers[i+1], (files->nreaders-i-1)*sizeof(bcf_sr_t));
-        memmove(&files->has_line[i], &files->has_line[i+1], (files->nreaders-i-1)*sizeof(int));
-    }
-    files->nreaders--;
-}
-
-
-/*
-   Removes duplicate records from the buffer. The meaning of "duplicate" is
-   controlled by the $collapse variable, which can cause that from multiple
-   <indel|snp|any> lines only the first is considered and the rest is ignored.
-   The removal is done by setting the redundant lines' positions to -1 and
-   moving these lines at the end of the buffer.
- */
-static void collapse_buffer(bcf_srs_t *files, bcf_sr_t *reader)
-{
-    int irec,jrec, has_snp=0, has_indel=0, has_any=0;
-    for (irec=1; irec<=reader->nbuffer; irec++)
-    {
-        bcf1_t *line = reader->buffer[irec];
-        if ( line->pos != reader->buffer[1]->pos ) break;
-        if ( files->collapse&COLLAPSE_ANY )
-        {
-            if ( !has_any ) has_any = 1;
-            else line->pos = -1;
-        }
-        int line_type = bcf_get_variant_types(line);
-        if ( files->collapse&COLLAPSE_SNPS && line_type&(VCF_SNP|VCF_MNP) )
-        {
-            if ( !has_snp ) has_snp = 1;
-            else line->pos = -1;
-        }
-        if ( files->collapse&COLLAPSE_INDELS && line_type&VCF_INDEL )
-        {
-            if ( !has_indel ) has_indel = 1;
-            else line->pos = -1;
-        }
-    }
-    bcf1_t *tmp;
-    irec = jrec = 1;
-    while ( irec<=reader->nbuffer && jrec<=reader->nbuffer )
-    {
-        if ( reader->buffer[irec]->pos != -1 ) { irec++; continue; }
-        if ( jrec<=irec ) jrec = irec+1;
-        while ( jrec<=reader->nbuffer && reader->buffer[jrec]->pos==-1 ) jrec++;
-        if ( jrec<=reader->nbuffer )
-        {
-            tmp = reader->buffer[irec]; reader->buffer[irec] = reader->buffer[jrec]; reader->buffer[jrec] = tmp;
-        }
-    }
-    reader->nbuffer = irec - 1;
-}
-
-void debug_buffer(FILE *fp, bcf_sr_t *reader)
-{
-    int j;
-    for (j=0; j<=reader->nbuffer; j++)
-    {
-        bcf1_t *line = reader->buffer[j];
-        fprintf(fp,"\t%p\t%s%s\t%s:%d\t%s ", line,reader->fname,j==0?"*":" ",reader->header->id[BCF_DT_CTG][line->rid].key,line->pos+1,line->n_allele?line->d.allele[0]:"");
-        int k;
-        for (k=1; k<line->n_allele; k++) fprintf(fp," %s", line->d.allele[k]);
-        fprintf(fp,"\n");
-    }
-}
-
-void debug_buffers(FILE *fp, bcf_srs_t *files)
-{
-    int i;
-    for (i=0; i<files->nreaders; i++)
-    {
-        fprintf(fp, "has_line: %d\t%s\n", bcf_sr_has_line(files,i),files->readers[i].fname);
-        debug_buffer(fp, &files->readers[i]);
-    }
-    fprintf(fp,"\n");
-}
-
-static inline int has_filter(bcf_sr_t *reader, bcf1_t *line)
-{
-    int i, j;
-    if ( !line->d.n_flt )
-    {
-        for (j=0; j<reader->nfilter_ids; j++)
-            if ( reader->filter_ids[j]<0 ) return 1;
-        return 0;
-    }
-    for (i=0; i<line->d.n_flt; i++)
-    {
-        for (j=0; j<reader->nfilter_ids; j++)
-            if ( line->d.flt[i]==reader->filter_ids[j] ) return 1;
-    }
-    return 0;
-}
-
-static int _reader_seek(bcf_sr_t *reader, const char *seq, int start, int end)
-{
-    if ( end>=MAX_CSI_COOR )
-    {
-        fprintf(stderr,"The coordinate is out of csi index limit: %d\n", end+1);
-        exit(1);
-    }
-    if ( reader->itr )
-    {
-        hts_itr_destroy(reader->itr);
-        reader->itr = NULL;
-    }
-    reader->nbuffer = 0;
-    if ( reader->tbx_idx )
-    {
-        int tid = tbx_name2id(reader->tbx_idx, seq);
-        if ( tid==-1 ) return -1;    // the sequence not present in this file
-        reader->itr = tbx_itr_queryi(reader->tbx_idx,tid,start,end+1);
-    }
-    else
-    {
-        int tid = bcf_hdr_name2id(reader->header, seq);
-        if ( tid==-1 ) return -1;    // the sequence not present in this file
-        reader->itr = bcf_itr_queryi(reader->bcf_idx,tid,start,end+1);
-    }
-    if ( !reader->itr ) fprintf(stderr,"Could not seek: %s:%d-%d\n",seq,start+1,end+1);
-    assert(reader->itr);
-    return 0;
-}
-
-/*
- *  _readers_next_region() - jumps to next region if necessary
- *  Returns 0 on success or -1 when there are no more regions left
- */
-static int _readers_next_region(bcf_srs_t *files)
-{
-    // Need to open new chromosome? Check number of lines in all readers' buffers
-    int i, eos = 0;
-    for (i=0; i<files->nreaders; i++)
-        if ( !files->readers[i].itr && !files->readers[i].nbuffer ) eos++;
-
-    if ( eos!=files->nreaders )
-    {
-        // Some of the readers still has buffered lines
-        return 0;
-    }
-
-    // No lines in the buffer, need to open new region or quit
-    if ( bcf_sr_regions_next(files->regions)<0 ) return -1;
-
-    for (i=0; i<files->nreaders; i++)
-        _reader_seek(&files->readers[i],files->regions->seq_names[files->regions->iseq],files->regions->start,files->regions->end);
-
-    return 0;
-}
-
-/*
- *  _reader_fill_buffer() - buffers all records with the same coordinate
- */
-static void _reader_fill_buffer(bcf_srs_t *files, bcf_sr_t *reader)
-{
-    // Return if the buffer is full: the coordinate of the last buffered record differs
-    if ( reader->nbuffer && reader->buffer[reader->nbuffer]->pos != reader->buffer[1]->pos ) return;
-
-    // No iterator (sequence not present in this file) and not streaming
-    if ( !reader->itr && !files->streaming ) return;
-
-    // Fill the buffer with records starting at the same position
-    int i, ret = 0;
-    while (1)
-    {
-        if ( reader->nbuffer+1 >= reader->mbuffer )
-        {
-            // Increase buffer size
-            reader->mbuffer += 8;
-            reader->buffer = (bcf1_t**) realloc(reader->buffer, sizeof(bcf1_t*)*reader->mbuffer);
-            for (i=8; i>0; i--)     // initialize
-            {
-                reader->buffer[reader->mbuffer-i] = bcf_init1();
-                reader->buffer[reader->mbuffer-i]->max_unpack = files->max_unpack;
-                reader->buffer[reader->mbuffer-i]->pos = -1;    // for rare cases when VCF starts from 1
-            }
-        }
-        if ( files->streaming )
-        {
-            if ( reader->file->format.format==vcf )
-            {
-                if ( (ret=hts_getline(reader->file, KS_SEP_LINE, &files->tmps)) < 0 ) break;   // no more lines
-                int ret = vcf_parse1(&files->tmps, reader->header, reader->buffer[reader->nbuffer+1]);
-                if ( ret<0 ) break;
-            }
-            else if ( reader->file->format.format==bcf )
-            {
-                if ( (ret=bcf_read1(reader->file, reader->header, reader->buffer[reader->nbuffer+1])) < 0 ) break; // no more lines
-            }
-            else
-            {
-                fprintf(stderr,"[%s:%d %s] fixme: not ready for this\n", __FILE__,__LINE__,__FUNCTION__);
-                exit(1);
-            }
-        }
-        else if ( reader->tbx_idx )
-        {
-            if ( (ret=tbx_itr_next(reader->file, reader->tbx_idx, reader->itr, &files->tmps)) < 0 ) break;  // no more lines
-            vcf_parse1(&files->tmps, reader->header, reader->buffer[reader->nbuffer+1]);
-        }
-        else
-        {
-            if ( (ret=bcf_itr_next(reader->file, reader->itr, reader->buffer[reader->nbuffer+1])) < 0 ) break; // no more lines
-            bcf_subset_format(reader->header,reader->buffer[reader->nbuffer+1]);
-        }
-
-        // apply filter
-        if ( !reader->nfilter_ids )
-            bcf_unpack(reader->buffer[reader->nbuffer+1], BCF_UN_STR);
-        else
-        {
-            bcf_unpack(reader->buffer[reader->nbuffer+1], BCF_UN_STR|BCF_UN_FLT);
-            if ( !has_filter(reader, reader->buffer[reader->nbuffer+1]) ) continue;
-        }
-        reader->nbuffer++;
-
-        if ( reader->buffer[reader->nbuffer]->pos != reader->buffer[1]->pos ) break;    // the buffer is full
-    }
-    if ( ret<0 )
-    {
-        // done for this region
-        tbx_itr_destroy(reader->itr);
-        reader->itr = NULL;
-    }
-    if ( files->collapse && reader->nbuffer>=2 && reader->buffer[1]->pos==reader->buffer[2]->pos )
-        collapse_buffer(files, reader);
-}
-
-/*
- *  _readers_shift_buffer() - removes the first line and all subsequent lines with the same position
- */
-static void _reader_shift_buffer(bcf_sr_t *reader)
-{
-    int i;
-    for (i=2; i<=reader->nbuffer; i++)
-        if ( reader->buffer[i]->pos!=reader->buffer[1]->pos ) break;
-    if ( i<=reader->nbuffer )
-    {
-        // A record with a different position follows, swap it. Because of the reader's logic,
-        // only one such line can be present.
-        bcf1_t *tmp = reader->buffer[1]; reader->buffer[1] = reader->buffer[i]; reader->buffer[i] = tmp;
-        reader->nbuffer = 1;
-    }
-    else
-        reader->nbuffer = 0;    // no other line
-}
-
-/*
- *  _reader_match_alleles() - from multiple buffered lines selects the one which
- *  corresponds best to the template line. The logic is controlled by COLLAPSE_*
- *  Returns 0 on success or -1 when no good matching line is found.
- */
-static int _reader_match_alleles(bcf_srs_t *files, bcf_sr_t *reader, bcf1_t *tmpl)
-{
-    int i, irec = -1;
-
-    // if no template given, use the first available record
-    if ( !tmpl )
-        irec = 1;
-    else
-    {
-        int tmpl_type = bcf_get_variant_types(tmpl);
-        for (i=1; i<=reader->nbuffer; i++)
-        {
-            bcf1_t *line = reader->buffer[i];
-            if ( line->pos != reader->buffer[1]->pos ) break;  // done with this reader
-
-            // Easiest case: matching by position only
-            if ( files->collapse&COLLAPSE_ANY ) { irec=i; break; }
-
-            int line_type = bcf_get_variant_types(line);
-
-            // No matter what the alleles are, as long as they are both SNPs
-            if ( files->collapse&COLLAPSE_SNPS && tmpl_type&VCF_SNP && line_type&VCF_SNP ) { irec=i; break; }
-            // ... or indels
-            if ( files->collapse&COLLAPSE_INDELS && tmpl_type&VCF_INDEL && line_type&VCF_INDEL ) { irec=i; break; }
-
-            // More thorough checking: REFs must match
-            if ( tmpl->rlen != line->rlen ) continue;  // different length
-            if ( !tmpl->d.allele || !line->d.allele ) continue;   // one of the lines is empty, someone is swapped buffered lines?!
-            if ( strcmp(tmpl->d.allele[0], line->d.allele[0]) ) continue; // the strings do not match
-
-            int ial,jal;
-            if ( files->collapse==COLLAPSE_NONE )
-            {
-                // Exact match, all alleles must be identical
-                if ( tmpl->n_allele!=line->n_allele ) continue;   // different number of alleles, skip
-
-                int nmatch = 1; // REF has been already checked
-                for (ial=1; ial<tmpl->n_allele; ial++)
-                {
-                    for (jal=1; jal<line->n_allele; jal++)
-                        if ( !strcmp(tmpl->d.allele[ial], line->d.allele[jal]) ) { nmatch++; break; }
-                }
-                if ( nmatch==tmpl->n_allele ) { irec=i; break; }    // found: exact match
-                continue;
-            }
-
-            if ( line->n_allele==1 && tmpl->n_allele==1 ) { irec=i; break; }    // both sites are non-variant
-
-            // COLLAPSE_SOME: at least some ALTs must match
-            for (ial=1; ial<tmpl->n_allele; ial++)
-            {
-                for (jal=1; jal<line->n_allele; jal++)
-                    if ( !strcmp(tmpl->d.allele[ial], line->d.allele[jal]) ) { irec=i; break; }
-                if ( irec>=1 ) break;
-            }
-            if ( irec>=1 ) break;
-        }
-        if ( irec==-1 ) return -1;  // no matching line was found
-    }
-
-    // Set the selected line (irec) as active: set it to buffer[0], move the remaining lines forward
-    // and put the old bcf1_t record at the end.
-    bcf1_t *tmp = reader->buffer[0];
-    reader->buffer[0] = reader->buffer[irec];
-    for (i=irec+1; i<=reader->nbuffer; i++) reader->buffer[i-1] = reader->buffer[i];
-    reader->buffer[ reader->nbuffer ] = tmp;
-    reader->nbuffer--;
-
-    return 0;
-}
-
-int _reader_next_line(bcf_srs_t *files)
-{
-    int i, min_pos = INT_MAX;
-
-    // Loop until next suitable line is found or all readers have finished
-    while ( 1 )
-    {
-        // Get all readers ready for the next region.
-        if ( files->regions && _readers_next_region(files)<0 ) break;
-
-        // Fill buffers
-        const char *chr = NULL;
-        for (i=0; i<files->nreaders; i++)
-        {
-            _reader_fill_buffer(files, &files->readers[i]);
-
-            // Update the minimum coordinate
-            if ( !files->readers[i].nbuffer ) continue;
-            if ( min_pos > files->readers[i].buffer[1]->pos )
-            {
-                min_pos = files->readers[i].buffer[1]->pos;
-                chr = bcf_seqname(files->readers[i].header, files->readers[i].buffer[1]);
-            }
-        }
-        if ( min_pos==INT_MAX )
-        {
-            if ( !files->regions ) break;
-            continue;
-        }
-
-        // Skip this position if not present in targets
-        if ( files->targets )
-        {
-            int ret = bcf_sr_regions_overlap(files->targets, chr, min_pos, min_pos);
-            if ( (!files->targets_exclude && ret<0) || (files->targets_exclude && !ret) )
-            {
-                // Remove all lines with this position from the buffer
-                for (i=0; i<files->nreaders; i++)
-                    if ( files->readers[i].nbuffer && files->readers[i].buffer[1]->pos==min_pos )
-                        _reader_shift_buffer(&files->readers[i]);
-                min_pos = INT_MAX;
-                continue;
-            }
-        }
-
-        break;  // done: min_pos is set
-    }
-
-    // There can be records with duplicate positions. Set the active line intelligently so that
-    // the alleles match.
-    int nret = 0;   // number of readers sharing the position
-    bcf1_t *first = NULL;   // record which will be used for allele matching
-    for (i=0; i<files->nreaders; i++)
-    {
-        files->has_line[i] = 0;
-
-        // Skip readers with no records at this position
-        if ( !files->readers[i].nbuffer || files->readers[i].buffer[1]->pos!=min_pos ) continue;
-
-        // Until now buffer[0] of all reader was empty and the lines started at buffer[1].
-        // Now lines which are ready to be output will be moved to buffer[0].
-        if ( _reader_match_alleles(files, &files->readers[i], first) < 0 ) continue;
-        if ( !first ) first = files->readers[i].buffer[0];
-
-        nret++;
-        files->has_line[i] = 1;
-    }
-    return nret;
-}
-
-int bcf_sr_next_line(bcf_srs_t *files)
-{
-    if ( !files->targets_als )
-        return _reader_next_line(files);
-
-    while (1)
-    {
-        int i, ret = _reader_next_line(files);
-        if ( !ret ) return ret;
-
-        for (i=0; i<files->nreaders; i++)
-            if ( files->has_line[i] ) break;
-
-        if ( _regions_match_alleles(files->targets, files->targets_als-1, files->readers[i].buffer[0]) ) return ret;
-
-        // Check if there are more duplicate lines in the buffers. If not, return this line as if it
-        // matched the targets, even if there is a type mismatch
-        for (i=0; i<files->nreaders; i++)
-        {
-            if ( !files->has_line[i] ) continue;
-            if ( files->readers[i].nbuffer==0 || files->readers[i].buffer[1]->pos!=files->readers[i].buffer[0]->pos ) continue;
-            break;
-        }
-        if ( i==files->nreaders ) return ret;   // no more lines left, output even if target alleles are not of the same type
-    }
-}
-
-static void bcf_sr_seek_start(bcf_srs_t *readers)
-{
-    bcf_sr_regions_t *reg = readers->regions;
-    int i;
-    for (i=0; i<reg->nseqs; i++)
-        reg->regs[i].creg = -1;
-    reg->iseq = 0;
-}
-
-
-int bcf_sr_seek(bcf_srs_t *readers, const char *seq, int pos)
-{
-    if ( !readers->regions ) return 0;
-    if ( !seq && !pos )
-    {
-        // seek to start
-        bcf_sr_seek_start(readers);
-        return 0;
-    }
-    bcf_sr_regions_overlap(readers->regions, seq, pos, pos);
-    int i, nret = 0;
-    for (i=0; i<readers->nreaders; i++)
-    {
-        nret += _reader_seek(&readers->readers[i],seq,pos,MAX_CSI_COOR-1);
-    }
-    return nret;
-}
-
-int bcf_sr_set_samples(bcf_srs_t *files, const char *fname, int is_file)
-{
-    int i, j, nsmpl, free_smpl = 0;
-    char **smpl = NULL;
-
-    void *exclude = (fname[0]=='^') ? khash_str2int_init() : NULL;
-    if ( exclude || strcmp("-",fname) ) // "-" stands for all samples
-    {
-        smpl = hts_readlist(fname, is_file, &nsmpl);
-        if ( !smpl )
-        {
-            fprintf(stderr,"Could not read the file: \"%s\"\n", fname);
-            return 0;
-        }
-        if ( exclude )
-        {
-            for (i=0; i<nsmpl; i++)
-                khash_str2int_inc(exclude, smpl[i]);
-        }
-        free_smpl = 1;
-    }
-    if ( !smpl )
-    {
-        smpl  = files->readers[0].header->samples;   // intersection of all samples
-        nsmpl = bcf_hdr_nsamples(files->readers[0].header);
-    }
-
-    files->samples = NULL;
-    files->n_smpl  = 0;
-    for (i=0; i<nsmpl; i++)
-    {
-        if ( exclude && khash_str2int_has_key(exclude,smpl[i])  ) continue;
-
-        int n_isec = 0;
-        for (j=0; j<files->nreaders; j++)
-        {
-            if ( bcf_hdr_id2int(files->readers[j].header, BCF_DT_SAMPLE, smpl[i])<0 ) break;
-            n_isec++;
-        }
-        if ( n_isec!=files->nreaders )
-        {
-            fprintf(stderr,"Warning: The sample \"%s\" was not found in %s, skipping\n", smpl[i], files->readers[n_isec].fname);
-            continue;
-        }
-
-        files->samples = (char**) realloc(files->samples, (files->n_smpl+1)*sizeof(const char*));
-        files->samples[files->n_smpl++] = strdup(smpl[i]);
-    }
-
-    if ( exclude ) khash_str2int_destroy(exclude);
-    if ( free_smpl )
-    {
-        for (i=0; i<nsmpl; i++) free(smpl[i]);
-        free(smpl);
-    }
-
-    if ( !files->n_smpl )
-    {
-        if ( files->nreaders>1 )
-            fprintf(stderr,"No samples in common.\n");
-        return 0;
-    }
-    for (i=0; i<files->nreaders; i++)
-    {
-        bcf_sr_t *reader = &files->readers[i];
-        reader->samples  = (int*) malloc(sizeof(int)*files->n_smpl);
-        reader->n_smpl   = files->n_smpl;
-        for (j=0; j<files->n_smpl; j++)
-            reader->samples[j] = bcf_hdr_id2int(reader->header, BCF_DT_SAMPLE, files->samples[j]);
-    }
-    return 1;
-}
-
-// Add a new region into a list sorted by start,end. On input the coordinates
-// are 1-based, stored 0-based, inclusive.
-static void _regions_add(bcf_sr_regions_t *reg, const char *chr, int start, int end)
-{
-    if ( start==-1 && end==-1 )
-    {
-        start = 0; end = MAX_CSI_COOR-1;
-    }
-    else
-    {
-        start--; end--; // store 0-based coordinates
-    }
-
-    if ( !reg->seq_hash )
-         reg->seq_hash = khash_str2int_init();
-
-    int iseq;
-    if ( khash_str2int_get(reg->seq_hash, chr, &iseq)<0 )
-    {
-        // the chromosome block does not exist
-        iseq = reg->nseqs++;
-        reg->seq_names = (char**) realloc(reg->seq_names,sizeof(char*)*reg->nseqs);
-        reg->regs = (region_t*) realloc(reg->regs,sizeof(region_t)*reg->nseqs);
-        memset(&reg->regs[reg->nseqs-1],0,sizeof(region_t));
-        reg->seq_names[iseq] = strdup(chr);
-        reg->regs[iseq].creg = -1;
-        khash_str2int_set(reg->seq_hash,reg->seq_names[iseq],iseq);
-    }
-
-    region_t *creg = &reg->regs[iseq];
-
-    // the regions may not be sorted on input: binary search
-    int i, min = 0, max = creg->nregs - 1;
-    while ( min<=max )
-    {
-        i = (max+min)/2;
-        if ( start < creg->regs[i].start ) max = i - 1;
-        else if ( start > creg->regs[i].start ) min = i + 1;
-        else break;
-    }
-    if ( min>max || creg->regs[i].start!=start || creg->regs[i].end!=end )
-    {
-        // no such region, insert a new one just after max
-        hts_expand(region1_t,creg->nregs+1,creg->mregs,creg->regs);
-        if ( ++max < creg->nregs )
-            memmove(&creg->regs[max+1],&creg->regs[max],(creg->nregs - max)*sizeof(region1_t));
-        creg->regs[max].start = start;
-        creg->regs[max].end   = end;
-        creg->nregs++;
-    }
-}
-
-// File name or a list of genomic locations. If file name, NULL is returned.
-static bcf_sr_regions_t *_regions_init_string(const char *str)
-{
-    bcf_sr_regions_t *reg = (bcf_sr_regions_t *) calloc(1, sizeof(bcf_sr_regions_t));
-    reg->start = reg->end = -1;
-    reg->prev_start = reg->prev_seq = -1;
-
-    kstring_t tmp = {0,0,0};
-    const char *sp = str, *ep = str;
-    int from, to;
-    while ( 1 )
-    {
-        while ( *ep && *ep!=',' && *ep!=':' ) ep++;
-        tmp.l = 0;
-        kputsn(sp,ep-sp,&tmp);
-        if ( *ep==':' )
-        {
-            sp = ep+1;
-            from = hts_parse_decimal(sp,(char**)&ep,0);
-            if ( sp==ep )
-            {
-                fprintf(stderr,"[%s:%d %s] Could not parse the region(s): %s\n", __FILE__,__LINE__,__FUNCTION__,str);
-                free(reg); free(tmp.s); return NULL;
-            }
-            if ( !*ep || *ep==',' )
-            {
-                _regions_add(reg, tmp.s, from, from);
-                sp = ep;
-                continue;
-            }
-            if ( *ep!='-' )
-            {
-                fprintf(stderr,"[%s:%d %s] Could not parse the region(s): %s\n", __FILE__,__LINE__,__FUNCTION__,str);
-                free(reg); free(tmp.s); return NULL;
-            }
-            ep++;
-            sp = ep;
-            to = hts_parse_decimal(sp,(char**)&ep,0);
-            if ( *ep && *ep!=',' )
-            {
-                fprintf(stderr,"[%s:%d %s] Could not parse the region(s): %s\n", __FILE__,__LINE__,__FUNCTION__,str);
-                free(reg); free(tmp.s); return NULL;
-            }
-            if ( sp==ep ) to = MAX_CSI_COOR-1;
-            _regions_add(reg, tmp.s, from, to);
-            if ( !*ep ) break;
-            sp = ep;
-        }
-        else
-        {
-            if ( tmp.l ) _regions_add(reg, tmp.s, -1, -1);
-            if ( !*ep ) break;
-            sp = ++ep;
-        }
-    }
-    free(tmp.s);
-    return reg;
-}
-
-// ichr,ifrom,ito are 0-based;
-// returns -1 on error, 0 if the line is a comment line, 1 on success
-static int _regions_parse_line(char *line, int ichr,int ifrom,int ito, char **chr,char **chr_end,int *from,int *to)
-{
-    *chr_end = NULL;
-
-    if ( line[0]=='#' ) return 0;
-
-    int k,l;    // index of the start and end column of the tab-delimited file
-    if ( ifrom <= ito )
-        k = ifrom, l = ito;
-    else
-        l = ifrom, k = ito;
-
-    int i;
-    char *se = line, *ss = NULL; // start and end
-    char *tmp;
-    for (i=0; i<=k && *se; i++)
-    {
-        ss = i==0 ? se++ : ++se;
-        while (*se && *se!='\t') se++;
-    }
-    if ( i<=k ) return -1;
-    if ( k==l )
-    {
-        *from = *to = hts_parse_decimal(ss, &tmp, 0);
-        if ( tmp==ss ) return -1;
-    }
-    else
-    {
-        if ( k==ifrom )
-            *from = hts_parse_decimal(ss, &tmp, 0);
-        else
-            *to = hts_parse_decimal(ss, &tmp, 0);
-        if ( ss==tmp ) return -1;
-
-        for (i=k; i<l && *se; i++)
-        {
-            ss = ++se;
-            while (*se && *se!='\t') se++;
-        }
-        if ( i<l ) return -1;
-        if ( k==ifrom )
-            *to = hts_parse_decimal(ss, &tmp, 0);
-        else
-            *from = hts_parse_decimal(ss, &tmp, 0);
-        if ( ss==tmp ) return -1;
-    }
-
-    ss = se = line;
-    for (i=0; i<=ichr && *se; i++)
-    {
-        if ( i>0 ) ss = ++se;
-        while (*se && *se!='\t') se++;
-    }
-    if ( i<=ichr ) return -1;
-    *chr_end = se;
-    *chr = ss;
-    return 1;
-}
-
-bcf_sr_regions_t *bcf_sr_regions_init(const char *regions, int is_file, int ichr, int ifrom, int ito)
-{
-    bcf_sr_regions_t *reg;
-    if ( !is_file ) return _regions_init_string(regions);
-
-    reg = (bcf_sr_regions_t *) calloc(1, sizeof(bcf_sr_regions_t));
-    reg->start = reg->end = -1;
-    reg->prev_start = reg->prev_seq = -1;
-
-    reg->file = hts_open(regions, "rb");
-    if ( !reg->file )
-    {
-        fprintf(stderr,"[%s:%d %s] Could not open file: %s\n", __FILE__,__LINE__,__FUNCTION__,regions);
-        free(reg);
-        return NULL;
-    }
-
-    reg->tbx = tbx_index_load(regions);
-    if ( !reg->tbx )
-    {
-        int len = strlen(regions);
-        int is_bed  = strcasecmp(".bed",regions+len-4) ? 0 : 1;
-        if ( !is_bed && !strcasecmp(".bed.gz",regions+len-7) ) is_bed = 1;
-
-        if ( reg->file->format.format==vcf ) ito = 1;
-
-        // read the whole file, tabix index is not present
-        while ( hts_getline(reg->file, KS_SEP_LINE, &reg->line) > 0 )
-        {
-            char *chr, *chr_end;
-            int from, to, ret;
-            ret = _regions_parse_line(reg->line.s, ichr,ifrom,abs(ito), &chr,&chr_end,&from,&to);
-            if ( ret < 0 )
-            {
-                if ( ito<0 )
-                    ret = _regions_parse_line(reg->line.s, ichr,ifrom,ifrom, &chr,&chr_end,&from,&to);
-                if ( ret<0 )
-                {
-                    fprintf(stderr,"[%s:%d] Could not parse the file %s, using the columns %d,%d[,%d]\n", __FILE__,__LINE__,regions,ichr+1,ifrom+1,ito+1);
-                    hts_close(reg->file); reg->file = NULL; free(reg);
-                    return NULL;
-                }
-            }
-            if ( !ret ) continue;
-            if ( is_bed ) from++;
-            *chr_end = 0;
-            _regions_add(reg, chr, from, to);
-            *chr_end = '\t';
-        }
-        hts_close(reg->file); reg->file = NULL;
-        if ( !reg->nseqs ) { free(reg); return NULL; }
-        return reg;
-    }
-
-    reg->seq_names = (char**) tbx_seqnames(reg->tbx, &reg->nseqs);
-    if ( !reg->seq_hash )
-        reg->seq_hash = khash_str2int_init();
-    int i;
-    for (i=0; i<reg->nseqs; i++)
-    {
-        khash_str2int_set(reg->seq_hash,reg->seq_names[i],i);
-    }
-    reg->fname  = strdup(regions);
-    reg->is_bin = 1;
-    return reg;
-}
-
-void bcf_sr_regions_destroy(bcf_sr_regions_t *reg)
-{
-    int i;
-    free(reg->fname);
-    if ( reg->itr ) tbx_itr_destroy(reg->itr);
-    if ( reg->tbx ) tbx_destroy(reg->tbx);
-    if ( reg->file ) hts_close(reg->file);
-    if ( reg->als ) free(reg->als);
-    if ( reg->als_str.s ) free(reg->als_str.s);
-    free(reg->line.s);
-    if ( reg->regs )
-    {
-         // free only in-memory names, tbx names are const
-        for (i=0; i<reg->nseqs; i++)
-        {
-            free(reg->seq_names[i]);
-            free(reg->regs[i].regs);
-        }
-    }
-    free(reg->regs);
-    free(reg->seq_names);
-    khash_str2int_destroy(reg->seq_hash);
-    free(reg);
-}
-
-int bcf_sr_regions_seek(bcf_sr_regions_t *reg, const char *seq)
-{
-    reg->iseq = reg->start = reg->end = -1;
-    if ( khash_str2int_get(reg->seq_hash, seq, &reg->iseq) < 0 ) return -1;  // sequence seq not in regions
-
-    // using in-memory regions
-    if ( reg->regs )
-    {
-        reg->regs[reg->iseq].creg = -1;
-        return 0;
-    }
-
-    // reading regions from tabix
-    if ( reg->itr ) tbx_itr_destroy(reg->itr);
-    reg->itr = tbx_itr_querys(reg->tbx, seq);
-    if ( reg->itr ) return 0;
-
-    return -1;
-}
-
-int bcf_sr_regions_next(bcf_sr_regions_t *reg)
-{
-    if ( reg->iseq<0 ) return -1;
-    reg->start = reg->end = -1;
-    reg->nals = 0;
-
-    // using in-memory regions
-    if ( reg->regs )
-    {
-        while ( reg->iseq < reg->nseqs )
-        {
-            reg->regs[reg->iseq].creg++;
-            if ( reg->regs[reg->iseq].creg < reg->regs[reg->iseq].nregs ) break;
-            reg->iseq++;
-        }
-        if ( reg->iseq >= reg->nseqs ) { reg->iseq = -1; return -1; } // no more regions left
-        region1_t *creg = &reg->regs[reg->iseq].regs[reg->regs[reg->iseq].creg];
-        reg->start = creg->start;
-        reg->end   = creg->end;
-        return 0;
-    }
-
-    // reading from tabix
-    char *chr, *chr_end;
-    int ichr = 0, ifrom = 1, ito = 2, is_bed = 0, from, to;
-    if ( reg->tbx )
-    {
-        ichr   = reg->tbx->conf.sc-1;
-        ifrom  = reg->tbx->conf.bc-1;
-        ito    = reg->tbx->conf.ec-1;
-        if ( ito<0 ) ito = ifrom;
-        is_bed = reg->tbx->conf.preset==TBX_UCSC ? 1 : 0;
-    }
-
-    int ret = 0;
-    while ( !ret )
-    {
-        if ( reg->itr )
-        {
-            // tabix index present, reading a chromosome block
-            ret = tbx_itr_next(reg->file, reg->tbx, reg->itr, &reg->line);
-            if ( ret<0 ) { reg->iseq = -1; return -1; }
-        }
-        else
-        {
-            if ( reg->is_bin )
-            {
-                // Waited for seek which never came. Reopen in text mode and stream
-                // through the regions, otherwise hts_getline would fail
-                hts_close(reg->file);
-                reg->file = hts_open(reg->fname, "r");
-                if ( !reg->file )
-                {
-                    fprintf(stderr,"[%s:%d %s] Could not open file: %s\n", __FILE__,__LINE__,__FUNCTION__,reg->fname);
-                    reg->file = NULL;
-                    bcf_sr_regions_destroy(reg);
-                    return -1;
-                }
-                reg->is_bin = 0;
-            }
-
-            // tabix index absent, reading the whole file
-            ret = hts_getline(reg->file, KS_SEP_LINE, &reg->line);
-            if ( ret<0 ) { reg->iseq = -1; return -1; }
-        }
-        ret = _regions_parse_line(reg->line.s, ichr,ifrom,ito, &chr,&chr_end,&from,&to);
-        if ( ret<0 )
-        {
-            fprintf(stderr,"[%s:%d] Could not parse the file %s, using the columns %d,%d,%d\n", __FILE__,__LINE__,reg->fname,ichr+1,ifrom+1,ito+1);
-            return -1;
-        }
-    }
-    if ( is_bed ) from++;
-
-    *chr_end = 0;
-    if ( khash_str2int_get(reg->seq_hash, chr, &reg->iseq)<0 )
-    {
-        fprintf(stderr,"Broken tabix index? The sequence \"%s\" not in dictionary [%s]\n", chr,reg->line.s);
-        exit(1);
-    }
-    *chr_end = '\t';
-
-    reg->start = from - 1;
-    reg->end   = to - 1;
-    return 0;
-}
-
-static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec)
-{
-    if ( reg->regs )
-    {
-        // payload is not supported for in-memory regions, switch to regidx instead in future
-        fprintf(stderr,"Error: Compressed and indexed targets file is required\n");
-        exit(1);
-    }
-
-    int i = 0, max_len = 0;
-    if ( !reg->nals )
-    {
-        char *ss = reg->line.s;
-        while ( i<als_idx && *ss )
-        {
-            if ( *ss=='\t' ) i++;
-            ss++;
-        }
-        char *se = ss;
-        reg->nals = 1;
-        while ( *se && *se!='\t' )
-        {
-            if ( *se==',' ) reg->nals++;
-            se++;
-        }
-        ks_resize(&reg->als_str, se-ss+1+reg->nals);
-        reg->als_str.l = 0;
-        hts_expand(char*,reg->nals,reg->mals,reg->als);
-        reg->nals = 0;
-
-        se = ss;
-        while ( *(++se) )
-        {
-            if ( *se=='\t' ) break;
-            if ( *se!=',' ) continue;
-            reg->als[reg->nals] = &reg->als_str.s[reg->als_str.l];
-            kputsn(ss,se-ss,&reg->als_str);
-            if ( &reg->als_str.s[reg->als_str.l] - reg->als[reg->nals] > max_len ) max_len = &reg->als_str.s[reg->als_str.l] - reg->als[reg->nals];
-            reg->als_str.l++;
-            reg->nals++;
-            ss = ++se;
-        }
-        reg->als[reg->nals] = &reg->als_str.s[reg->als_str.l];
-        kputsn(ss,se-ss,&reg->als_str);
-        if ( &reg->als_str.s[reg->als_str.l] - reg->als[reg->nals] > max_len ) max_len = &reg->als_str.s[reg->als_str.l] - reg->als[reg->nals];
-        reg->nals++;
-        reg->als_type = max_len > 1 ? VCF_INDEL : VCF_SNP;  // this is a simplified check, see vcf.c:bcf_set_variant_types
-    }
-    int type = bcf_get_variant_types(rec);
-    if ( reg->als_type & VCF_INDEL )
-        return type & VCF_INDEL ? 1 : 0;
-    return !(type & VCF_INDEL) ? 1 : 0;
-}
-
-int bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, int start, int end)
-{
-    int iseq;
-    if ( khash_str2int_get(reg->seq_hash, seq, &iseq)<0 ) return -1;    // no such sequence
-
-    if ( reg->prev_seq==-1 || iseq!=reg->prev_seq || reg->prev_start > start ) // new chromosome or after a seek
-    {
-        // flush regions left on previous chromosome
-        if ( reg->missed_reg_handler && reg->prev_seq!=-1 && reg->iseq!=-1 )
-            bcf_sr_regions_flush(reg);
-
-        bcf_sr_regions_seek(reg, seq);
-        reg->start = reg->end = -1;
-    }
-    if ( reg->prev_seq==iseq && reg->iseq!=iseq ) return -2;    // no more regions on this chromosome
-    reg->prev_seq = reg->iseq;
-    reg->prev_start = start;
-
-    while ( iseq==reg->iseq && reg->end < start )
-    {
-        if ( bcf_sr_regions_next(reg) < 0 ) return -2;  // no more regions left
-        if ( reg->iseq != iseq ) return -1; // does not overlap any regions
-        if ( reg->missed_reg_handler && reg->end < start ) reg->missed_reg_handler(reg, reg->missed_reg_data);
-    }
-    if ( reg->start <= end ) return 0;    // region overlap
-    return -1;  // no overlap
-}
-
-void bcf_sr_regions_flush(bcf_sr_regions_t *reg)
-{
-    if ( !reg->missed_reg_handler || reg->prev_seq==-1 ) return;
-    while ( !bcf_sr_regions_next(reg) ) reg->missed_reg_handler(reg, reg->missed_reg_data);
-    return;
-}
-