7df6e18265341f87a69fba808aa1f92f8ebca841
markd
  Wed Apr 15 13:39:42 2026 -0700
move copy of htslib

diff --git src/htslib/cram/cram_io.c src/htslib/cram/cram_io.c
deleted file mode 100644
index 0328b6e9470..00000000000
--- src/htslib/cram/cram_io.c
+++ /dev/null
@@ -1,4599 +0,0 @@
-/*
-Copyright (c) 2012-2014 Genome Research Ltd.
-Author: James Bonfield <jkb@sanger.ac.uk>
-
-Redistribution and use in source and binary forms, with or without 
-modification, are permitted provided that the following conditions are met:
-
-   1. Redistributions of source code must retain the above copyright notice, 
-this list of conditions and the following disclaimer.
-
-   2. Redistributions in binary form must reproduce the above copyright notice, 
-this list of conditions and the following disclaimer in the documentation 
-and/or other materials provided with the distribution.
-
-   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
-Institute nor the names of its contributors may be used to endorse or promote
-products derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
-DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/*
- * CRAM I/O primitives.
- *
- * - ITF8 encoding and decoding.
- * - Block based I/O
- * - Zlib inflating and deflating (memory)
- * - CRAM basic data structure reading and writing
- * - File opening / closing
- * - Reference sequence handling
- */
-
-/*
- * TODO: BLOCK_GROW, BLOCK_RESIZE, BLOCK_APPEND and itf8_put_blk all need
- * a way to return errors for when malloc fails.
- */
-
-#include <config.h>
-
-#include <stdio.h>
-#include <errno.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <zlib.h>
-#ifdef HAVE_LIBBZ2
-#include <bzlib.h>
-#endif
-#ifdef HAVE_LIBLZMA
-#include <lzma.h>
-#endif
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <math.h>
-#include <ctype.h>
-
-#include "cram/cram.h"
-#include "cram/os.h"
-#include "htslib/hts.h"
-#include "cram/open_trace_file.h"
-#include "cram/rANS_static.h"
-
-//#define REF_DEBUG
-
-#ifdef REF_DEBUG
-#include <sys/syscall.h>
-#define gettid() (int)syscall(SYS_gettid)
-
-#define RP(...) fprintf (stderr, __VA_ARGS__)
-#else
-#define RP(...) 
-#endif
-
-#include "htslib/hfile.h"
-#include "htslib/bgzf.h"
-#include "htslib/faidx.h"
-
-#define TRIAL_SPAN 50
-#define NTRIALS 3
-
-
-/* ----------------------------------------------------------------------
- * ITF8 encoding and decoding.
- *
-* Also see the itf8_get and itf8_put macros in cram_io.h
- */
-
-/*
- * LEGACY: consider using itf8_decode_crc.
- *
- * Reads an integer in ITF-8 encoding from 'cp' and stores it in
- * *val.
- *
- * Returns the number of bytes read on success
- *        -1 on failure
- */
-int itf8_decode(cram_fd *fd, int32_t *val_p) {
-    static int nbytes[16] = {
-	0,0,0,0, 0,0,0,0,                               // 0000xxxx - 0111xxxx
-	1,1,1,1,                                        // 1000xxxx - 1011xxxx
-	2,2,                                            // 1100xxxx - 1101xxxx
-	3,                                              // 1110xxxx
-	4,                                              // 1111xxxx
-    };
-
-    static int nbits[16] = {
-	0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // 0000xxxx - 0111xxxx
-	0x3f, 0x3f, 0x3f, 0x3f,                         // 1000xxxx - 1011xxxx
-	0x1f, 0x1f,                                     // 1100xxxx - 1101xxxx
-	0x0f,                                           // 1110xxxx
-	0x0f,                                           // 1111xxxx
-    };
-
-    int32_t val = hgetc(fd->fp);
-    if (val == -1)
-	return -1;
-
-    int i = nbytes[val>>4];
-    val &= nbits[val>>4];
-
-    switch(i) {
-    case 0:
-	*val_p = val;
-	return 1;
-
-    case 1:
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	*val_p = val;
-	return 2;
-
-    case 2:
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	*val_p = val;
-	return 3;
-
-    case 3:
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	*val_p = val;
-	return 4;
-
-    case 4: // really 3.5 more, why make it different?
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<4) | (((unsigned char)hgetc(fd->fp)) & 0x0f);
-	*val_p = val;
-    }
-
-    return 5;
-}
-
-int itf8_decode_crc(cram_fd *fd, int32_t *val_p, uint32_t *crc) {
-    static int nbytes[16] = {
-	0,0,0,0, 0,0,0,0,                               // 0000xxxx - 0111xxxx
-	1,1,1,1,                                        // 1000xxxx - 1011xxxx
-	2,2,                                            // 1100xxxx - 1101xxxx
-	3,                                              // 1110xxxx
-	4,                                              // 1111xxxx
-    };
-
-    static int nbits[16] = {
-	0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // 0000xxxx - 0111xxxx
-	0x3f, 0x3f, 0x3f, 0x3f,                         // 1000xxxx - 1011xxxx
-	0x1f, 0x1f,                                     // 1100xxxx - 1101xxxx
-	0x0f,                                           // 1110xxxx
-	0x0f,                                           // 1111xxxx
-    };
-    unsigned char c[5];
-
-    int32_t val = hgetc(fd->fp);
-    if (val == -1)
-	return -1;
-
-    c[0]=val;
-
-    int i = nbytes[val>>4];
-    val &= nbits[val>>4];
-
-    switch(i) {
-    case 0:
-	*val_p = val;
-	*crc = crc32(*crc, c, 1);
-	return 1;
-
-    case 1:
-	val = (val<<8) | (c[1]=hgetc(fd->fp));
-	*val_p = val;
-	*crc = crc32(*crc, c, 2);
-	return 2;
-
-    case 2:
-	val = (val<<8) | (c[1]=hgetc(fd->fp));
-	val = (val<<8) | (c[2]=hgetc(fd->fp));
-	*val_p = val;
-	*crc = crc32(*crc, c, 3);
-	return 3;
-
-    case 3:
-	val = (val<<8) | (c[1]=hgetc(fd->fp));
-	val = (val<<8) | (c[2]=hgetc(fd->fp));
-	val = (val<<8) | (c[3]=hgetc(fd->fp));
-	*val_p = val;
-	*crc = crc32(*crc, c, 4);
-	return 4;
-
-    case 4: // really 3.5 more, why make it different?
-	val = (val<<8) |   (c[1]=hgetc(fd->fp));
-	val = (val<<8) |   (c[2]=hgetc(fd->fp));
-	val = (val<<8) |   (c[3]=hgetc(fd->fp));
-	val = (val<<4) | (((c[4]=hgetc(fd->fp))) & 0x0f);
-	*val_p = val;
-	*crc = crc32(*crc, c, 5);
-    }
-
-    return 5;
-}
-
-/*
- * Encodes and writes a single integer in ITF-8 format.
- * Returns 0 on success
- *        -1 on failure
- */
-int itf8_encode(cram_fd *fd, int32_t val) {
-    char buf[5];
-    int len = itf8_put(buf, val);
-    return hwrite(fd->fp, buf, len) == len ? 0 : -1;
-}
- 
-const int itf8_bytes[16] = {
-    1, 1, 1, 1, 1, 1, 1, 1,
-    2, 2, 2, 2, 3, 3, 4, 5
-};
-
-#ifndef ITF8_MACROS
-/*
- * As above, but decoding from memory
- */
-int itf8_get(char *cp, int32_t *val_p) {
-    unsigned char *up = (unsigned char *)cp;
-    
-    if (up[0] < 0x80) {
-	*val_p =   up[0];
-	return 1;
-    } else if (up[0] < 0xc0) {
-	*val_p = ((up[0] <<8) |  up[1])                           & 0x3fff;
-	return 2;
-    } else if (up[0] < 0xe0) {
-	*val_p = ((up[0]<<16) | (up[1]<< 8) |  up[2])             & 0x1fffff;
-	return 3;
-    } else if (up[0] < 0xf0) {
-	*val_p = ((up[0]<<24) | (up[1]<<16) | (up[2]<<8) | up[3]) & 0x0fffffff;
-	return 4;
-    } else {
-	*val_p = ((up[0] & 0x0f)<<28) | (up[1]<<20) | (up[2]<<12) | (up[3]<<4) | (up[4] & 0x0f);
-	return 5;
-    }
-}
-
-/*
- * Stores a value to memory in ITF-8 format.
- *
- * Returns the number of bytes required to store the number.
- * This is a maximum of 5 bytes.
- */
-int itf8_put(char *cp, int32_t val) {
-    if        (!(val & ~0x00000007f)) { // 1 byte
-	*cp = val;
-	return 1;
-    } else if (!(val & ~0x00003fff)) { // 2 byte
-	*cp++ = (val >> 8 ) | 0x80;
-	*cp   = val & 0xff;
-	return 2;
-    } else if (!(val & ~0x01fffff)) { // 3 byte
-	*cp++ = (val >> 16) | 0xc0;
-	*cp++ = (val >> 8 ) & 0xff;
-	*cp   = val & 0xff;
-	return 3;
-    } else if (!(val & ~0x0fffffff)) { // 4 byte
-	*cp++ = (val >> 24) | 0xe0;
-	*cp++ = (val >> 16) & 0xff;
-	*cp++ = (val >> 8 ) & 0xff;
-	*cp   = val & 0xff;
-	return 4;
-    } else {                           // 5 byte
-	*cp++ = 0xf0 | ((val>>28) & 0xff);
-	*cp++ = (val >> 20) & 0xff;
-	*cp++ = (val >> 12) & 0xff;
-	*cp++ = (val >> 4 ) & 0xff;
-	*cp = val & 0x0f;
-	return 5;
-    }
-}
-#endif
-
-/* 64-bit itf8 variant */
-int ltf8_put(char *cp, int64_t val) {
-    if        (!(val & ~((1LL<<7)-1))) {
-	*cp = val;
-	return 1;
-    } else if (!(val & ~((1LL<<(6+8))-1))) {
-	*cp++ = (val >> 8 ) | 0x80;
-	*cp   = val & 0xff;
-	return 2;
-    } else if (!(val & ~((1LL<<(5+2*8))-1))) {
-	*cp++ = (val >> 16) | 0xc0;
-	*cp++ = (val >> 8 ) & 0xff;
-	*cp   = val & 0xff;
-	return 3;
-    } else if (!(val & ~((1LL<<(4+3*8))-1))) {
-	*cp++ = (val >> 24) | 0xe0;
-	*cp++ = (val >> 16) & 0xff;
-	*cp++ = (val >> 8 ) & 0xff;
-	*cp   = val & 0xff;
-	return 4;
-    } else if (!(val & ~((1LL<<(3+4*8))-1))) {
-	*cp++ = (val >> 32) | 0xf0;
-	*cp++ = (val >> 24) & 0xff;
-	*cp++ = (val >> 16) & 0xff;
-	*cp++ = (val >> 8 ) & 0xff;
-	*cp   = val & 0xff;
-	return 5;
-    } else if (!(val & ~((1LL<<(2+5*8))-1))) {
-	*cp++ = (val >> 40) | 0xf8;
-	*cp++ = (val >> 32) & 0xff;
-	*cp++ = (val >> 24) & 0xff;
-	*cp++ = (val >> 16) & 0xff;
-	*cp++ = (val >> 8 ) & 0xff;
-	*cp   = val & 0xff;
-	return 6;
-    } else if (!(val & ~((1LL<<(1+6*8))-1))) {
-	*cp++ = (val >> 48) | 0xfc;
-	*cp++ = (val >> 40) & 0xff;
-	*cp++ = (val >> 32) & 0xff;
-	*cp++ = (val >> 24) & 0xff;
-	*cp++ = (val >> 16) & 0xff;
-	*cp++ = (val >> 8 ) & 0xff;
-	*cp   = val & 0xff;
-	return 7;
-    } else if (!(val & ~((1LL<<(7*8))-1))) {
-	*cp++ = (val >> 56) | 0xfe;
-	*cp++ = (val >> 48) & 0xff;
-	*cp++ = (val >> 40) & 0xff;
-	*cp++ = (val >> 32) & 0xff;
-	*cp++ = (val >> 24) & 0xff;
-	*cp++ = (val >> 16) & 0xff;
-	*cp++ = (val >> 8 ) & 0xff;
-	*cp   = val & 0xff;
-	return 8;
-    } else {
-	*cp++ = 0xff;
-	*cp++ = (val >> 56) & 0xff;
-	*cp++ = (val >> 48) & 0xff;
-	*cp++ = (val >> 40) & 0xff;
-	*cp++ = (val >> 32) & 0xff;
-	*cp++ = (val >> 24) & 0xff;
-	*cp++ = (val >> 16) & 0xff;
-	*cp++ = (val >> 8 ) & 0xff;
-	*cp   = val & 0xff;
-	return 9;
-    }
-}
-
-int ltf8_get(char *cp, int64_t *val_p) {
-    unsigned char *up = (unsigned char *)cp;
-    
-    if (up[0] < 0x80) {
-	*val_p =   up[0];
-	return 1;
-    } else if (up[0] < 0xc0) {
-	*val_p = (((uint64_t)up[0]<< 8) |
-		   (uint64_t)up[1]) & (((1LL<<(6+8)))-1);
-	return 2;
-    } else if (up[0] < 0xe0) {
-	*val_p = (((uint64_t)up[0]<<16) |
-		  ((uint64_t)up[1]<< 8) |
-		   (uint64_t)up[2]) & ((1LL<<(5+2*8))-1);
-	return 3;
-    } else if (up[0] < 0xf0) {
-	*val_p = (((uint64_t)up[0]<<24) |
-		  ((uint64_t)up[1]<<16) |
-		  ((uint64_t)up[2]<< 8) |
-		   (uint64_t)up[3]) & ((1LL<<(4+3*8))-1);
-	return 4;
-    } else if (up[0] < 0xf8) {
-	*val_p = (((uint64_t)up[0]<<32) |
-		  ((uint64_t)up[1]<<24) |
-		  ((uint64_t)up[2]<<16) |
-		  ((uint64_t)up[3]<< 8) |
-		   (uint64_t)up[4]) & ((1LL<<(3+4*8))-1);
-	return 5;
-    } else if (up[0] < 0xfc) {
-	*val_p = (((uint64_t)up[0]<<40) |
-		  ((uint64_t)up[1]<<32) |
-		  ((uint64_t)up[2]<<24) |
-		  ((uint64_t)up[3]<<16) |
-		  ((uint64_t)up[4]<< 8) |
-		   (uint64_t)up[5]) & ((1LL<<(2+5*8))-1);
-	return 6;
-    } else if (up[0] < 0xfe) {
-	*val_p = (((uint64_t)up[0]<<48) |
-		  ((uint64_t)up[1]<<40) |
-		  ((uint64_t)up[2]<<32) |
-		  ((uint64_t)up[3]<<24) |
-		  ((uint64_t)up[4]<<16) |
-		  ((uint64_t)up[5]<< 8) |
-		   (uint64_t)up[6]) & ((1LL<<(1+6*8))-1);
-	return 7;
-    } else if (up[0] < 0xff) {
-	*val_p = (((uint64_t)up[1]<<48) |
-		  ((uint64_t)up[2]<<40) |
-		  ((uint64_t)up[3]<<32) |
-		  ((uint64_t)up[4]<<24) |
-		  ((uint64_t)up[5]<<16) |
-		  ((uint64_t)up[6]<< 8) |
-		   (uint64_t)up[7]) & ((1LL<<(7*8))-1);
-	return 8;
-    } else {
-	*val_p = (((uint64_t)up[1]<<56) |
-		  ((uint64_t)up[2]<<48) |
-		  ((uint64_t)up[3]<<40) |
-		  ((uint64_t)up[4]<<32) |
-		  ((uint64_t)up[5]<<24) |
-		  ((uint64_t)up[6]<<16) |
-		  ((uint64_t)up[7]<< 8) |
-		   (uint64_t)up[8]);
-	return 9;
-    }
-}
-
-/*
- * LEGACY: consider using ltf8_decode_crc.
- */
-int ltf8_decode(cram_fd *fd, int64_t *val_p) {
-    int c = hgetc(fd->fp);
-    int64_t val = (unsigned char)c;
-    if (c == -1)
-	return -1;
-
-    if (val < 0x80) {
-	*val_p =   val;
-	return 1;
-
-    } else if (val < 0xc0) {
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	*val_p = val & (((1LL<<(6+8)))-1);
-	return 2;
-
-    } else if (val < 0xe0) {
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	*val_p = val & ((1LL<<(5+2*8))-1);
-	return 3;
-
-    } else if (val < 0xf0) {
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	*val_p = val & ((1LL<<(4+3*8))-1);
-	return 4;
-
-    } else if (val < 0xf8) {
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	*val_p = val & ((1LL<<(3+4*8))-1);
-	return 5;
-
-    } else if (val < 0xfc) {
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	*val_p = val & ((1LL<<(2+5*8))-1);
-	return 6;
-
-    } else if (val < 0xfe) {
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	*val_p = val & ((1LL<<(1+6*8))-1);
-	return 7;
-
-    } else if (val < 0xff) {
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	*val_p = val & ((1LL<<(7*8))-1);
-	return 8;
-
-    } else {
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	val = (val<<8) | (unsigned char)hgetc(fd->fp);
-	*val_p = val;
-    }
-
-    return 9;
-}
-
-int ltf8_decode_crc(cram_fd *fd, int64_t *val_p, uint32_t *crc) {
-    unsigned char c[9];
-    int64_t val = (unsigned char)hgetc(fd->fp);
-    if (val == -1)
-	return -1;
-
-    c[0] = val;
-
-    if (val < 0x80) {
-	*val_p =   val;
-	*crc = crc32(*crc, c, 1);
-	return 1;
-
-    } else if (val < 0xc0) {
-	val = (val<<8) | (c[1]=hgetc(fd->fp));;
-	*val_p = val & (((1LL<<(6+8)))-1);
-	*crc = crc32(*crc, c, 2);
-	return 2;
-
-    } else if (val < 0xe0) {
-	val = (val<<8) | (c[1]=hgetc(fd->fp));;
-	val = (val<<8) | (c[2]=hgetc(fd->fp));;
-	*val_p = val & ((1LL<<(5+2*8))-1);
-	*crc = crc32(*crc, c, 3);
-	return 3;
-
-    } else if (val < 0xf0) {
-	val = (val<<8) | (c[1]=hgetc(fd->fp));;
-	val = (val<<8) | (c[2]=hgetc(fd->fp));;
-	val = (val<<8) | (c[3]=hgetc(fd->fp));;
-	*val_p = val & ((1LL<<(4+3*8))-1);
-	*crc = crc32(*crc, c, 4);
-	return 4;
-
-    } else if (val < 0xf8) {
-	val = (val<<8) | (c[1]=hgetc(fd->fp));;
-	val = (val<<8) | (c[2]=hgetc(fd->fp));;
-	val = (val<<8) | (c[3]=hgetc(fd->fp));;
-	val = (val<<8) | (c[4]=hgetc(fd->fp));;
-	*val_p = val & ((1LL<<(3+4*8))-1);
-	*crc = crc32(*crc, c, 5);
-	return 5;
-
-    } else if (val < 0xfc) {
-	val = (val<<8) | (c[1]=hgetc(fd->fp));;
-	val = (val<<8) | (c[2]=hgetc(fd->fp));;
-	val = (val<<8) | (c[3]=hgetc(fd->fp));;
-	val = (val<<8) | (c[4]=hgetc(fd->fp));;
-	val = (val<<8) | (c[5]=hgetc(fd->fp));;
-	*val_p = val & ((1LL<<(2+5*8))-1);
-	*crc = crc32(*crc, c, 6);
-	return 6;
-
-    } else if (val < 0xfe) {
-	val = (val<<8) | (c[1]=hgetc(fd->fp));;
-	val = (val<<8) | (c[2]=hgetc(fd->fp));;
-	val = (val<<8) | (c[3]=hgetc(fd->fp));;
-	val = (val<<8) | (c[4]=hgetc(fd->fp));;
-	val = (val<<8) | (c[5]=hgetc(fd->fp));;
-	val = (val<<8) | (c[6]=hgetc(fd->fp));;
-	*val_p = val & ((1LL<<(1+6*8))-1);
-	*crc = crc32(*crc, c, 7);
-	return 7;
-
-    } else if (val < 0xff) {
-	val = (val<<8) | (c[1]=hgetc(fd->fp));;
-	val = (val<<8) | (c[2]=hgetc(fd->fp));;
-	val = (val<<8) | (c[3]=hgetc(fd->fp));;
-	val = (val<<8) | (c[4]=hgetc(fd->fp));;
-	val = (val<<8) | (c[5]=hgetc(fd->fp));;
-	val = (val<<8) | (c[6]=hgetc(fd->fp));;
-	val = (val<<8) | (c[7]=hgetc(fd->fp));;
-	*val_p = val & ((1LL<<(7*8))-1);
-	*crc = crc32(*crc, c, 8);
-	return 8;
-
-    } else {
-	val = (val<<8) | (c[1]=hgetc(fd->fp));;
-	val = (val<<8) | (c[2]=hgetc(fd->fp));;
-	val = (val<<8) | (c[3]=hgetc(fd->fp));;
-	val = (val<<8) | (c[4]=hgetc(fd->fp));;
-	val = (val<<8) | (c[5]=hgetc(fd->fp));;
-	val = (val<<8) | (c[6]=hgetc(fd->fp));;
-	val = (val<<8) | (c[7]=hgetc(fd->fp));;
-	val = (val<<8) | (c[8]=hgetc(fd->fp));;
-	*crc = crc32(*crc, c, 9);
-	*val_p = val;
-    }
-
-    return 9;
-}
-
-/*
- * Pushes a value in ITF8 format onto the end of a block.
- * This shouldn't be used for high-volume data as it is not the fastest
- * method.
- *
- * Returns the number of bytes written
- */
-int itf8_put_blk(cram_block *blk, int val) {
-    char buf[5];
-    int sz;
-
-    sz = itf8_put(buf, val);
-    BLOCK_APPEND(blk, buf, sz);
-    return sz;
-}
-
-/*
- * Decodes a 32-bit little endian value from fd and stores in val.
- *
- * Returns the number of bytes read on success
- *         -1 on failure
- */
-int int32_decode(cram_fd *fd, int32_t *val) {
-    int32_t i;
-    if (4 != hread(fd->fp, &i, 4))
-	return -1;
-
-    *val = le_int4(i);
-    return 4;
-}
-
-/*
- * Encodes a 32-bit little endian value 'val' and writes to fd.
- *
- * Returns the number of bytes written on success
- *         -1 on failure
- */
-int int32_encode(cram_fd *fd, int32_t val) {
-    val = le_int4(val);
-    if (4 != hwrite(fd->fp, &val, 4))
-	return -1;
-
-    return 4;
-}
-
-/* As int32_decoded/encode, but from/to blocks instead of cram_fd */
-int int32_get_blk(cram_block *b, int32_t *val) {
-    if (b->uncomp_size - BLOCK_SIZE(b) < 4)
-	return -1;
-
-    *val =
-	 b->data[b->byte  ]        |
-	(b->data[b->byte+1] <<  8) |
-	(b->data[b->byte+2] << 16) |
-	(b->data[b->byte+3] << 24);
-    BLOCK_SIZE(b) += 4;
-    return 4;
-}
-
-/* As int32_decoded/encode, but from/to blocks instead of cram_fd */
-int int32_put_blk(cram_block *b, int32_t val) {
-    unsigned char cp[4];
-    cp[0] = ( val      & 0xff);
-    cp[1] = ((val>>8)  & 0xff);
-    cp[2] = ((val>>16) & 0xff);
-    cp[3] = ((val>>24) & 0xff);
-
-    BLOCK_APPEND(b, cp, 4);
-    return b->data ? 0 : -1;
-}
-
-/* ----------------------------------------------------------------------
- * zlib compression code - from Gap5's tg_iface_g.c
- * They're static here as they're only used within the cram_compress_block
- * and cram_uncompress_block functions, which are the external interface.
- */
-char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size) {
-    z_stream s;
-    unsigned char *data = NULL; /* Uncompressed output */
-    int data_alloc = 0;
-    int err;
-
-    /* Starting point at uncompressed size, and scale after that */
-    data = malloc(data_alloc = csize*1.2+100);
-    if (!data)
-	return NULL;
-
-    /* Initialise zlib stream */
-    s.zalloc = Z_NULL; /* use default allocation functions */
-    s.zfree  = Z_NULL;
-    s.opaque = Z_NULL;
-    s.next_in  = (unsigned char *)cdata;
-    s.avail_in = csize;
-    s.total_in = 0;
-    s.next_out  = data;
-    s.avail_out = data_alloc;
-    s.total_out = 0;
-
-    //err = inflateInit(&s);
-    err = inflateInit2(&s, 15 + 32);
-    if (err != Z_OK) {
-	fprintf(stderr, "zlib inflateInit error: %s\n", s.msg);
-	free(data);
-	return NULL;
-    }
-
-    /* Decode to 'data' array */
-    for (;s.avail_in;) {
-	unsigned char *data_tmp;
-	int alloc_inc;
-
-	s.next_out = &data[s.total_out];
-	err = inflate(&s, Z_NO_FLUSH);
-	if (err == Z_STREAM_END)
-	    break;
-
-	if (err != Z_OK) {
-	    fprintf(stderr, "zlib inflate error: %s\n", s.msg);
-	    if (data)
-		free(data);
-	    return NULL;
-	}
-
-	/* More to come, so realloc based on growth so far */
-	alloc_inc = (double)s.avail_in/s.total_in * s.total_out + 100;
-	data = realloc((data_tmp = data), data_alloc += alloc_inc);
-	if (!data) {
-	    free(data_tmp);
-	    return NULL;
-	}
-	s.avail_out += alloc_inc;
-    }
-    inflateEnd(&s);
-
-    *size = s.total_out;
-    return (char *)data;
-}
-
-static char *zlib_mem_deflate(char *data, size_t size, size_t *cdata_size,
-			      int level, int strat) {
-    z_stream s;
-    unsigned char *cdata = NULL; /* Compressed output */
-    int cdata_alloc = 0;
-    int cdata_pos = 0;
-    int err;
-
-    cdata = malloc(cdata_alloc = size*1.05+100);
-    if (!cdata)
-	return NULL;
-    cdata_pos = 0;
-
-    /* Initialise zlib stream */
-    s.zalloc = Z_NULL; /* use default allocation functions */
-    s.zfree  = Z_NULL;
-    s.opaque = Z_NULL;
-    s.next_in  = (unsigned char *)data;
-    s.avail_in = size;
-    s.total_in = 0;
-    s.next_out  = cdata;
-    s.avail_out = cdata_alloc;
-    s.total_out = 0;
-    s.data_type = Z_BINARY;
-
-    err = deflateInit2(&s, level, Z_DEFLATED, 15|16, 9, strat);
-    if (err != Z_OK) {
-	fprintf(stderr, "zlib deflateInit2 error: %s\n", s.msg);
-	return NULL;
-    }
-
-    /* Encode to 'cdata' array */
-    for (;s.avail_in;) {
-	s.next_out = &cdata[cdata_pos];
-	s.avail_out = cdata_alloc - cdata_pos;
-	if (cdata_alloc - cdata_pos <= 0) {
-	    fprintf(stderr, "Deflate produced larger output than expected. Abort\n"); 
-	    return NULL;
-	}
-	err = deflate(&s, Z_NO_FLUSH);
-	cdata_pos = cdata_alloc - s.avail_out;
-	if (err != Z_OK) {
-	    fprintf(stderr, "zlib deflate error: %s\n", s.msg);
-	    break;
-	}
-    }
-    if (deflate(&s, Z_FINISH) != Z_STREAM_END) {
-	fprintf(stderr, "zlib deflate error: %s\n", s.msg);
-    }
-    *cdata_size = s.total_out;
-
-    if (deflateEnd(&s) != Z_OK) {
-	fprintf(stderr, "zlib deflate error: %s\n", s.msg);
-    }
-    return (char *)cdata;
-}
-
-#ifdef HAVE_LIBLZMA
-/* ------------------------------------------------------------------------ */
-/*
- * Data compression routines using liblzma (xz)
- *
- * On a test set this shrunk the main db from 136157104 bytes to 114796168, but
- * caused tg_index to grow from 2m43.707s to 15m3.961s. Exporting as bfastq
- * went from 18.3s to 36.3s. So decompression suffers too, but not as bad
- * as compression times.
- *
- * For now we disable this functionality. If it's to be reenabled make sure you
- * improve the mem_inflate implementation as it's just a test hack at the
- * moment.
- */
-
-static char *lzma_mem_deflate(char *data, size_t size, size_t *cdata_size,
-			      int level) {
-    char *out;
-    size_t out_size = lzma_stream_buffer_bound(size);
-    *cdata_size = 0;
-
-    out = malloc(out_size);
-
-    /* Single call compression */
-    if (LZMA_OK != lzma_easy_buffer_encode(level, LZMA_CHECK_CRC32, NULL,
-					   (uint8_t *)data, size,
-					   (uint8_t *)out, cdata_size,
-					   out_size))
-    	return NULL;
-
-    return out;
-}
-
-static char *lzma_mem_inflate(char *cdata, size_t csize, size_t *size) {
-    lzma_stream strm = LZMA_STREAM_INIT;
-    size_t out_size = 0, out_pos = 0;
-    char *out = NULL;
-    int r;
-
-    /* Initiate the decoder */
-    if (LZMA_OK != lzma_stream_decoder(&strm, 50000000, 0))
-	return NULL;
-
-    /* Decode loop */
-    strm.avail_in = csize;
-    strm.next_in = (uint8_t *)cdata;
-
-    for (;strm.avail_in;) {
-	if (strm.avail_in > out_size - out_pos) {
-	    out_size += strm.avail_in * 4 + 32768;
-	    out = realloc(out, out_size);
-	}
-	strm.avail_out = out_size - out_pos;
-	strm.next_out = (uint8_t *)&out[out_pos];
-
-	r = lzma_code(&strm, LZMA_RUN);
-	if (LZMA_OK != r && LZMA_STREAM_END != r) {
-	    fprintf(stderr, "r=%d\n", r);
-	    fprintf(stderr, "mem=%"PRId64"d\n", (int64_t)lzma_memusage(&strm));
-	    return NULL;
-	}
-
-	out_pos = strm.total_out;
-
-	if (r == LZMA_STREAM_END)
-	    break;
-    }
-
-    /* finish up any unflushed data; necessary? */
-    r = lzma_code(&strm, LZMA_FINISH);
-    if (r != LZMA_OK && r != LZMA_STREAM_END) {
-	fprintf(stderr, "r=%d\n", r);
-	return NULL;
-    }
-
-    out = realloc(out, strm.total_out);
-    *size = strm.total_out;
-
-    lzma_end(&strm);
-
-    return out;
-}
-#endif
-
-/* ----------------------------------------------------------------------
- * CRAM blocks - the dynamically growable data block. We have code to
- * create, update, (un)compress and read/write.
- *
- * These are derived from the deflate_interlaced.c blocks, but with the
- * CRAM extension of content types and IDs.
- */
-
-/*
- * Allocates a new cram_block structure with a specified content_type and
- * id.
- *
- * Returns block pointer on success
- *         NULL on failure
- */
-cram_block *cram_new_block(enum cram_content_type content_type,
-			   int content_id) {
-    cram_block *b = malloc(sizeof(*b));
-    if (!b)
-	return NULL;
-    b->method = b->orig_method = RAW;
-    b->content_type = content_type;
-    b->content_id = content_id;
-    b->comp_size = 0;
-    b->uncomp_size = 0;
-    b->data = NULL;
-    b->alloc = 0;
-    b->byte = 0;
-    b->bit = 7; // MSB
-
-    return b;
-}
-
-/*
- * Reads a block from a cram file.
- * Returns cram_block pointer on success.
- *         NULL on failure
- */
-cram_block *cram_read_block(cram_fd *fd) {
-    cram_block *b = malloc(sizeof(*b));
-    unsigned char c;
-    uint32_t crc = 0;
-    if (!b)
-	return NULL;
-
-    //fprintf(stderr, "Block at %d\n", (int)ftell(fd->fp));
-
-    if (-1 == (b->method      = hgetc(fd->fp))) { free(b); return NULL; }
-    c = b->method; crc = crc32(crc, &c, 1);
-    if (-1 == (b->content_type= hgetc(fd->fp))) { free(b); return NULL; }
-    c = b->content_type; crc = crc32(crc, &c, 1);
-    if (-1 == itf8_decode_crc(fd, &b->content_id, &crc))  { free(b); return NULL; }
-    if (-1 == itf8_decode_crc(fd, &b->comp_size, &crc))   { free(b); return NULL; }
-    if (-1 == itf8_decode_crc(fd, &b->uncomp_size, &crc)) { free(b); return NULL; }
-
-    //    fprintf(stderr, "  method %d, ctype %d, cid %d, csize %d, ucsize %d\n",
-    //	    b->method, b->content_type, b->content_id, b->comp_size, b->uncomp_size);
-
-    if (b->method == RAW) {
-	b->alloc = b->uncomp_size;
-	if (!(b->data = malloc(b->uncomp_size))){ free(b); return NULL; }
-	if (b->uncomp_size != hread(fd->fp, b->data, b->uncomp_size)) {
-	    free(b->data);
-	    free(b);
-	    return NULL;
-	}
-    } else {
-	b->alloc = b->comp_size;
-	if (!(b->data = malloc(b->comp_size)))  { free(b); return NULL; }
-	if (b->comp_size != hread(fd->fp, b->data, b->comp_size)) {
-	    free(b->data);
-	    free(b);
-	    return NULL;
-	}
-    }
-
-    if (CRAM_MAJOR_VERS(fd->version) >= 3) {
-	if (-1 == int32_decode(fd, (int32_t *)&b->crc32)) {
-	    free(b);
-	    return NULL;
-	}
-
-	crc = crc32(crc, b->data ? b->data : (uc *)"", b->alloc);
-	if (crc != b->crc32) {
-	    fprintf(stderr, "Block CRC32 failure\n");
-	    free(b->data);
-	    free(b);
-	    return NULL;
-	}
-    }
-
-    b->orig_method = b->method;
-    b->idx = 0;
-    b->byte = 0;
-    b->bit = 7; // MSB
-
-    return b;
-}
-
-
-/*
- * Computes the size of a cram block, including the block
- * header itself.
- */
-uint32_t cram_block_size(cram_block *b) {
-    unsigned char dat[100], *cp = dat;;
-    uint32_t sz;
-
-    *cp++ = b->method;
-    *cp++ = b->content_type;
-    cp += itf8_put(cp, b->content_id);
-    cp += itf8_put(cp, b->comp_size);
-    cp += itf8_put(cp, b->uncomp_size);
-
-    sz = cp-dat + 4;
-    sz += b->method == RAW ? b->uncomp_size : b->comp_size;
-
-    return sz;
-}
-
-/*
- * Writes a CRAM block.
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_write_block(cram_fd *fd, cram_block *b) {
-    assert(b->method != RAW || (b->comp_size == b->uncomp_size));
-
-    if (hputc(b->method,       fd->fp)  == EOF) return -1;
-    if (hputc(b->content_type, fd->fp)  == EOF) return -1;
-    if (itf8_encode(fd, b->content_id)  ==  -1) return -1;
-    if (itf8_encode(fd, b->comp_size)   ==  -1) return -1;
-    if (itf8_encode(fd, b->uncomp_size) ==  -1) return -1;
-
-    if (b->method == RAW) {
-	if (b->uncomp_size != hwrite(fd->fp, b->data, b->uncomp_size))
-	    return -1;
-    } else {
-	if (b->comp_size != hwrite(fd->fp, b->data, b->comp_size))
-	    return -1;
-    }
-
-    if (CRAM_MAJOR_VERS(fd->version) >= 3) {
-	unsigned char dat[100], *cp = dat;;
-	uint32_t crc;
-
-	*cp++ = b->method;
-	*cp++ = b->content_type;
-	cp += itf8_put(cp, b->content_id);
-	cp += itf8_put(cp, b->comp_size);
-	cp += itf8_put(cp, b->uncomp_size);
-	crc = crc32(0L, dat, cp-dat);
-
-	if (b->method == RAW) {
-	    b->crc32 = crc32(crc, b->data ? b->data : (uc*)"", b->uncomp_size);
-	} else {
-	    b->crc32 = crc32(crc, b->data ? b->data : (uc*)"", b->comp_size);
-	}
-
-	if (-1 == int32_encode(fd, b->crc32))
-	    return -1;
-    }
-
-    return 0;
-}
-
-/*
- * Frees a CRAM block, deallocating internal data too.
- */
-void cram_free_block(cram_block *b) {
-    if (!b)
-	return;
-    if (b->data)
-	free(b->data);
-    free(b);
-}
-
-/*
- * Uncompresses a CRAM block, if compressed.
- */
-int cram_uncompress_block(cram_block *b) {
-    char *uncomp;
-    size_t uncomp_size = 0;
-
-    if (b->uncomp_size == 0) {
-	// blank block
-	b->method = RAW;
-	return 0;
-    }
-
-    switch (b->method) {
-    case RAW:
-	return 0;
-
-    case GZIP:
-	uncomp = zlib_mem_inflate((char *)b->data, b->comp_size, &uncomp_size);
-	if (!uncomp)
-	    return -1;
-	if ((int)uncomp_size != b->uncomp_size) {
-	    free(uncomp);
-	    return -1;
-	}
-	free(b->data);
-	b->data = (unsigned char *)uncomp;
-	b->alloc = uncomp_size;
-	b->method = RAW;
-	break;
-
-#ifdef HAVE_LIBBZ2
-    case BZIP2: {
-	unsigned int usize = b->uncomp_size;
-	if (!(uncomp = malloc(usize)))
-	    return -1;
-	if (BZ_OK != BZ2_bzBuffToBuffDecompress(uncomp, &usize,
-						(char *)b->data, b->comp_size,
-						0, 0)) {
-	    free(uncomp);
-	    return -1;
-	}
-	free(b->data);
-	b->data = (unsigned char *)uncomp;
-	b->alloc = usize;
-	b->method = RAW;
-	b->uncomp_size = usize; // Just incase it differs
-	break;
-    }
-#else
-    case BZIP2:
-	fprintf(stderr, "Bzip2 compression is not compiled into this "
-		"version.\nPlease rebuild and try again.\n");
-	return -1;
-#endif
-
-#ifdef HAVE_LIBLZMA
-    case LZMA:
-	uncomp = lzma_mem_inflate((char *)b->data, b->comp_size, &uncomp_size);
-	if (!uncomp)
-	    return -1;
-	if ((int)uncomp_size != b->uncomp_size)
-	    return -1;
-	free(b->data);
-	b->data = (unsigned char *)uncomp;
-	b->alloc = uncomp_size;
-	b->method = RAW;
-	break;
-#else
-    case LZMA:
-	fprintf(stderr, "Lzma compression is not compiled into this "
-		"version.\nPlease rebuild and try again.\n");
-	return -1;
-	break;
-#endif
-
-    case RANS: {
-	unsigned int usize = b->uncomp_size, usize2;
-	uncomp = (char *)rans_uncompress(b->data, b->comp_size, &usize2);
-	if (!uncomp || usize != usize2)
- 	    return -1;
-	free(b->data);
-	b->data = (unsigned char *)uncomp;
-	b->alloc = usize2;
-	b->method = RAW;
-	b->uncomp_size = usize2; // Just incase it differs
-	//fprintf(stderr, "Expanded %d to %d\n", b->comp_size, b->uncomp_size);
-	break;
-    }
-
-    default:
-	return -1;
-    }
-
-    return 0;
-}
-
-static char *cram_compress_by_method(char *in, size_t in_size,
-				     size_t *out_size,
-				     enum cram_block_method method,
-				     int level, int strat) {
-    switch (method) {
-    case GZIP:
-	return zlib_mem_deflate(in, in_size, out_size, level, strat);
-
-    case BZIP2: {
-#ifdef HAVE_LIBBZ2
-	unsigned int comp_size = in_size*1.01 + 600;
-	char *comp = malloc(comp_size);
-	if (!comp)
-	    return NULL;
-
-	if (BZ_OK != BZ2_bzBuffToBuffCompress(comp, &comp_size,
-					      in, in_size,
-					      level, 0, 30)) {
-	    free(comp);
-	    return NULL;
-	}
-	*out_size = comp_size;
-	return comp;
-#else
-	return NULL;
-#endif
-    }
-
-    case LZMA:
-#ifdef HAVE_LIBLZMA
-	return lzma_mem_deflate(in, in_size, out_size, level);
-#else
-	return NULL;
-#endif
-
-    case RANS0: {
-	unsigned int out_size_i;
-	unsigned char *cp;
-	cp = rans_compress((unsigned char *)in, in_size, &out_size_i, 0);
-	*out_size = out_size_i;
-	return (char *)cp;
-    }
-
-    case RANS1: {
-	unsigned int out_size_i;
-	unsigned char *cp;
-	
-	cp = rans_compress((unsigned char *)in, in_size, &out_size_i, 1);
-	*out_size = out_size_i;
-	return (char *)cp;
-    }
-
-    case RAW:
-	break;
-
-    default:
-	return NULL;
-    }
-
-    return NULL;
-}
-
-
-/*
- * Compresses a block using one of two different zlib strategies. If we only
- * want one choice set strat2 to be -1.
- *
- * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED
- * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is
- * significantly faster.
- *
- * Method and level -1 implies defaults, as specified in cram_fd.
- */
-int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
-			int method, int level) {
-
-    char *comp = NULL;
-    size_t comp_size = 0;
-    int strat;
-
-    if (b->method != RAW) {
-        // Maybe already compressed if s->block[0] was compressed and
-        // we have e.g. s->block[DS_BA] set to s->block[0] due to only
-        // one base type present and hence using E_HUFFMAN on block 0.
-        // A second explicit attempt to compress the same block then
-        // occurs.
-        return 0;
-    }
-
-    if (method == -1) {
-	method = 1<<GZIP;
-	if (fd->use_bz2)
-	    method |= 1<<BZIP2;
-	if (fd->use_lzma)
-	    method |= 1<<LZMA;
-    }
-
-    if (level == -1)
-	level = fd->level;
-
-    //fprintf(stderr, "IN: block %d, sz %d\n", b->content_id, b->uncomp_size);
-
-    if (method == RAW || level == 0 || b->uncomp_size == 0) {
-	b->method = RAW;
-	b->comp_size = b->uncomp_size;
-	//fprintf(stderr, "Skip block id %d\n", b->content_id);
-	return 0;
-    }
-
-    if (metrics) {
-	pthread_mutex_lock(&fd->metrics_lock);
-	if (metrics->trial > 0 || --metrics->next_trial <= 0) {
-	    size_t sz_best = INT_MAX;
-	    size_t sz_gz_rle = 0;
-	    size_t sz_gz_def = 0;
-	    size_t sz_rans0 = 0;
-	    size_t sz_rans1 = 0;
-	    size_t sz_bzip2 = 0;
-	    size_t sz_lzma = 0;
-	    int method_best = 0;
-	    char *c_best = NULL, *c = NULL;
-
-	    if (metrics->revised_method)
-		method = metrics->revised_method;
-	    else
-		metrics->revised_method = method;
-
-	    if (metrics->next_trial == 0) {
-		metrics->next_trial = TRIAL_SPAN;
-		metrics->trial = NTRIALS;
-		metrics->sz_gz_rle /= 2;
-		metrics->sz_gz_def /= 2;
-		metrics->sz_rans0  /= 2;
-		metrics->sz_rans1  /= 2;
-		metrics->sz_bzip2  /= 2;
-		metrics->sz_lzma   /= 2;
-	    }
-
-	    pthread_mutex_unlock(&fd->metrics_lock);
-	    
-	    if (method & (1<<GZIP_RLE)) {
-		c = cram_compress_by_method((char *)b->data, b->uncomp_size,
-					    &sz_gz_rle, GZIP, 1, Z_RLE);
-		if (c && sz_best > sz_gz_rle) {
-		    sz_best = sz_gz_rle;
-		    method_best = GZIP_RLE;
-		    if (c_best)
-			free(c_best);
-		    c_best = c;
-		} else if (c) {
-		    free(c);
-		} else {
-		    sz_gz_rle = b->uncomp_size*2+1000;
-		}
-
-		//fprintf(stderr, "Block %d; %d->%d\n", b->content_id, b->uncomp_size, sz_gz_rle);
-	    }
-
-	    if (method & (1<<GZIP)) {
-		c = cram_compress_by_method((char *)b->data, b->uncomp_size,
-					    &sz_gz_def, GZIP, level,
-					    Z_FILTERED);
-		if (c && sz_best > sz_gz_def) {
-		    sz_best = sz_gz_def;
-		    method_best = GZIP;
-		    if (c_best)
-			free(c_best);
-		    c_best = c;
-		} else if (c) {
-		    free(c);
-		} else {
-		    sz_gz_def = b->uncomp_size*2+1000;
-		}
-
-		//fprintf(stderr, "Block %d; %d->%d\n", b->content_id, b->uncomp_size, sz_gz_def);
-	    }
-
-	    if (method & (1<<RANS0)) {
-		c = cram_compress_by_method((char *)b->data, b->uncomp_size,
-					    &sz_rans0, RANS0, 0, 0);
-		if (c && sz_best > sz_rans0) {
-		    sz_best = sz_rans0;
-		    method_best = RANS0;
-		    if (c_best)
-			free(c_best);
-		    c_best = c;
-		} else if (c) {
-		    free(c);
-		} else {
-		    sz_rans0 = b->uncomp_size*2+1000;
-		}
-	    }
-
-	    if (method & (1<<RANS1)) {
-		c = cram_compress_by_method((char *)b->data, b->uncomp_size,
-					    &sz_rans1, RANS1, 0, 0);
-		if (c && sz_best > sz_rans1) {
-		    sz_best = sz_rans1;
-		    method_best = RANS1;
-		    if (c_best)
-			free(c_best);
-		    c_best = c;
-		} else if (c) {
-		    free(c);
-		} else {
-		    sz_rans1 = b->uncomp_size*2+1000;
-		}
-	    }
-
-	    if (method & (1<<BZIP2)) {
-		c = cram_compress_by_method((char *)b->data, b->uncomp_size,
-					    &sz_bzip2, BZIP2, level, 0);
-		if (c && sz_best > sz_bzip2) {
-		    sz_best = sz_bzip2;
-		    method_best = BZIP2;
-		    if (c_best)
-			free(c_best);
-		    c_best = c;
-		} else if (c) {
-		    free(c);
-		} else {
-		    sz_bzip2 = b->uncomp_size*2+1000;
-		}
-	    }
-
-	    if (method & (1<<LZMA)) {
-		c = cram_compress_by_method((char *)b->data, b->uncomp_size,
-					    &sz_lzma, LZMA, level, 0);
-		if (c && sz_best > sz_lzma) {
-		    sz_best = sz_lzma;
-		    method_best = LZMA;
-		    if (c_best)
-			free(c_best);
-		    c_best = c;
-		} else if (c) {
-		    free(c);
-		} else {
-		    sz_lzma = b->uncomp_size*2+1000;
-		}
-	    }
-
-	    //fprintf(stderr, "sz_best = %d\n", sz_best);
-
-	    free(b->data);
-	    b->data = (unsigned char *)c_best;
-	    //printf("method_best = %s\n", cram_block_method2str(method_best));
-	    b->method = method_best == GZIP_RLE ? GZIP : method_best;
-	    b->comp_size = sz_best;
-
-	    pthread_mutex_lock(&fd->metrics_lock);
-	    metrics->sz_gz_rle += sz_gz_rle;
-	    metrics->sz_gz_def += sz_gz_def;
-	    metrics->sz_rans0  += sz_rans0;
-	    metrics->sz_rans1  += sz_rans1;
-	    metrics->sz_bzip2  += sz_bzip2;
-	    metrics->sz_lzma   += sz_lzma;
-	    if (--metrics->trial == 0) {
-		int best_method = RAW;
-		int best_sz = INT_MAX;
-
-		// Scale methods by cost
-		if (fd->level <= 3) {
-		    metrics->sz_rans1  *= 1.02;
-		    metrics->sz_gz_def *= 1.04;
-		    metrics->sz_bzip2  *= 1.08;
-		    metrics->sz_lzma   *= 1.10;
-		} else if (fd->level <= 6) {
-		    metrics->sz_rans1  *= 1.01;
-		    metrics->sz_gz_def *= 1.02;
-		    metrics->sz_bzip2  *= 1.03;
-		    metrics->sz_lzma   *= 1.05;
-		}
-
-		if (method & (1<<GZIP_RLE) && best_sz > metrics->sz_gz_rle)
-		    best_sz = metrics->sz_gz_rle, best_method = GZIP_RLE;
-
-		if (method & (1<<GZIP) && best_sz > metrics->sz_gz_def)
-		    best_sz = metrics->sz_gz_def, best_method = GZIP;
-
-		if (method & (1<<RANS0) && best_sz > metrics->sz_rans0)
-		    best_sz = metrics->sz_rans0, best_method = RANS0;
-
-		if (method & (1<<RANS1) && best_sz > metrics->sz_rans1)
-		    best_sz = metrics->sz_rans1, best_method = RANS1;
-
-		if (method & (1<<BZIP2) && best_sz > metrics->sz_bzip2)
-		    best_sz = metrics->sz_bzip2, best_method = BZIP2;
-
-		if (method & (1<<LZMA) && best_sz > metrics->sz_lzma)
-		    best_sz = metrics->sz_lzma, best_method = LZMA;
-
-		if (best_method == GZIP_RLE) {
-		    metrics->method = GZIP;
-		    metrics->strat  = Z_RLE;
-		} else {
-		    metrics->method = best_method;
-		    metrics->strat  = Z_FILTERED;
-		}
-
-		// If we see at least MAXFAIL trials in a row for a specific
-		// compression method with more than MAXDELTA aggregate
-		// size then we drop this from the list of methods used
-		// for this block type.
-#define MAXDELTA 0.20
-#define MAXFAILS 4
-		if (best_method == GZIP_RLE) {
-		    metrics->gz_rle_cnt = 0;
-		    metrics->gz_rle_extra = 0;
-		} else if (best_sz < metrics->sz_gz_rle) {
-		    double r = (double)metrics->sz_gz_rle / best_sz - 1;
-		    if (++metrics->gz_rle_cnt >= MAXFAILS && 
-			(metrics->gz_rle_extra += r) >= MAXDELTA)
-			method &= ~(1<<GZIP_RLE);
-		}
-
-		if (best_method == GZIP) {
-		    metrics->gz_def_cnt = 0;
-		    metrics->gz_def_extra = 0;
-		} else if (best_sz < metrics->sz_gz_def) {
-		    double r = (double)metrics->sz_gz_def / best_sz - 1;
-		    if (++metrics->gz_def_cnt >= MAXFAILS &&
-			(metrics->gz_def_extra += r) >= MAXDELTA)
-			method &= ~(1<<GZIP);
-		}
-
-		if (best_method == RANS0) {
-		    metrics->rans0_cnt = 0;
-		    metrics->rans0_extra = 0;
-		} else if (best_sz < metrics->sz_rans0) {
-		    double r = (double)metrics->sz_rans0 / best_sz - 1;
-		    if (++metrics->rans0_cnt >= MAXFAILS &&
-			(metrics->rans0_extra += r) >= MAXDELTA)
-			method &= ~(1<<RANS0);
-		}
-
-		if (best_method == RANS1) {
-		    metrics->rans1_cnt = 0;
-		    metrics->rans1_extra = 0;
-		} else if (best_sz < metrics->sz_rans1) {
-		    double r = (double)metrics->sz_rans1 / best_sz - 1;
-		    if (++metrics->rans1_cnt >= MAXFAILS &&
-			(metrics->rans1_extra += r) >= MAXDELTA)
-			method &= ~(1<<RANS1);
-		}
-
-		if (best_method == BZIP2) {
-		    metrics->bzip2_cnt = 0;
-		    metrics->bzip2_extra = 0;
-		} else if (best_sz < metrics->sz_bzip2) {
-		    double r = (double)metrics->sz_bzip2 / best_sz - 1;
-		    if (++metrics->bzip2_cnt >= MAXFAILS &&
-			(metrics->bzip2_extra += r) >= MAXDELTA)
-			method &= ~(1<<BZIP2);
-		}
-
-		if (best_method == LZMA) {
-		    metrics->lzma_cnt = 0;
-		    metrics->lzma_extra = 0;
-		} else if (best_sz < metrics->sz_lzma) {
-		    double r = (double)metrics->sz_lzma / best_sz - 1;
-		    if (++metrics->lzma_cnt >= MAXFAILS &&
-			(metrics->lzma_extra += r) >= MAXDELTA)
-			method &= ~(1<<LZMA);
-		}
-
-		//if (method != metrics->revised_method)
-		//    fprintf(stderr, "%d: method from %x to %x\n",
-		//	    b->content_id, metrics->revised_method, method);
-		metrics->revised_method = method;
-	    }
-	    pthread_mutex_unlock(&fd->metrics_lock);
-	} else {
-	    strat = metrics->strat;
-	    method = metrics->method;
-
-	    pthread_mutex_unlock(&fd->metrics_lock);
-	    comp = cram_compress_by_method((char *)b->data, b->uncomp_size,
-					   &comp_size, method,
-					   level, strat);
-	    if (!comp)
-		return -1;
-	    free(b->data);
-	    b->data = (unsigned char *)comp;
-	    b->comp_size = comp_size;
-	    b->method = method;
-	}
-
-    } else {
-	// no cached metrics, so just do zlib?
-	comp = cram_compress_by_method((char *)b->data, b->uncomp_size,
-				       &comp_size, GZIP, level, Z_FILTERED);
-	if (!comp) {
-	    fprintf(stderr, "Compression failed!\n");
-	    return -1;
-	}
-	free(b->data);
-	b->data = (unsigned char *)comp;
-	b->comp_size = comp_size;
-	b->method = GZIP;
-    }
-
-    if (fd->verbose)
-	fprintf(stderr, "Compressed block ID %d from %d to %d by method %s\n",
-		b->content_id, b->uncomp_size, b->comp_size,
-		cram_block_method2str(b->method));
-
-    if (b->method == RANS1)
-	b->method = RANS0; // Spec just has RANS (not 0/1) with auto-sensing
-
-    return 0;
-}
-
-cram_metrics *cram_new_metrics(void) {
-    cram_metrics *m = calloc(1, sizeof(*m));
-    if (!m)
-	return NULL;
-    m->trial = NTRIALS-1;
-    m->next_trial = TRIAL_SPAN;
-    m->method = RAW;
-    m->strat = 0;
-    m->revised_method = 0;
-
-    return m;
-}
-
-char *cram_block_method2str(enum cram_block_method m) {
-    switch(m) {
-    case RAW:	   return "RAW";
-    case GZIP:	   return "GZIP";
-    case BZIP2:	   return "BZIP2";
-    case LZMA:     return "LZMA";
-    case RANS0:    return "RANS0";
-    case RANS1:    return "RANS1";
-    case GZIP_RLE: return "GZIP_RLE";
-    case ERROR:    break;
-    }
-    return "?";
-}
-
-char *cram_content_type2str(enum cram_content_type t) {
-    switch (t) {
-    case FILE_HEADER:         return "FILE_HEADER";
-    case COMPRESSION_HEADER:  return "COMPRESSION_HEADER";
-    case MAPPED_SLICE:        return "MAPPED_SLICE";
-    case UNMAPPED_SLICE:      return "UNMAPPED_SLICE";
-    case EXTERNAL:            return "EXTERNAL";
-    case CORE:                return "CORE";
-    case CT_ERROR:            break;
-    }
-    return "?";
-}
-
-/*
- * Extra error checking on fclose to really ensure data is written.
- * Care needs to be taken to handle pipes vs real files.
- *
- * Returns 0 on success
- *        -1 on failure.
- */
-int paranoid_fclose(FILE *fp) {
-    if (-1 == fflush(fp) && errno != EBADF) {
-	fclose(fp);
-	return -1;
-    }
-
-    errno = 0;
-    if (-1 == fsync(fileno(fp))) {
-	if (errno != EINVAL) { // eg pipe
-	    fclose(fp);
-	    return -1;
-	}
-    }
-    return fclose(fp);
-}
-
-/* ----------------------------------------------------------------------
- * Reference sequence handling
- *
- * These revolve around the refs_t structure, which may potentially be
- * shared between multiple cram_fd.
- *
- * We start with refs_create() to allocate an empty refs_t and then
- * populate it with @SQ line data using refs_from_header(). This is done on
- * cram_open().  Also at start up we can call cram_load_reference() which
- * is used with "scramble -r foo.fa". This replaces the fd->refs with the
- * new one specified. In either case refs2id() is then called which
- * maps ref_entry names to @SQ ids (refs_t->ref_id[]).
- *
- * Later, possibly within a thread, we will want to know the actual ref
- * seq itself, obtained by calling cram_get_ref().  This may use the
- * UR: or M5: fields or the filename specified in the original
- * cram_load_reference() call.
- *
- * Given the potential for multi-threaded reference usage, we have
- * reference counting (sorry for the confusing double use of "ref") to
- * track the number of callers interested in any specific reference.
- */
-
-/*
- * Frees/unmaps a reference sequence and associated file handles.
- */
-static void ref_entry_free_seq(ref_entry *e) {
-    if (e->mf)
-	mfclose(e->mf);
-    if (e->seq && !e->mf)
-	free(e->seq);
-
-    e->seq = NULL;
-    e->mf = NULL;
-}
-
-void refs_free(refs_t *r) {
-    RP("refs_free()\n");
-
-    if (--r->count > 0)
-	return;
-
-    if (!r)
-	return;
-
-    if (r->pool)
-	string_pool_destroy(r->pool);
-
-    if (r->h_meta) {
-	khint_t k;
-
-	for (k = kh_begin(r->h_meta); k != kh_end(r->h_meta); k++) {
-	    ref_entry *e;
-
-	    if (!kh_exist(r->h_meta, k))
-		continue;
-	    if (!(e = kh_val(r->h_meta, k)))
-		continue;
-	    ref_entry_free_seq(e);
-	    free(e);
-	}
-
-	kh_destroy(refs, r->h_meta);
-    }
-    
-    if (r->ref_id)
-	free(r->ref_id);
-
-    if (r->fp)
-	bgzf_close(r->fp);
-
-    pthread_mutex_destroy(&r->lock);
-
-    free(r);
-}
-
-static refs_t *refs_create(void) {
-    refs_t *r = calloc(1, sizeof(*r));
-
-    RP("refs_create()\n");
-
-    if (!r)
-	return NULL;
-
-    if (!(r->pool = string_pool_create(8192)))
-	goto err;
-
-    r->ref_id = NULL; // see refs2id() to populate.
-    r->count = 1;
-    r->last = NULL;
-    r->last_id = -1;
-
-    if (!(r->h_meta = kh_init(refs)))
-	goto err;
-
-    pthread_mutex_init(&r->lock, NULL);
-
-    return r;
-
- err:
-    refs_free(r);
-    return NULL;
-}
-
-/*
- * Opens a reference fasta file as a BGZF stream, allowing for
- * compressed files.  It automatically builds a .fai file if
- * required and if compressed a .gzi bgzf index too.
- *
- * Returns a BGZF handle on success;
- *         NULL on failure.
- */
-static BGZF *bgzf_open_ref(char *fn, char *mode, int is_md5) {
-    BGZF *fp;
-
-    if (!is_md5) {
-	char fai_file[PATH_MAX];
-
-	snprintf(fai_file, PATH_MAX, "%s.fai", fn);
-	if (access(fai_file, R_OK) != 0)
-	    if (fai_build(fn) != 0)
-		return NULL;
-    }
-
-    if (!(fp = bgzf_open(fn, mode))) {
-	perror(fn);
-	return NULL;
-    }
-
-    if (fp->is_compressed == 1 && bgzf_index_load(fp, fn, ".gzi") < 0) {
-	fprintf(stderr, "Unable to load .gzi index '%s.gzi'\n", fn);
-	bgzf_close(fp);
-	return NULL;
-    }
-
-    return fp;
-}
-
-/*
- * Loads a FAI file for a reference.fasta.
- * "is_err" indicates whether failure to load is worthy of emitting an
- * error message. In some cases (eg with embedded references) we
- * speculatively load, just incase, and silently ignore errors.
- *
- * Returns the refs_t struct on success (maybe newly allocated);
- *         NULL on failure
- */
-static refs_t *refs_load_fai(refs_t *r_orig, char *fn, int is_err) {
-    struct stat sb;
-    FILE *fp = NULL;
-    char fai_fn[PATH_MAX];
-    char line[8192];
-    refs_t *r = r_orig;
-    size_t fn_l = strlen(fn);
-    int id = 0, id_alloc = 0;
-
-    RP("refs_load_fai %s\n", fn);
-
-    if (!r)
-	if (!(r = refs_create()))
-	    goto err;
-
-    /* Open reference, for later use */
-    if (stat(fn, &sb) != 0) {
-	if (is_err)
-	    perror(fn);
-	goto err;
-    }
-
-    if (r->fp)
-	if (bgzf_close(r->fp) != 0)
-	    goto err;
-    r->fp = NULL;
-
-    if (!(r->fn = string_dup(r->pool, fn)))
-	goto err;
-	
-    if (fn_l > 4 && strcmp(&fn[fn_l-4], ".fai") == 0)
-	r->fn[fn_l-4] = 0;
-
-    if (!(r->fp = bgzf_open_ref(r->fn, "r", 0)))
-	goto err;
-
-    /* Parse .fai file and load meta-data */
-    sprintf(fai_fn, "%.*s.fai", PATH_MAX-5, r->fn);
-
-    if (stat(fai_fn, &sb) != 0) {
-	if (is_err)
-	    perror(fai_fn);
-	goto err;
-    }
-    if (!(fp = fopen(fai_fn, "r"))) {
-	if (is_err)
-	    perror(fai_fn);
-	goto err;
-    }
-    while (fgets(line, 8192, fp) != NULL) {
-	ref_entry *e = malloc(sizeof(*e));
-	char *cp;
-	int n;
-	khint_t k;
-
-	if (!e)
-	    return NULL;
-
-	// id
-	for (cp = line; *cp && !isspace(*cp); cp++)
-	    ;
-	*cp++ = 0;
-	e->name = string_dup(r->pool, line);
-	
-	// length
-	while (*cp && isspace(*cp))
-	    cp++;
-	e->length = strtoll(cp, &cp, 10);
-
-	// offset
-	while (*cp && isspace(*cp))
-	    cp++;
-	e->offset = strtoll(cp, &cp, 10);
-
-	// bases per line
-	while (*cp && isspace(*cp))
-	    cp++;
-	e->bases_per_line = strtol(cp, &cp, 10);
-
-	// line length
-	while (*cp && isspace(*cp))
-	    cp++;
-	e->line_length = strtol(cp, &cp, 10);
-
-	// filename
-	e->fn = r->fn;
-
-	e->count = 0;
-	e->seq = NULL;
-	e->mf = NULL;
-	e->is_md5 = 0;
-
-	k = kh_put(refs, r->h_meta, e->name, &n);
-	if (-1 == n)  {
-	    free(e);
-	    return NULL;
-	}
-
-	if (n) {
-	    kh_val(r->h_meta, k) = e;
-	} else {
-	    ref_entry *re = kh_val(r->h_meta, k);
-	    if (re && (re->count != 0 || re->length != 0)) {
-		/* Keep old */
-		free(e);
-	    } else {
-		/* Replace old */
-		if (re)
-		    free(re);
-		kh_val(r->h_meta, k) = e;
-	    }
-	}
-
-	if (id >= id_alloc) {
-	    int x;
-
-	    id_alloc = id_alloc ?id_alloc*2 : 16;
-	    r->ref_id = realloc(r->ref_id, id_alloc * sizeof(*r->ref_id));
-
-	    for (x = id; x < id_alloc; x++)
-		r->ref_id[x] = NULL;
-	}
-	r->ref_id[id] = e;
-	r->nref = ++id;
-    }
-
-    return r;
-
- err:
-    if (fp)
-	fclose(fp);
-
-    if (!r_orig)
-	refs_free(r);
-    
-    return NULL;
-}
-
-/*
- * Verifies that the CRAM @SQ lines and .fai files match.
- */
-static void sanitise_SQ_lines(cram_fd *fd) {
-    int i;
-
-    if (!fd->header)
-	return;
-
-    if (!fd->refs || !fd->refs->h_meta)
-	return;
-
-    for (i = 0; i < fd->header->nref; i++) {
-	char *name = fd->header->ref[i].name;
-	khint_t k = kh_get(refs, fd->refs->h_meta, name);
-	ref_entry *r;
-
-	// We may have @SQ lines which have no known .fai, but do not
-	// in themselves pose a problem because they are unused in the file.
-	if (k == kh_end(fd->refs->h_meta))
-	    continue;
-
-	if (!(r = (ref_entry *)kh_val(fd->refs->h_meta, k)))
-	    continue;
-
-	if (r->length && r->length != fd->header->ref[i].len) {
-	    assert(strcmp(r->name, fd->header->ref[i].name) == 0);
-
-	    // Should we also check MD5sums here to ensure the correct
-	    // reference was given?
-	    fprintf(stderr, "WARNING: Header @SQ length mismatch for "
-		    "ref %s, %d vs %d\n",
-		    r->name, fd->header->ref[i].len, (int)r->length);
-
-	    // Fixing the parsed @SQ header will make MD:Z: strings work
-	    // and also stop it producing N for the sequence.
-	    fd->header->ref[i].len = r->length;
-	}
-    }
-}
-
-/*
- * Indexes references by the order they appear in a BAM file. This may not
- * necessarily be the same order they appear in the fasta reference file.
- *
- * Returns 0 on success
- *        -1 on failure
- */
-int refs2id(refs_t *r, SAM_hdr *h) {
-    int i;
-
-    if (r->ref_id)
-	free(r->ref_id);
-    if (r->last)
-	r->last = NULL;
-
-    r->ref_id = calloc(h->nref, sizeof(*r->ref_id));
-    if (!r->ref_id)
-	return -1;
-
-    r->nref = h->nref;
-    for (i = 0; i < h->nref; i++) {
-	khint_t k = kh_get(refs, r->h_meta, h->ref[i].name);
-	if (k != kh_end(r->h_meta)) {
-	    r->ref_id[i] = kh_val(r->h_meta, k);
-	} else {
-	    fprintf(stderr, "Unable to find ref name '%s'\n",
-		    h->ref[i].name);
-	}
-    }
-
-    return 0;
-}
-
-/*
- * Generates refs_t entries based on @SQ lines in the header.
- * Returns 0 on success
- *         -1 on failure
- */
-static int refs_from_header(refs_t *r, cram_fd *fd, SAM_hdr *h) {
-    int i, j;
-
-    if (!r)
-	return -1;
-
-    if (!h || h->nref == 0)
-	return 0;
-
-    //fprintf(stderr, "refs_from_header for %p mode %c\n", fd, fd->mode);
-
-    /* Existing refs are fine, as long as they're compatible with the hdr. */
-    if (!(r->ref_id = realloc(r->ref_id, (r->nref + h->nref) * sizeof(*r->ref_id))))
-	return -1;
-
-    /* Copy info from h->ref[i] over to r */
-    for (i = 0, j = r->nref; i < h->nref; i++) {
-	SAM_hdr_type *ty;
-	SAM_hdr_tag *tag;
-	khint_t k;
-	int n;
-
-	k = kh_get(refs, r->h_meta, h->ref[i].name);
-	if (k != kh_end(r->h_meta))
-	    // Ref already known about
-	    continue;
-
-	if (!(r->ref_id[j] = calloc(1, sizeof(ref_entry))))
-	    return -1;
-
-	if (!h->ref[i].name)
-	    return -1;
-
-	r->ref_id[j]->name = string_dup(r->pool, h->ref[i].name);
-	r->ref_id[j]->length = 0; // marker for not yet loaded
-
-	/* Initialise likely filename if known */
-	if ((ty = sam_hdr_find(h, "SQ", "SN", h->ref[i].name))) {
-	    if ((tag = sam_hdr_find_key(h, ty, "M5", NULL))) {
-		r->ref_id[j]->fn = string_dup(r->pool, tag->str+3);
-		//fprintf(stderr, "Tagging @SQ %s / %s\n", r->ref_id[h]->name, r->ref_id[h]->fn);
-	    }
-	}
-
-	k = kh_put(refs, r->h_meta, r->ref_id[j]->name, &n);
-	if (n <= 0) // already exists or error
-	    return -1;
-	kh_val(r->h_meta, k) = r->ref_id[j];
-
-	j++;
-    }
-    r->nref = j;
-
-    return 0;
-}
-
-/*
- * Attaches a header to a cram_fd.
- *
- * This should be used when creating a new cram_fd for writing where
- * we have an SAM_hdr already constructed (eg from a file we've read
- * in).
- */
-int cram_set_header(cram_fd *fd, SAM_hdr *hdr) {
-    if (fd->header)
-	sam_hdr_free(fd->header);
-    fd->header = hdr;
-    return refs_from_header(fd->refs, fd, hdr);
-}
-
-/*
- * Converts a directory and a filename into an expanded path, replacing %s
- * in directory with the filename and %[0-9]+s with portions of the filename
- * Any remaining parts of filename are added to the end with /%s.
- */
-void expand_cache_path(char *path, char *dir, char *fn) {
-    char *cp;
-
-    while ((cp = strchr(dir, '%'))) {
-	strncpy(path, dir, cp-dir);
-	path += cp-dir;
-
-	if (*++cp == 's') {
-	    strcpy(path, fn);
-	    path += strlen(fn);
-	    fn += strlen(fn);
-	    cp++;
-	} else if (*cp >= '0' && *cp <= '9') {
-	    char *endp;
-	    long l;
-
-	    l = strtol(cp, &endp, 10);
-	    l = MIN(l, strlen(fn));
-	    if (*endp == 's') {
-		strncpy(path, fn, l);
-		path += l;
-		fn += l;
-		*path = 0;
-		cp = endp+1;
-	    } else {
-		*path++ = '%';
-		*path++ = *cp++;
-	    }
-	} else {
-	    *path++ = '%';
-	    *path++ = *cp++;
-	}
-	dir = cp;
-    }
-    strcpy(path, dir);
-    path += strlen(dir);
-    if (*fn && path[-1] != '/')
-	*path++ = '/';
-    strcpy(path, fn);
-}
-
-/*
- * Make the directory containing path and any prefix directories.
- */
-void mkdir_prefix(char *path, int mode) {
-    char *cp = strrchr(path, '/');
-    if (!cp)
-	return;
-
-    *cp = 0;
-    if (is_directory(path)) {
-	*cp = '/';
-	return;
-    }
-
-    if (mkdir(path, mode) == 0) {
-	chmod(path, mode);
-	*cp = '/';
-	return;
-    }
-
-    mkdir_prefix(path, mode);
-    mkdir(path, mode);
-    chmod(path, mode);
-    *cp = '/';
-}
-
-/*
- * Return the cache directory to use, based on the first of these
- * environment variables to be set to a non-empty value.
- */
-static const char *get_cache_basedir(const char **extra) {
-    char *base;
-
-    *extra = "";
-
-    base = getenv("XDG_CACHE_HOME");
-    if (base && *base) return base;
-
-    base = getenv("HOME");
-    if (base && *base) { *extra = "/.cache"; return base; }
-
-    base = getenv("TMPDIR");
-    if (base && *base) return base;
-
-    base = getenv("TEMP");
-    if (base && *base) return base;
-
-    return "/tmp";
-}
-
-#ifdef UCSC_CRAM
-/* put the CRAM reference sequence cachdir and the url to
- * grab the reference sequence into the CRAM header
- */
-void cram_set_cache_url(htsFile *cramFile, char *cacheDir, char *refUrl) {
-    cram_fd *fd = cramFile->fp.cram;
-
-    if (cacheDir)
-        strncpy(fd->cacheDir, cacheDir, sizeof(fd->cacheDir));
-    if (refUrl)
-        strncpy(fd->refUrl, refUrl, sizeof(fd->refUrl));
-    else
-        strncpy(fd->refUrl, "http://www.ebi.ac.uk:80/ena/cram/md5/%s", sizeof(fd->refUrl));
-}
-
-/* get the url from which to grab some CRAM reference sequence */
-char *cram_get_ref_url(htsFile *cramFile) {
-    cram_fd *fd = cramFile->fp.cram;
-    return fd->refUrl;
-}
-
-/* get the cache directory for this CRAM*/
-char *cram_get_cache_dir(htsFile *cramFile) {
-    cram_fd *fd = cramFile->fp.cram;
-    return fd->cacheDir;
-}
-
-/* get the MD5 sum for this CRAM */
-char *cram_get_Md5(htsFile *cramFile) {
-    cram_fd *fd = cramFile->fp.cram;
-    return fd->md5Ref;
-}
-#endif
-
-/*
- * Queries the M5 string from the header and attempts to populate the
- * reference from this using the REF_PATH environment.
- *
- * Returns 0 on sucess
- *        -1 on failure
- */
-static int cram_populate_ref(cram_fd *fd, int id, ref_entry *r) {
-    char *ref_path = fd->refUrl;
-    SAM_hdr_type *ty;
-    SAM_hdr_tag *tag;
-    char path[PATH_MAX], path_tmp[PATH_MAX], cache[PATH_MAX];
-    char *local_cache = fd->cacheDir;
-    mFILE *mf;
-    int local_path = 0;
-
-    fd->md5Ref[0] = 0;
-    if (fd->verbose)
-	fprintf(stderr, "cram_populate_ref on fd %p, id %d\n", fd, id);
-
-    if (!ref_path || *ref_path == '\0') {
-	/*
-	 * If we have no ref path, we use the EBI server.
-	 * However to avoid spamming it we require a local ref cache too.
-	 */
-	ref_path = "http://www.ebi.ac.uk:80/ena/cram/md5/%s";
-	if (!local_cache || *local_cache == '\0') {
-	    const char *extra;
-	    const char *base = get_cache_basedir(&extra);
-	    snprintf(cache,PATH_MAX, "%s%s/hts-ref/%%2s/%%2s/%%s", base, extra);
-	    local_cache = cache;
-	    if (fd->verbose)
-		fprintf(stderr, "Populating local cache: %s\n", local_cache);
-	}
-    }
-
-    if (!r->name)
-	return -1;
-
-    if (!(ty = sam_hdr_find(fd->header, "SQ", "SN", r->name)))
-	return -1;
-
-    if (!(tag = sam_hdr_find_key(fd->header, ty, "M5", NULL)))
-	goto no_M5;
-
-    if (fd->verbose)
-	fprintf(stderr, "Querying ref %s\n", tag->str+3);
-
-    /* Use cache if available */
-    if (local_cache && *local_cache) {
-	expand_cache_path(path, local_cache, tag->str+3);
-	local_path = 1;
-    }
-
-#ifndef HAVE_MMAP
-    char *path2;
-    /* Search local files in REF_PATH; we can open them and return as above */
-    if (!local_path && (path2 = find_path(tag->str+3, ref_path))) {
-	strncpy(path, path2, PATH_MAX);
-	free(path2);
-	if (is_file(path)) // incase it's too long
-	    local_path = 1;
-    }
-#endif
-
-    /* Found via REF_CACHE or local REF_PATH file */
-    if (local_path) {
-	struct stat sb;
-	BGZF *fp;
-
-	if (0 == stat(path, &sb) && (fp = bgzf_open(path, "r"))) {
-	    r->length = sb.st_size;
-	    r->offset = r->line_length = r->bases_per_line = 0;
-
-	    r->fn = string_dup(fd->refs->pool, path);
-
-	    if (fd->refs->fp)
-		if (bgzf_close(fd->refs->fp) != 0)
-		    return -1;
-	    fd->refs->fp = fp;
-	    fd->refs->fn = r->fn;
-	    r->is_md5 = 1;
-
-	    // Fall back to cram_get_ref() where it'll do the actual
-	    // reading of the file.
-	    return 0;
-	}
-    }
-
-
-#ifdef UCSC_CRAM
-    // don't download cram reference, just pass the file name upward
-    sprintf(fd->md5Ref, "%s", tag->str+3);
-    return -1;
-#endif
-
-    /* Otherwise search full REF_PATH; slower as loads entire file */
-    if ((mf = open_path_mfile(tag->str+3, ref_path, NULL))) {
-	size_t sz;
-	r->seq = mfsteal(mf, &sz);
-	if (r->seq) {
-	    r->mf = NULL;
-	} else {
-	    // keep mf around as we couldn't detach
-	    r->seq = mf->data;
-	    r->mf = mf;
-	}
-	r->length = sz;
-	r->is_md5 = 1;
-    } else {
-	refs_t *refs;
-	char *fn;
-
-    no_M5:
-	/* Failed to find in search path or M5 cache, see if @SQ UR: tag? */
-	if (!(tag = sam_hdr_find_key(fd->header, ty, "UR", NULL)))
-	    return -1;
-
-	fn = (strncmp(tag->str+3, "file:", 5) == 0)
-	    ? tag->str+8
-	    : tag->str+3;
-
-	if (fd->refs->fp) {
-	    if (bgzf_close(fd->refs->fp) != 0)
-		return -1;
-	    fd->refs->fp = NULL;
-	}
-	if (!(refs = refs_load_fai(fd->refs, fn, 0)))
-	    return -1;
-	sanitise_SQ_lines(fd);
-
-	fd->refs = refs;
-	if (fd->refs->fp) {
-	    if (bgzf_close(fd->refs->fp) != 0)
-		return -1;
-	    fd->refs->fp = NULL;
-	}
-
-	if (!fd->refs->fn)
-	    return -1;
-
-	if (-1 == refs2id(fd->refs, fd->header))
-	    return -1;
-	if (!fd->refs->ref_id || !fd->refs->ref_id[id])
-	    return -1;
-
-	// Local copy already, so fall back to cram_get_ref().
-	return 0;
-    }
-
-#ifdef UCSC_CRAM
-    assert(!((local_cache && *local_cache))); 
-#endif
-    /* Populate the local disk cache if required */
-    if (local_cache && *local_cache) {
-	FILE *fp;
-	int i;
-
-	expand_cache_path(path, local_cache, tag->str+3);
-	if (fd->verbose)
-	    fprintf(stderr, "Path='%s'\n", path);
-	mkdir_prefix(path, 01777);
-
-	i = 0;
-	do {
-	    sprintf(path_tmp, "%s.tmp_%d", path, /*getpid(),*/ i);
-	    i++;
-	    fp = fopen(path_tmp, "wx");
-	} while (fp == NULL && errno == EEXIST);
-	if (!fp) {
-	    perror(path_tmp);
-
-	    // Not fatal - we have the data already so keep going.
-	    return 0;
-	}
-
-	if (r->length != fwrite(r->seq, 1, r->length, fp)) {
-	    perror(path);
-	}
-	if (-1 == paranoid_fclose(fp)) {
-	    unlink(path_tmp);
-	} else {
-	    if (0 == chmod(path_tmp, 0444))
-		rename(path_tmp, path);
-	    else
-		unlink(path_tmp);
-	}
-    }
-
-    return 0;
-}
-
-static void cram_ref_incr_locked(refs_t *r, int id) {
-    RP("%d INC REF %d, %d %p\n", gettid(), id, (int)(id>=0?r->ref_id[id]->count+1:-999), id>=0?r->ref_id[id]->seq:(char *)1);
-
-    if (id < 0 || !r->ref_id[id]->seq)
-	return;
-
-    if (r->last_id == id)
-	r->last_id = -1;
-
-    ++r->ref_id[id]->count;
-}
-
-void cram_ref_incr(refs_t *r, int id) {
-    pthread_mutex_lock(&r->lock);
-    cram_ref_incr_locked(r, id);
-    pthread_mutex_unlock(&r->lock);
-}
-
-static void cram_ref_decr_locked(refs_t *r, int id) {
-    RP("%d DEC REF %d, %d %p\n", gettid(), id, (int)(id>=0?r->ref_id[id]->count-1:-999), id>=0?r->ref_id[id]->seq:(char *)1);
-
-    if (id < 0 || !r->ref_id[id]->seq) {
-	assert(r->ref_id[id]->count >= 0);
-	return;
-    }
-
-    if (--r->ref_id[id]->count <= 0) {
-	assert(r->ref_id[id]->count == 0);
-	if (r->last_id >= 0) {
-	    if (r->ref_id[r->last_id]->count <= 0 &&
-		r->ref_id[r->last_id]->seq) {
-		RP("%d FREE REF %d (%p)\n", gettid(),
-		   r->last_id, r->ref_id[r->last_id]->seq);
-		ref_entry_free_seq(r->ref_id[r->last_id]);
-		r->ref_id[r->last_id]->length = 0;
-	    }
-	}
-	r->last_id = id;
-    }
-}
-
-void cram_ref_decr(refs_t *r, int id) {
-    pthread_mutex_lock(&r->lock);
-    cram_ref_decr_locked(r, id);
-    pthread_mutex_unlock(&r->lock);
-}
-
-/*
- * Used by cram_ref_load and cram_ref_get. The file handle will have
- * already been opened, so we can catch it. The ref_entry *e informs us
- * of whether this is a multi-line fasta file or a raw MD5 style file.
- * Either way we create a single contiguous sequence.
- *
- * Returns all or part of a reference sequence on success (malloced);
- *         NULL on failure.
- */
-static char *load_ref_portion(BGZF *fp, ref_entry *e, int start, int end) {
-    off_t offset, len;
-    char *seq;
-
-    if (end < start)
-	end = start;
-
-    /*
-     * Compute locations in file. This is trivial for the MD5 files, but
-     * is still necessary for the fasta variants.
-     */
-    offset = e->line_length
-	? e->offset + (start-1)/e->bases_per_line * e->line_length +
-	  (start-1) % e->bases_per_line
-	: start-1;
-
-    len = (e->line_length
-	   ? e->offset + (end-1)/e->bases_per_line * e->line_length + 
-	     (end-1) % e->bases_per_line
-	   : end-1) - offset + 1;
-
-    if (bgzf_useek(fp, offset, SEEK_SET) < 0) {
-	perror("bgzf_useek() on reference file");
-	return NULL;
-    }
-
-    if (len == 0 || !(seq = malloc(len))) {
-	return NULL;
-    }
-
-    if (len != bgzf_read(fp, seq, len)) {
-	perror("bgzf_read() on reference file");
-	free(seq);
-	return NULL;
-    }
-
-    /* Strip white-space if required. */
-    if (len != end-start+1) {
-	int i, j;
-	char *cp = seq;
-	char *cp_to;
-
-	for (i = j = 0; i < len; i++) {
-	    if (cp[i] >= '!' && cp[i] <= '~')
-		cp[j++] = toupper(cp[i]);
-	}
-	cp_to = cp+j;
-
-	if (cp_to - seq != end-start+1) {
-	    fprintf(stderr, "Malformed reference file?\n");
-	    free(seq);
-	    return NULL;
-	}
-    } else {
-	int i;
-	for (i = 0; i < len; i++) {
-	    seq[i] = toupper(seq[i]);
-	}
-    }
-
-    return seq;
-}
-
-/*
- * Load the entire reference 'id'.
- * This also increments the reference count by 1.
- *
- * Returns ref_entry on success;
- *         NULL on failure
- */
-ref_entry *cram_ref_load(refs_t *r, int id, int is_md5) {
-    ref_entry *e = r->ref_id[id];
-    int start = 1, end = e->length;
-    char *seq;
-
-    if (e->seq) {
-	return e;
-    }
-
-    assert(e->count == 0);
-
-    if (r->last) {
-#ifdef REF_DEBUG
-	int idx = 0;
-	for (idx = 0; idx < r->nref; idx++)
-	    if (r->last == r->ref_id[idx])
-		break;
-	RP("%d cram_ref_load DECR %d\n", gettid(), idx);
-#endif
-	assert(r->last->count > 0);
-	if (--r->last->count <= 0) {
-	    RP("%d FREE REF %d (%p)\n", gettid(), id, r->ref_id[id]->seq);
-	    if (r->last->seq)
-		ref_entry_free_seq(r->last);
-	}
-    }
-
-    /* Open file if it's not already the current open reference */
-    if (strcmp(r->fn, e->fn) || r->fp == NULL) {
-	if (r->fp)
-	    if (bgzf_close(r->fp) != 0)
-		return NULL;
-	r->fn = e->fn;
-	if (!(r->fp = bgzf_open_ref(r->fn, "r", is_md5)))
-	    return NULL;
-    }
-
-    RP("%d Loading ref %d (%d..%d)\n", gettid(), id, start, end);
-
-    if (!(seq = load_ref_portion(r->fp, e, start, end))) {
-	return NULL;
-    }
-
-    RP("%d Loaded ref %d (%d..%d) = %p\n", gettid(), id, start, end, seq);
-
-    RP("%d INC REF %d, %d\n", gettid(), id, (int)(e->count+1));
-    e->seq = seq;
-    e->mf = NULL;
-    e->count++;
-
-    /*
-     * Also keep track of last used ref so incr/decr loops on the same
-     * sequence don't cause load/free loops.
-     */
-    RP("%d cram_ref_load INCR %d => %d\n", gettid(), id, e->count+1);
-    r->last = e;
-    e->count++; 
-
-    return e;
-}
-
-/*
- * Returns a portion of a reference sequence from start to end inclusive.
- * The returned pointer is owned by either the cram_file fd or by the
- * internal refs_t structure and should not be freed  by the caller.
- *
- * The difference is whether or not this refs_t is in use by just the one
- * cram_fd or by multiples, or whether we have multiple threads accessing
- * references. In either case fd->shared will be true and we start using
- * reference counting to track the number of users of a specific reference
- * sequence.
- *
- * Otherwise the ref seq returned is allocated as part of cram_fd itself
- * and will be freed up on the next call to cram_get_ref or cram_close.
- *
- * To return the entire reference sequence, specify start as 1 and end
- * as 0.
- *
- * To cease using a reference, call cram_ref_decr().
- *
- * Returns reference on success,
- *         NULL on failure
- */
-char *cram_get_ref(cram_fd *fd, int id, int start, int end) {
-    ref_entry *r;
-    char *seq;
-    int ostart = start;
-
-    if (id == -1)
-	return NULL;
-
-    /* FIXME: axiomatic query of r->seq being true?
-     * Or shortcut for unsorted data where we load once and never free?
-     */
-
-    //fd->shared_ref = 1; // hard code for now to simplify things
-
-    pthread_mutex_lock(&fd->ref_lock);
-
-    RP("%d cram_get_ref on fd %p, id %d, range %d..%d\n", gettid(), fd, id, start, end);
-
-    /*
-     * Unsorted data implies we want to fetch an entire reference at a time.
-     * We just deal with this at the moment by claiming we're sharing
-     * references instead, which has the same requirement.
-     */
-    if (fd->unsorted)
-	fd->shared_ref = 1;
-
-
-    /* Sanity checking: does this ID exist? */
-    if (id >= fd->refs->nref) {
-	fprintf(stderr, "No reference found for id %d\n", id);
-	pthread_mutex_unlock(&fd->ref_lock);
-	return NULL;
-    }
-
-    if (!fd->refs || !fd->refs->ref_id[id]) {
-	fprintf(stderr, "No reference found for id %d\n", id);
-	pthread_mutex_unlock(&fd->ref_lock);
-	return NULL;
-    }
-
-    if (!(r = fd->refs->ref_id[id])) {
-	fprintf(stderr, "No reference found for id %d\n", id);
-	pthread_mutex_unlock(&fd->ref_lock);
-	return NULL;
-    }
-
-
-    /*
-     * It has an entry, but may not have been populated yet.
-     * Any manually loaded .fai files have their lengths known.
-     * A ref entry computed from @SQ lines (M5 or UR field) will have
-     * r->length == 0 unless it's been loaded once and verified that we have
-     * an on-disk filename for it.
-     *
-     * 19 Sep 2013: Moved the lock here as the cram_populate_ref code calls
-     * open_path_mfile and libcurl, which isn't multi-thread safe unless I
-     * rewrite my code to have one curl handle per thread.
-     */
-    pthread_mutex_lock(&fd->refs->lock);
-    if (r->length == 0) {
-	if (cram_populate_ref(fd, id, r) == -1) {
-	    fprintf(stderr, "Failed to populate reference for id %d\n", id);
-	    pthread_mutex_unlock(&fd->refs->lock);
-	    pthread_mutex_unlock(&fd->ref_lock);
-	    return NULL;
-	}
-	r = fd->refs->ref_id[id];
-	if (fd->unsorted)
-	    cram_ref_incr_locked(fd->refs, id);
-    }
-
-
-    /*
-     * We now know that we the filename containing the reference, so check
-     * for limits. If it's over half the reference we'll load all of it in
-     * memory as this will speed up subsequent calls.
-     */
-    if (end < 1)
-	end = r->length;
-    if (end >= r->length)
-	end  = r->length; 
-    assert(start >= 1);
-
-    if (end - start >= 0.5*r->length || fd->shared_ref) {
-	start = 1;
-	end = r->length;
-    }
-    
-    /*
-     * Maybe we have it cached already? If so use it.
-     *
-     * Alternatively if we don't have the sequence but we're sharing
-     * references and/or are asking for the entire length of it, then
-     * load the full reference into the refs structure and return
-     * a pointer to that one instead.
-     */
-    if (fd->shared_ref || r->seq || (start == 1 && end == r->length)) {
-	char *cp;
-
-	if (id >= 0) {
-	    if (r->seq) {
-		cram_ref_incr_locked(fd->refs, id);
-	    } else {
-		ref_entry *e;
-		if (!(e = cram_ref_load(fd->refs, id, r->is_md5))) {
-		    pthread_mutex_unlock(&fd->refs->lock);
-		    pthread_mutex_unlock(&fd->ref_lock);
-		    return NULL;
-		}
-
-		/* unsorted data implies cache ref indefinitely, to avoid
-		 * continually loading and unloading.
-		 */
-		if (fd->unsorted)
-		    cram_ref_incr_locked(fd->refs, id);
-	    }	    
-
-	    fd->ref = NULL; /* We never access it directly */
-	    fd->ref_start = 1;
-	    fd->ref_end   = r->length;
-	    fd->ref_id    = id;
-
-	    cp = fd->refs->ref_id[id]->seq + ostart-1;
-	} else {
-	    fd->ref = NULL;
-	    cp = NULL;
-	}
-
-	RP("%d cram_get_ref returning for id %d, count %d\n", gettid(), id, (int)r->count);
-
-	pthread_mutex_unlock(&fd->refs->lock);
-	pthread_mutex_unlock(&fd->ref_lock);
-	return cp;
-    }
-
-    /*
-     * Otherwise we're not sharing, we don't have a copy of it already and
-     * we're only asking for a small portion of it.
-     *
-     * In this case load up just that segment ourselves, freeing any old
-     * small segments in the process.
-     */
-
-    /* Unmapped ref ID */
-    if (id < 0) {
-	if (fd->ref_free) {
-	    free(fd->ref_free);
-	    fd->ref_free = NULL;
-	}
-	fd->ref = NULL;
-	fd->ref_id = id;
-	pthread_mutex_unlock(&fd->refs->lock);
-	pthread_mutex_unlock(&fd->ref_lock);
-	return NULL;
-    }
-
-    /* Open file if it's not already the current open reference */
-    if (strcmp(fd->refs->fn, r->fn) || fd->refs->fp == NULL) {
-	if (fd->refs->fp)
-	    if (bgzf_close(fd->refs->fp) != 0)
-		return NULL;
-	fd->refs->fn = r->fn;
-	if (!(fd->refs->fp = bgzf_open_ref(fd->refs->fn, "r", r->is_md5))) {
-	    pthread_mutex_unlock(&fd->refs->lock);
-	    pthread_mutex_unlock(&fd->ref_lock);
-	    return NULL;
-	}
-    }
-
-    if (!(fd->ref = load_ref_portion(fd->refs->fp, r, start, end))) {
-	pthread_mutex_unlock(&fd->refs->lock);
-	pthread_mutex_unlock(&fd->ref_lock);
-	return NULL;
-    }
-
-    if (fd->ref_free)
-	free(fd->ref_free);
-
-    fd->ref_id    = id;
-    fd->ref_start = start;
-    fd->ref_end   = end;
-    fd->ref_free = fd->ref;
-    seq = fd->ref;
-
-    pthread_mutex_unlock(&fd->refs->lock);
-    pthread_mutex_unlock(&fd->ref_lock);
-
-    return seq + ostart - start;
-}
-
-/*
- * If fd has been opened for reading, it may be permitted to specify 'fn'
- * as NULL and let the code auto-detect the reference by parsing the
- * SAM header @SQ lines.
- */
-int cram_load_reference(cram_fd *fd, char *fn) {
-    int ret = 0;
-
-    if (fn) {
-	fd->refs = refs_load_fai(fd->refs, fn,
-				 !(fd->embed_ref && fd->mode == 'r'));
-	fn = fd->refs ? fd->refs->fn : NULL;
-	if (!fn)
-	    ret = -1;
-	sanitise_SQ_lines(fd);
-    }
-    fd->ref_fn = fn;
-
-    if ((!fd->refs || (fd->refs->nref == 0 && !fn)) && fd->header) {
-	if (fd->refs)
-	    refs_free(fd->refs);
-	if (!(fd->refs = refs_create()))
-	    return -1;
-	if (-1 == refs_from_header(fd->refs, fd, fd->header))
-	    return -1;
-    }
-
-    if (fd->header)
-	if (-1 == refs2id(fd->refs, fd->header))
-	    return -1;
-
-    return ret;
-}
-
-/* ----------------------------------------------------------------------
- * Containers
- */
-
-/*
- * Creates a new container, specifying the maximum number of slices
- * and records permitted.
- *
- * Returns cram_container ptr on success
- *         NULL on failure
- */
-cram_container *cram_new_container(int nrec, int nslice) {
-    cram_container *c = calloc(1, sizeof(*c));
-    enum cram_DS_ID id;
-
-    if (!c)
-	return NULL;
-
-    c->curr_ref = -2;
-
-    c->max_c_rec = nrec * nslice;
-    c->curr_c_rec = 0;
-
-    c->max_rec = nrec;
-    c->record_counter = 0;
-    c->num_bases = 0;
-
-    c->max_slice = nslice;
-    c->curr_slice = 0;
-
-    c->pos_sorted = 1;
-    c->max_apos   = 0;
-    c->multi_seq  = 0;
-
-    c->bams = NULL;
-
-    if (!(c->slices = (cram_slice **)calloc(nslice, sizeof(cram_slice *))))
-	goto err;
-    c->slice = NULL;
-
-    if (!(c->comp_hdr = cram_new_compression_header()))
-	goto err;
-    c->comp_hdr_block = NULL;
-
-    for (id = DS_RN; id < DS_TN; id++)
-	if (!(c->stats[id] = cram_stats_create())) goto err;
-    
-    //c->aux_B_stats = cram_stats_create();
-
-    if (!(c->tags_used = kh_init(s_i2i)))
-	goto err;
-    c->refs_used = 0;
-
-    return c;
-
- err:
-    if (c) {
-	if (c->slices)
-	    free(c->slices);
-	free(c);
-    }
-    return NULL;
-}
-
-void cram_free_container(cram_container *c) {
-    enum cram_DS_ID id;
-    int i;
-
-    if (!c)
-	return;
-
-    if (c->refs_used)
-	free(c->refs_used);
-
-    if (c->landmark)
-	free(c->landmark);
-
-    if (c->comp_hdr)
-	cram_free_compression_header(c->comp_hdr);
-
-    if (c->comp_hdr_block)
-	cram_free_block(c->comp_hdr_block);
-
-    if (c->slices) {
-	for (i = 0; i < c->max_slice; i++)
-	    if (c->slices[i])
-		cram_free_slice(c->slices[i]);
-	free(c->slices);
-    }
-
-    for (id = DS_RN; id < DS_TN; id++)
-	if (c->stats[id]) cram_stats_free(c->stats[id]);
-
-    //if (c->aux_B_stats) cram_stats_free(c->aux_B_stats);
-    
-    if (c->tags_used) kh_destroy(s_i2i, c->tags_used);
-
-    free(c);
-}
-
-/*
- * Reads a container header.
- *
- * Returns cram_container on success
- *         NULL on failure or no container left (fd->err == 0).
- */
-cram_container *cram_read_container(cram_fd *fd) {
-    cram_container c2, *c;
-    int i, s;
-    size_t rd = 0;
-    uint32_t crc = 0;
-    
-    fd->err = 0;
-    fd->eof = 0;
-
-    memset(&c2, 0, sizeof(c2));
-    if (CRAM_MAJOR_VERS(fd->version) == 1) {
-	if ((s = itf8_decode_crc(fd, &c2.length, &crc)) == -1) {
-	    fd->eof = fd->empty_container ? 1 : 2;
-	    return NULL;
-	} else {
-	    rd+=s;
-	}
-    } else {
-	uint32_t len;
-	if ((s = int32_decode(fd, &c2.length)) == -1) {
-	    if (CRAM_MAJOR_VERS(fd->version) == 2 &&
-		CRAM_MINOR_VERS(fd->version) == 0)
-		fd->eof = 1; // EOF blocks arrived in v2.1
-	    else
-		fd->eof = fd->empty_container ? 1 : 2;
-	    return NULL;
-	} else {
-	    rd+=s;
-	}
-	len = le_int4(c2.length);
-	crc = crc32(0L, (unsigned char *)&len, 4);
-    }
-    if ((s = itf8_decode_crc(fd, &c2.ref_seq_id, &crc))   == -1) return NULL; else rd+=s;
-    if ((s = itf8_decode_crc(fd, &c2.ref_seq_start, &crc))== -1) return NULL; else rd+=s;
-    if ((s = itf8_decode_crc(fd, &c2.ref_seq_span, &crc)) == -1) return NULL; else rd+=s;
-    if ((s = itf8_decode_crc(fd, &c2.num_records, &crc))  == -1) return NULL; else rd+=s;
-
-    if (CRAM_MAJOR_VERS(fd->version) == 1) {
-	c2.record_counter = 0;
-	c2.num_bases = 0;
-    } else {
-	if (CRAM_MAJOR_VERS(fd->version) >= 3) {
-	    if ((s = ltf8_decode_crc(fd, &c2.record_counter, &crc)) == -1)
-		return NULL;
-	    else
-		rd += s;
-	} else {
-	    int32_t i32;
-	    if ((s = itf8_decode_crc(fd, &i32, &crc)) == -1)
-		return NULL;
-	    else
-		rd += s;
-	    c2.record_counter = i32;
-	}
-
-	if ((s = ltf8_decode_crc(fd, &c2.num_bases, &crc))== -1)
-	    return NULL;
-	else
-	    rd += s;
-    }
-    if ((s = itf8_decode_crc(fd, &c2.num_blocks, &crc))   == -1) return NULL; else rd+=s;
-    if ((s = itf8_decode_crc(fd, &c2.num_landmarks, &crc))== -1) return NULL; else rd+=s;
-
-    if (!(c = calloc(1, sizeof(*c))))
-	return NULL;
-
-    *c = c2;
-
-    if (!(c->landmark = malloc(c->num_landmarks * sizeof(int32_t))) &&
-	c->num_landmarks) {
-	fd->err = errno;
-	cram_free_container(c);
-	return NULL;
-    }  
-    for (i = 0; i < c->num_landmarks; i++) {
-	if ((s = itf8_decode_crc(fd, &c->landmark[i], &crc)) == -1) {
-	    cram_free_container(c);
-	    return NULL;
-	} else {
-	    rd += s;
-	}
-    }
-
-    if (CRAM_MAJOR_VERS(fd->version) >= 3) {
-	if (-1 == int32_decode(fd, (int32_t *)&c->crc32))
-	    return NULL;
-	else
-	    rd+=4;
-
-	if (crc != c->crc32) {
-	    fprintf(stderr, "Container header CRC32 failure\n");
-	    cram_free_container(c);
-	    return NULL;
-	}
-    }
-
-    c->offset = rd;
-    c->slices = NULL;
-    c->curr_slice = 0;
-    c->max_slice = c->num_landmarks;
-    c->slice_rec = 0;
-    c->curr_rec = 0;
-    c->max_rec = 0;
-
-    if (c->ref_seq_id == -2) {
-	c->multi_seq = 1;
-	fd->multi_seq = 1;
-    }
-
-    fd->empty_container =
-	(c->num_records == 0 &&
-	 c->ref_seq_id == -1 &&
-	 c->ref_seq_start == 0x454f46 /* EOF */) ? 1 : 0;
-
-    return c;
-}
-
-
-/* MAXIMUM storage size needed for the container. */
-int cram_container_size(cram_container *c) {
-    return 55 + 5*c->num_landmarks;
-}
-
-
-/*
- * Stores the container structure in dat and returns *size as the
- * number of bytes written to dat[].  The input size of dat is also
- * held in *size and should be initialised to cram_container_size(c).
- *
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_store_container(cram_fd *fd, cram_container *c, char *dat, int *size)
-{
-    char *cp = dat;
-    int i;
-
-    // Check the input buffer is large enough according to our stated
-    // requirements. (NOTE: it may actually take less.)
-    if (cram_container_size(c) > *size)
-        return -1;
-
-    if (CRAM_MAJOR_VERS(fd->version) == 1) {
-	cp += itf8_put(cp, c->length);
-    } else {
-	*(int32_t *)cp = le_int4(c->length);
-	cp += 4;
-    }
-    if (c->multi_seq) {
-	cp += itf8_put(cp, -2);
-	cp += itf8_put(cp, 0);
-	cp += itf8_put(cp, 0);
-    } else {
-	cp += itf8_put(cp, c->ref_seq_id);
-	cp += itf8_put(cp, c->ref_seq_start);
-	cp += itf8_put(cp, c->ref_seq_span);
-    }
-    cp += itf8_put(cp, c->num_records);
-    if (CRAM_MAJOR_VERS(fd->version) == 2) {
-	cp += itf8_put(cp, c->record_counter);
-	cp += ltf8_put(cp, c->num_bases);
-    } else if (CRAM_MAJOR_VERS(fd->version) >= 3) {
-	cp += ltf8_put(cp, c->record_counter);
-	cp += ltf8_put(cp, c->num_bases);
-    }
-
-    cp += itf8_put(cp, c->num_blocks);
-    cp += itf8_put(cp, c->num_landmarks);
-    for (i = 0; i < c->num_landmarks; i++)
-	cp += itf8_put(cp, c->landmark[i]);
-
-    if (CRAM_MAJOR_VERS(fd->version) >= 3) {
-	c->crc32 = crc32(0L, (uc *)dat, cp-dat);
-	cp[0] =  c->crc32        & 0xff;
-	cp[1] = (c->crc32 >>  8) & 0xff;
-	cp[2] = (c->crc32 >> 16) & 0xff;
-	cp[3] = (c->crc32 >> 24) & 0xff;
-	cp += 4;
-    }
-
-    *size = cp-dat; // actual used size
-
-    return 0;
-}
-
-
-/*
- * Writes a container structure.
- *
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_write_container(cram_fd *fd, cram_container *c) {
-    char buf_a[1024], *buf = buf_a, *cp;
-    int i;
-
-    if (55 + c->num_landmarks * 5 >= 1024)
-	buf = malloc(55 + c->num_landmarks * 5);
-    cp = buf;
-
-    if (CRAM_MAJOR_VERS(fd->version) == 1) {
-	cp += itf8_put(cp, c->length);
-    } else {
-	*(int32_t *)cp = le_int4(c->length);
-	cp += 4;
-    }
-    if (c->multi_seq) {
-	cp += itf8_put(cp, -2);
-	cp += itf8_put(cp, 0);
-	cp += itf8_put(cp, 0);
-    } else {
-	cp += itf8_put(cp, c->ref_seq_id);
-	cp += itf8_put(cp, c->ref_seq_start);
-	cp += itf8_put(cp, c->ref_seq_span);
-    }
-    cp += itf8_put(cp, c->num_records);
-    if (CRAM_MAJOR_VERS(fd->version) == 2) {
-	cp += itf8_put(cp, c->record_counter);
-	cp += ltf8_put(cp, c->num_bases);
-    } else if (CRAM_MAJOR_VERS(fd->version) >= 3) {
-	cp += ltf8_put(cp, c->record_counter);
-	cp += ltf8_put(cp, c->num_bases);
-    }
-
-    cp += itf8_put(cp, c->num_blocks);
-    cp += itf8_put(cp, c->num_landmarks);
-    for (i = 0; i < c->num_landmarks; i++)
-	cp += itf8_put(cp, c->landmark[i]);
-
-    if (CRAM_MAJOR_VERS(fd->version) >= 3) {
-	c->crc32 = crc32(0L, (uc *)buf, cp-buf);
-	cp[0] =  c->crc32        & 0xff;
-	cp[1] = (c->crc32 >>  8) & 0xff;
-	cp[2] = (c->crc32 >> 16) & 0xff;
-	cp[3] = (c->crc32 >> 24) & 0xff;
-	cp += 4;
-    }
-
-    if (cp-buf != hwrite(fd->fp, buf, cp-buf)) {
-	if (buf != buf_a)
-	    free(buf);
-	return -1;
-    }
-
-    if (buf != buf_a)
-	free(buf);
-
-    return 0;
-}
-
-// common component shared by cram_flush_container{,_mt}
-static int cram_flush_container2(cram_fd *fd, cram_container *c) {
-    int i, j;
-
-    if (c->curr_slice > 0 && !c->slices)
-	return -1;
-
-    //fprintf(stderr, "Writing container %d, sum %u\n", c->record_counter, sum);
-
-    /* Write the container struct itself */
-    if (0 != cram_write_container(fd, c))
-	return -1;
-
-    /* And the compression header */
-    if (0 != cram_write_block(fd, c->comp_hdr_block))
-	return -1;
-
-    /* Followed by the slice blocks */
-    for (i = 0; i < c->curr_slice; i++) {
-	cram_slice *s = c->slices[i];
-
-	if (0 != cram_write_block(fd, s->hdr_block))
-	    return -1;
-
-	for (j = 0; j < s->hdr->num_blocks; j++) {
-	    if (0 != cram_write_block(fd, s->block[j]))
-		return -1;
-	}
-    }
-
-    return hflush(fd->fp) == 0 ? 0 : -1;
-}
-
-/*
- * Flushes a completely or partially full container to disk, writing
- * container structure, header and blocks. This also calls the encoder
- * functions.
- *
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_flush_container(cram_fd *fd, cram_container *c) {
-    /* Encode the container blocks and generate compression header */
-    if (0 != cram_encode_container(fd, c))
-	return -1;
-
-    return cram_flush_container2(fd, c);
-}
-
-typedef struct {
-    cram_fd *fd;
-    cram_container *c;
-} cram_job;
-
-void *cram_flush_thread(void *arg) {
-    cram_job *j = (cram_job *)arg;
-
-    /* Encode the container blocks and generate compression header */
-    if (0 != cram_encode_container(j->fd, j->c)) {
-	fprintf(stderr, "cram_encode_container failed\n");
-	return NULL;
-    }
-
-    return arg;
-}
-
-static int cram_flush_result(cram_fd *fd) {
-    int i, ret = 0;
-    t_pool_result *r;
-
-    while ((r = t_pool_next_result(fd->rqueue))) {
-	cram_job *j = (cram_job *)r->data;
-	cram_container *c;
-
-	if (!j) {
-	    t_pool_delete_result(r, 0);
-	    return -1;
-	}
-
-	fd = j->fd;
-	c = j->c;
-
-	if (0 != cram_flush_container2(fd, c))
-	    return -1;
-
-	/* Free the container */
-	for (i = 0; i < c->max_slice; i++) {
-	    cram_free_slice(c->slices[i]);
-	    c->slices[i] = NULL;
-	}
-
-	c->slice = NULL;
-	c->curr_slice = 0;
-
-	cram_free_container(c);
-
-	ret |= hflush(fd->fp) == 0 ? 0 : -1;
-
-	t_pool_delete_result(r, 1);
-    }
-
-    return ret;
-}
-
-int cram_flush_container_mt(cram_fd *fd, cram_container *c) {
-    cram_job *j;
-
-    if (!fd->pool)
-	return cram_flush_container(fd, c);
-
-    if (!(j = malloc(sizeof(*j))))
-	return -1;
-    j->fd = fd;
-    j->c = c;
-    
-    t_pool_dispatch(fd->pool, fd->rqueue, cram_flush_thread, j);
-
-    return cram_flush_result(fd);
-}
-
-/* ----------------------------------------------------------------------
- * Compression headers; the first part of the container
- */
-
-/*
- * Creates a new blank container compression header
- *
- * Returns header ptr on success
- *         NULL on failure
- */
-cram_block_compression_hdr *cram_new_compression_header(void) {
-    cram_block_compression_hdr *hdr = calloc(1, sizeof(*hdr));
-    if (!hdr)
-	return NULL;
-
-    if (!(hdr->TD_blk = cram_new_block(CORE, 0))) {
-	free(hdr);
-	return NULL;
-    }
-
-    if (!(hdr->TD_hash = kh_init(m_s2i))) {
-	cram_free_block(hdr->TD_blk);
-	free(hdr);
-	return NULL;
-    }
-
-    if (!(hdr->TD_keys = string_pool_create(8192))) {
-	kh_destroy(m_s2i, hdr->TD_hash);
-	cram_free_block(hdr->TD_blk);
-	free(hdr);
-	return NULL;
-    }
-
-    return hdr;
-}
-
-void cram_free_compression_header(cram_block_compression_hdr *hdr) {
-    int i;
-
-    if (hdr->landmark)
-	free(hdr->landmark);
-
-    if (hdr->preservation_map)
-	kh_destroy(map, hdr->preservation_map);
-
-    for (i = 0; i < CRAM_MAP_HASH; i++) {
-	cram_map *m, *m2;
-	for (m = hdr->rec_encoding_map[i]; m; m = m2) {
-	    m2 = m->next;
-	    if (m->codec)
-		m->codec->free(m->codec);
-	    free(m);
-	}
-    }
-
-    for (i = 0; i < CRAM_MAP_HASH; i++) {
-	cram_map *m, *m2;
-	for (m = hdr->tag_encoding_map[i]; m; m = m2) {
-	    m2 = m->next;
-	    if (m->codec)
-		m->codec->free(m->codec);
-	    free(m);
-	}
-    }
-
-    for (i = 0; i < DS_END; i++) {
-	if (hdr->codecs[i])
-	    hdr->codecs[i]->free(hdr->codecs[i]);
-    }
-
-    if (hdr->TL)
-	free(hdr->TL);
-    if (hdr->TD_blk)
-	cram_free_block(hdr->TD_blk);
-    if (hdr->TD_hash)
-	kh_destroy(m_s2i, hdr->TD_hash);
-    if (hdr->TD_keys)
-	string_pool_destroy(hdr->TD_keys);
-
-    free(hdr);
-}
-
-
-/* ----------------------------------------------------------------------
- * Slices and slice headers
- */
-
-void cram_free_slice_header(cram_block_slice_hdr *hdr) {
-    if (!hdr)
-	return;
-
-    if (hdr->block_content_ids)
-	free(hdr->block_content_ids);
-
-    free(hdr);
-
-    return;
-}
-
-void cram_free_slice(cram_slice *s) {
-    if (!s)
-	return;
-
-    if (s->hdr_block)
-	cram_free_block(s->hdr_block);
-
-    if (s->block) {
-	int i;
-
-	if (s->hdr) {
-	    for (i = 0; i < s->hdr->num_blocks; i++) {
-		cram_free_block(s->block[i]);
-	    }
-	}
-	free(s->block);
-    }
-
-    if (s->block_by_id)
-	free(s->block_by_id);
-
-    if (s->hdr)
-	cram_free_slice_header(s->hdr);
-
-    if (s->seqs_blk)
-	cram_free_block(s->seqs_blk);
-
-    if (s->qual_blk)
-	cram_free_block(s->qual_blk);
-
-    if (s->name_blk)
-	cram_free_block(s->name_blk);
-
-    if (s->aux_blk)
-	cram_free_block(s->aux_blk);
-
-    if (s->aux_OQ_blk)
-	cram_free_block(s->aux_OQ_blk);
-
-    if (s->aux_BQ_blk)
-	cram_free_block(s->aux_BQ_blk);
-
-    if (s->aux_FZ_blk)
-	cram_free_block(s->aux_FZ_blk);
-
-    if (s->aux_oq_blk)
-	cram_free_block(s->aux_oq_blk);
-
-    if (s->aux_os_blk)
-	cram_free_block(s->aux_os_blk);
-
-    if (s->aux_oz_blk)
-	cram_free_block(s->aux_oz_blk);
-
-    if (s->base_blk)
-	cram_free_block(s->base_blk);
-
-    if (s->soft_blk)
-	cram_free_block(s->soft_blk);
-
-    if (s->cigar)
-	free(s->cigar);
-
-    if (s->crecs)
-	free(s->crecs);
-
-    if (s->features)
-	free(s->features);
-
-    if (s->TN)
-	free(s->TN);
-
-    if (s->pair_keys)
-	string_pool_destroy(s->pair_keys);
-
-    if (s->pair[0])
-	kh_destroy(m_s2i, s->pair[0]);
-    if (s->pair[1])
-	kh_destroy(m_s2i, s->pair[1]);
-
-    free(s);
-}
-
-/*
- * Creates a new empty slice in memory, for subsequent writing to
- * disk.
- *
- * Returns cram_slice ptr on success
- *         NULL on failure
- */
-cram_slice *cram_new_slice(enum cram_content_type type, int nrecs) {
-    cram_slice *s = calloc(1, sizeof(*s));
-    if (!s)
-	return NULL;
-
-    if (!(s->hdr = (cram_block_slice_hdr *)calloc(1, sizeof(*s->hdr))))
-	goto err;
-    s->hdr->content_type = type;
-
-    s->hdr_block = NULL;
-    s->block = NULL;
-    s->block_by_id = NULL;
-    s->last_apos = 0;
-    if (!(s->crecs = malloc(nrecs * sizeof(cram_record))))  goto err;
-    s->cigar = NULL;
-    s->cigar_alloc = 0;
-    s->ncigar = 0;
-
-    if (!(s->seqs_blk = cram_new_block(EXTERNAL, 0)))       goto err;
-    if (!(s->qual_blk = cram_new_block(EXTERNAL, DS_QS)))   goto err;
-    if (!(s->name_blk = cram_new_block(EXTERNAL, DS_RN)))   goto err;
-    if (!(s->aux_blk  = cram_new_block(EXTERNAL, DS_aux)))  goto err;
-    if (!(s->base_blk = cram_new_block(EXTERNAL, DS_IN)))   goto err;
-    if (!(s->soft_blk = cram_new_block(EXTERNAL, DS_SC)))   goto err;
-
-    s->features = NULL;
-    s->nfeatures = s->afeatures = 0;
-
-#ifndef TN_external
-    s->TN = NULL;
-    s->nTN = s->aTN = 0;
-#endif
-
-    // Volatile keys as we do realloc in dstring
-    if (!(s->pair_keys = string_pool_create(8192))) goto err;
-    if (!(s->pair[0] = kh_init(m_s2i)))             goto err;
-    if (!(s->pair[1] = kh_init(m_s2i)))             goto err;
-    
-#ifdef BA_external
-    s->BA_len = 0;
-#endif
-
-    return s;
-
- err:
-    if (s)
-	cram_free_slice(s);
-
-    return NULL;
-}
-
-/*
- * Loads an entire slice.
- * FIXME: In 1.0 the native unit of slices within CRAM is broken
- * as slices contain references to objects in other slices.
- * To work around this while keeping the slice oriented outer loop
- * we read all slices and stitch them together into a fake large
- * slice instead.
- *
- * Returns cram_slice ptr on success
- *         NULL on failure
- */
-cram_slice *cram_read_slice(cram_fd *fd) {
-    cram_block *b = cram_read_block(fd);
-    cram_slice *s = calloc(1, sizeof(*s));
-    int i, n, max_id, min_id;
-
-    if (!b || !s)
-	goto err;
-
-    s->hdr_block = b;
-    switch (b->content_type) {
-    case MAPPED_SLICE:
-    case UNMAPPED_SLICE:
-	if (!(s->hdr = cram_decode_slice_header(fd, b)))
-	    goto err;
-	break;
-
-    default:
-	fprintf(stderr, "Unexpected block of type %s\n",
-		cram_content_type2str(b->content_type));
-	goto err;
-    }
-
-    if (s->hdr->num_blocks < 1) {
-        fprintf(stderr, "Slice does not include any data blocks.\n");
-	goto err;
-    }
-
-    s->block = calloc(n = s->hdr->num_blocks, sizeof(*s->block));
-    if (!s->block)
-	goto err;
-
-    for (max_id = i = 0, min_id = INT_MAX; i < n; i++) {
-	if (!(s->block[i] = cram_read_block(fd)))
-	    goto err;
-
-	if (s->block[i]->content_type == EXTERNAL) {
-	    if (max_id < s->block[i]->content_id)
-		max_id = s->block[i]->content_id;
-	    if (min_id > s->block[i]->content_id)
-		min_id = s->block[i]->content_id;
-	}
-    }
-    if (min_id >= 0 && max_id < 1024) {
-	if (!(s->block_by_id = calloc(1024, sizeof(s->block[0]))))
-	    goto err;
-
-	for (i = 0; i < n; i++) {
-	    if (s->block[i]->content_type != EXTERNAL)
-		continue;
-	    s->block_by_id[s->block[i]->content_id] = s->block[i];
-	}
-    }
-
-    /* Initialise encoding/decoding tables */
-    s->cigar = NULL;
-    s->cigar_alloc = 0;
-    s->ncigar = 0;
-
-    if (!(s->seqs_blk = cram_new_block(EXTERNAL, 0)))      goto err;
-    if (!(s->qual_blk = cram_new_block(EXTERNAL, DS_QS)))  goto err;
-    if (!(s->name_blk = cram_new_block(EXTERNAL, DS_RN)))  goto err;
-    if (!(s->aux_blk  = cram_new_block(EXTERNAL, DS_aux))) goto err;
-    if (!(s->base_blk = cram_new_block(EXTERNAL, DS_IN)))  goto err;
-    if (!(s->soft_blk = cram_new_block(EXTERNAL, DS_SC)))  goto err;
-
-    s->crecs = NULL;
-
-    s->last_apos = s->hdr->ref_seq_start;
-    
-    return s;
-
- err:
-    if (b)
-	cram_free_block(b);
-    if (s) {
-	s->hdr_block = NULL;
-	cram_free_slice(s);
-    }
-    return NULL;
-}
-
-
-/* ----------------------------------------------------------------------
- * CRAM file definition (header)
- */
-
-/*
- * Reads a CRAM file definition structure.
- * Returns file_def ptr on success
- *         NULL on failure
- */
-cram_file_def *cram_read_file_def(cram_fd *fd) {
-    cram_file_def *def = malloc(sizeof(*def));
-    if (!def)
-	return NULL;
-
-    if (26 != hread(fd->fp, &def->magic[0], 26)) {
-	free(def);
-	return NULL;
-    }
-
-    if (memcmp(def->magic, "CRAM", 4) != 0) {
-	free(def);
-	return NULL;
-    }
-
-    if (def->major_version > 3) {
-	fprintf(stderr, "CRAM version number mismatch\n"
-		"Expected 1.x, 2.x or 3.x, got %d.%d\n",
-		def->major_version, def->minor_version);
-	free(def);
-	return NULL;
-    }
-
-    fd->first_container += 26;
-    fd->last_slice = 0;
-
-    return def;
-}
-
-/*
- * Writes a cram_file_def structure to cram_fd.
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_write_file_def(cram_fd *fd, cram_file_def *def) {
-    return (hwrite(fd->fp, &def->magic[0], 26) == 26) ? 0 : -1;
-}
-
-void cram_free_file_def(cram_file_def *def) {
-    if (def) free(def);
-}
-
-/* ----------------------------------------------------------------------
- * SAM header I/O
- */
-
-
-/*
- * Reads the SAM header from the first CRAM data block.
- * Also performs minimal parsing to extract read-group
- * and sample information.
-
- * Returns SAM hdr ptr on success
- *         NULL on failure
- */
-SAM_hdr *cram_read_SAM_hdr(cram_fd *fd) {
-    int32_t header_len;
-    char *header;
-    SAM_hdr *hdr;
-
-    /* 1.1 onwards stores the header in the first block of a container */
-    if (CRAM_MAJOR_VERS(fd->version) == 1) {
-	/* Length */
-	if (-1 == int32_decode(fd, &header_len))
-	    return NULL;
-
-	/* Alloc and read */
-	if (header_len < 0 || NULL == (header = malloc((size_t) header_len+1)))
-	    return NULL;
-
-	if (header_len != hread(fd->fp, header, header_len))
-	    return NULL;
-	header[header_len] = '\0';
-
-	fd->first_container += 4 + header_len;
-    } else {
-	cram_container *c = cram_read_container(fd);
-	cram_block *b;
-	int i, len;
-
-	if (!c)
-	    return NULL;
-
-	if (c->num_blocks < 1) {
-	    cram_free_container(c);
-	    return NULL;
-	}
-
-	if (!(b = cram_read_block(fd))) {
-	    cram_free_container(c);
-	    return NULL;
-	}
-	if (cram_uncompress_block(b) != 0) {
-	    cram_free_container(c);
-	    return NULL;
-	}
-
-	len = b->comp_size + 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) +
-	    itf8_size(b->content_id) + 
-	    itf8_size(b->uncomp_size) + 
-	    itf8_size(b->comp_size);
-
-	/* Extract header from 1st block */
-	if (-1 == int32_get_blk(b, &header_len) ||
-            header_len < 0 || /* Spec. says signed...  why? */
-	    b->uncomp_size - 4 < header_len) {
-	    cram_free_container(c);
-	    cram_free_block(b);
-	    return NULL;
-	}
-	if (NULL == (header = malloc((size_t) header_len+1))) {
-	    cram_free_container(c);
-	    cram_free_block(b);
-	    return NULL;
-	}
-	memcpy(header, BLOCK_END(b), header_len);
-	header[header_len]='\0';
-	cram_free_block(b);
-
-	/* Consume any remaining blocks */
-	for (i = 1; i < c->num_blocks; i++) {
-	    if (!(b = cram_read_block(fd))) {
-		cram_free_container(c);
-		return NULL;
-	    }
-	    len += b->comp_size + 2 + 4*(CRAM_MAJOR_VERS(fd->version) >= 3) +
-		itf8_size(b->content_id) + 
-		itf8_size(b->uncomp_size) + 
-		itf8_size(b->comp_size);
-	    cram_free_block(b);
-	}
-
-	if (c->length > 0 && len > 0 && c->length > len) {
-	    // Consume padding
-	    char *pads = malloc(c->length - len);
-	    if (!pads) {
-		cram_free_container(c);
-		return NULL;
-	    }
-
-	    if (c->length - len != hread(fd->fp, pads, c->length - len)) {
-		cram_free_container(c);
-		return NULL;
-	    }
-	    free(pads);
-	}
-
-	cram_free_container(c);
-    }
-
-    /* Parse */
-    hdr = sam_hdr_parse_(header, header_len);
-    free(header);
-
-    return hdr;
-}
-
-/*
- * Converts 'in' to a full pathname to store in out.
- * Out must be at least PATH_MAX bytes long.
- */
-static void full_path(char *out, char *in) {
-    if (*in == '/') {
-	strncpy(out, in, PATH_MAX);
-	out[PATH_MAX-1] = 0;
-    } else {
-	int len;
-
-	// unable to get dir or out+in is too long
-	if (!getcwd(out, PATH_MAX) ||
-	    (len = strlen(out))+1+strlen(in) >= PATH_MAX) {
-	    strncpy(out, in, PATH_MAX);
-	    out[PATH_MAX-1] = 0;
-	    return;
-	}
-
-	sprintf(out+len, "/%.*s", PATH_MAX - len, in);
-
-	// FIXME: cope with `pwd`/../../../foo.fa ?
-    }
-}
-
-/*
- * Writes a CRAM SAM header.
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_write_SAM_hdr(cram_fd *fd, SAM_hdr *hdr) {
-    int header_len;
-    int blank_block = (CRAM_MAJOR_VERS(fd->version) >= 3);
-
-    /* Write CRAM MAGIC if not yet written. */
-    if (fd->file_def->major_version == 0) {
-	fd->file_def->major_version = CRAM_MAJOR_VERS(fd->version);
-	fd->file_def->minor_version = CRAM_MINOR_VERS(fd->version);
-	if (0 != cram_write_file_def(fd, fd->file_def))
-	    return -1;
-    }
-
-    /* 1.0 requires an UNKNOWN read-group */
-    if (CRAM_MAJOR_VERS(fd->version) == 1) {
-	if (!sam_hdr_find_rg(hdr, "UNKNOWN"))
-	    if (sam_hdr_add(hdr, "RG",
-			    "ID", "UNKNOWN", "SM", "UNKNOWN", NULL))
-		return -1;
-    }
-
-    /* Fix M5 strings */
-    if (fd->refs && !fd->no_ref) {
-	int i;
-	for (i = 0; i < hdr->nref; i++) {
-	    SAM_hdr_type *ty;
-	    char *ref;
-
-	    if (!(ty = sam_hdr_find(hdr, "SQ", "SN", hdr->ref[i].name)))
-		return -1;
-
-	    if (!sam_hdr_find_key(hdr, ty, "M5", NULL)) {
-		char unsigned buf[16];
-		char buf2[33];
-		int rlen;
-		hts_md5_context *md5;
-
-		if (!fd->refs ||
-		    !fd->refs->ref_id ||
-		    !fd->refs->ref_id[i]) {
-		    return -1;
-		}
-		rlen = fd->refs->ref_id[i]->length;
-		if (!(md5 = hts_md5_init()))
-		    return -1;
-		ref = cram_get_ref(fd, i, 1, rlen);
-		if (NULL == ref) return -1;
-		rlen = fd->refs->ref_id[i]->length; /* In case it just loaded */
-		hts_md5_update(md5, ref, rlen);
-		hts_md5_final(buf, md5);
-		hts_md5_destroy(md5);
-		cram_ref_decr(fd->refs, i);
-
-		hts_md5_hex(buf2, buf);
-		if (sam_hdr_update(hdr, ty, "M5", buf2, NULL))
-		    return -1;
-	    }
-
-	    if (fd->ref_fn) {
-		char ref_fn[PATH_MAX];
-		full_path(ref_fn, fd->ref_fn);
-		if (sam_hdr_update(hdr, ty, "UR", ref_fn, NULL))
-		    return -1;
-	    }
-	}
-    }
-
-    if (sam_hdr_rebuild(hdr))
-	return -1;
-
-    /* Length */
-    header_len = sam_hdr_length(hdr);
-    if (CRAM_MAJOR_VERS(fd->version) == 1) {
-	if (-1 == int32_encode(fd, header_len))
-	    return -1;
-
-	/* Text data */
-	if (header_len != hwrite(fd->fp, sam_hdr_str(hdr), header_len))
-	    return -1;
-    } else {
-	/* Create block(s) inside a container */
-	cram_block *b = cram_new_block(FILE_HEADER, 0);
-	cram_container *c = cram_new_container(0, 0);
-	int padded_length;
-	char *pads;
-	int is_cram_3 = (CRAM_MAJOR_VERS(fd->version) >= 3);
-
-	if (!b || !c) {
-	    if (b) cram_free_block(b);
-	    if (c) cram_free_container(c);
-	    return -1;
-	}
-
-	int32_put_blk(b, header_len);
-	BLOCK_APPEND(b, sam_hdr_str(hdr), header_len);
-	BLOCK_UPLEN(b);
-
-	// Compress header block if V3.0 and above
-	if (CRAM_MAJOR_VERS(fd->version) >= 3)
-	    cram_compress_block(fd, b, NULL, -1, -1);
-
-	if (blank_block) {
-	    c->length = b->comp_size + 2 + 4*is_cram_3 +
-		itf8_size(b->content_id)   + 
-		itf8_size(b->uncomp_size)  +
-		itf8_size(b->comp_size);
-
-	    c->num_blocks = 2;
-	    c->num_landmarks = 2;
-	    if (!(c->landmark = malloc(2*sizeof(*c->landmark)))) {
-		cram_free_block(b);
-		cram_free_container(c);
-		return -1;
-	    }
-	    c->landmark[0] = 0;
-	    c->landmark[1] = c->length;
-
-	    // Plus extra storage for uncompressed secondary blank block
-	    padded_length = MIN(c->length*.5, 10000);
-	    c->length += padded_length + 2 + 4*is_cram_3 +
-		itf8_size(b->content_id) + 
-		itf8_size(padded_length)*2;
-	} else {
-	    // Pad the block instead.
-	    c->num_blocks = 1;
-	    c->num_landmarks = 1;
-	    if (!(c->landmark = malloc(sizeof(*c->landmark))))
-		return -1;
-	    c->landmark[0] = 0;
-
-	    padded_length = MAX(c->length*1.5, 10000) - c->length;
-
-	    c->length = b->comp_size + padded_length +
-		2 + 4*is_cram_3 +
-		itf8_size(b->content_id)   + 
-		itf8_size(b->uncomp_size)  +
-		itf8_size(b->comp_size);
-
-	    if (NULL == (pads = calloc(1, padded_length))) {
-		cram_free_block(b);
-		cram_free_container(c);
-		return -1;
-	    }
-	    BLOCK_APPEND(b, pads, padded_length);
-	    BLOCK_UPLEN(b);
-	    free(pads);
-	}
-
-	if (-1 == cram_write_container(fd, c)) {
-	    cram_free_block(b);
-	    cram_free_container(c);
-	    return -1;
-	}
-
-	if (-1 == cram_write_block(fd, b)) {
-	    cram_free_block(b);
-	    cram_free_container(c);
-	    return -1;
-	}
-
-	if (blank_block) {
-	    BLOCK_RESIZE(b, padded_length);
-	    memset(BLOCK_DATA(b), 0, padded_length);
-	    BLOCK_SIZE(b) = padded_length;
-	    BLOCK_UPLEN(b);
-	    b->method = RAW;
-	    if (-1 == cram_write_block(fd, b)) {
-		cram_free_block(b);
-		cram_free_container(c);
-		return -1;
-	    }
-	}
-
-	cram_free_block(b);
-	cram_free_container(c);
-    }
-
-    if (-1 == refs_from_header(fd->refs, fd, fd->header))
-	return -1;
-    if (-1 == refs2id(fd->refs, fd->header))
-	return -1;
-
-    if (0 != hflush(fd->fp))
-	return -1;
-
-    RP("=== Finishing saving header ===\n");
-
-    return 0;
-}
-
-/* ----------------------------------------------------------------------
- * The top-level cram opening, closing and option handling
- */
-
-/*
- * Initialises the lookup tables. These could be global statics, but they're
- * clumsy to setup in a multi-threaded environment unless we generate
- * verbatim code and include that.
- */
-static void cram_init_tables(cram_fd *fd) {
-    int i;
-
-    memset(fd->L1, 4, 256);
-    fd->L1['A'] = 0; fd->L1['a'] = 0;
-    fd->L1['C'] = 1; fd->L1['c'] = 1;
-    fd->L1['G'] = 2; fd->L1['g'] = 2;
-    fd->L1['T'] = 3; fd->L1['t'] = 3;
-
-    memset(fd->L2, 5, 256);
-    fd->L2['A'] = 0; fd->L2['a'] = 0;
-    fd->L2['C'] = 1; fd->L2['c'] = 1;
-    fd->L2['G'] = 2; fd->L2['g'] = 2;
-    fd->L2['T'] = 3; fd->L2['t'] = 3;
-    fd->L2['N'] = 4; fd->L2['n'] = 4;
-
-    if (CRAM_MAJOR_VERS(fd->version) == 1) {
-	for (i = 0; i < 0x200; i++) {
-	    int f = 0;
-
-	    if (i & CRAM_FPAIRED)      f |= BAM_FPAIRED;
-	    if (i & CRAM_FPROPER_PAIR) f |= BAM_FPROPER_PAIR;
-	    if (i & CRAM_FUNMAP)       f |= BAM_FUNMAP;
-	    if (i & CRAM_FREVERSE)     f |= BAM_FREVERSE;
-	    if (i & CRAM_FREAD1)       f |= BAM_FREAD1;
-	    if (i & CRAM_FREAD2)       f |= BAM_FREAD2;
-	    if (i & CRAM_FSECONDARY)   f |= BAM_FSECONDARY;
-	    if (i & CRAM_FQCFAIL)      f |= BAM_FQCFAIL;
-	    if (i & CRAM_FDUP)         f |= BAM_FDUP;
-
-	    fd->bam_flag_swap[i]  = f;
-	}
-    
-	for (i = 0; i < 0x1000; i++) {
-	    int g = 0;
-
-	    if (i & BAM_FPAIRED)	   g |= CRAM_FPAIRED;
-	    if (i & BAM_FPROPER_PAIR)  g |= CRAM_FPROPER_PAIR;
-	    if (i & BAM_FUNMAP)        g |= CRAM_FUNMAP;
-	    if (i & BAM_FREVERSE)      g |= CRAM_FREVERSE;
-	    if (i & BAM_FREAD1)        g |= CRAM_FREAD1;
-	    if (i & BAM_FREAD2)        g |= CRAM_FREAD2;
-	    if (i & BAM_FSECONDARY)    g |= CRAM_FSECONDARY;
-	    if (i & BAM_FQCFAIL)       g |= CRAM_FQCFAIL;
-	    if (i & BAM_FDUP)          g |= CRAM_FDUP;
-
-	    fd->cram_flag_swap[i] = g;
-	}
-    } else {
-	/* NOP */
-	for (i = 0; i < 0x1000; i++)
-	    fd->bam_flag_swap[i] = i;
-	for (i = 0; i < 0x1000; i++)
-	    fd->cram_flag_swap[i] = i;
-    }
-
-    memset(fd->cram_sub_matrix, 4, 32*32);
-    for (i = 0; i < 32; i++) {
-	fd->cram_sub_matrix[i]['A'&0x1f]=0;
-	fd->cram_sub_matrix[i]['C'&0x1f]=1;
-	fd->cram_sub_matrix[i]['G'&0x1f]=2;
-	fd->cram_sub_matrix[i]['T'&0x1f]=3;
-	fd->cram_sub_matrix[i]['N'&0x1f]=4;
-    }
-    for (i = 0; i < 20; i+=4) {
-	int j;
-	for (j = 0; j < 20; j++) {
-	    fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3;
-	    fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3;
-	    fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3;
-	    fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][j]=3;
-	}
-	fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+0]&0x1f]=0;
-	fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+1]&0x1f]=1;
-	fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+2]&0x1f]=2;
-	fd->cram_sub_matrix["ACGTN"[i>>2]&0x1f][CRAM_SUBST_MATRIX[i+3]&0x1f]=3;
-    }
-}
-
-// Default version numbers for CRAM
-static int major_version = 3;
-static int minor_version = 0;
-
-/*
- * Opens a CRAM file for read (mode "rb") or write ("wb").
- * The filename may be "-" to indicate stdin or stdout.
- *
- * Returns file handle on success
- *         NULL on failure.
- */
-cram_fd *cram_open(const char *filename, const char *mode) {
-    hFILE *fp;
-    cram_fd *fd;
-    char fmode[3]= { mode[0], '\0', '\0' };
-
-    if (strlen(mode) > 1 && (mode[1] == 'b' || mode[1] == 'c')) {
-	fmode[1] = 'b';
-    }
-
-    fp = hopen(filename, fmode);
-    if (!fp)
-	return NULL;
-
-    fd = cram_dopen(fp, filename, mode);
-    if (!fd)
-	hclose_abruptly(fp);
-
-    return fd;
-}
-
-/* Opens an existing stream for reading or writing.
- *
- * Returns file handle on success;
- *         NULL on failure.
- */
-cram_fd *cram_dopen(hFILE *fp, const char *filename, const char *mode) {
-    int i;
-    char *cp;
-    cram_fd *fd = calloc(1, sizeof(*fd));
-    if (!fd)
-	return NULL;
-
-    fd->level = 5;
-    for (i = 0; mode[i]; i++) {
-	if (mode[i] >= '0' && mode[i] <= '9') {
-	    fd->level = mode[i] - '0';
-	    break;
-	}
-    }
-
-    fd->fp = fp;
-    fd->mode = *mode;
-    fd->first_container = 0;
-
-    if (fd->mode == 'r') {
-	/* Reader */
-
-	if (!(fd->file_def = cram_read_file_def(fd)))
-	    goto err;
-
-	fd->version = fd->file_def->major_version * 256 +
-	    fd->file_def->minor_version;
-
-	if (!(fd->header = cram_read_SAM_hdr(fd)))
-	    goto err;
-
-    } else {
-	/* Writer */
-	cram_file_def *def = calloc(1, sizeof(*def));
-	if (!def)
-	    return NULL;
-
-	fd->file_def = def;
-
-	def->magic[0] = 'C';
-	def->magic[1] = 'R';
-	def->magic[2] = 'A';
-	def->magic[3] = 'M';
-	def->major_version = 0; // Indicator to write file def later.
-	def->minor_version = 0;
-	memset(def->file_id, 0, 20);
-	strncpy(def->file_id, filename, 20);
-
-	fd->version = major_version * 256 + minor_version;
-
-	/* SAM header written later along with this file_def */
-    }
-
-    cram_init_tables(fd);
-
-    fd->prefix = strdup((cp = strrchr(filename, '/')) ? cp+1 : filename);
-    if (!fd->prefix)
-	goto err;
-    fd->first_base = fd->last_base = -1;
-    fd->record_counter = 0;
-
-    fd->ctr = NULL;
-    fd->refs  = refs_create();
-    if (!fd->refs)
-	goto err;
-    fd->ref_id = -2;
-    fd->ref = NULL;
-
-    fd->decode_md = 0;
-    fd->verbose = 0;
-    fd->seqs_per_slice = SEQS_PER_SLICE;
-    fd->slices_per_container = SLICE_PER_CNT;
-    fd->embed_ref = 0;
-    fd->no_ref = 0;
-    fd->ignore_md5 = 0;
-    fd->use_bz2 = 0;
-    fd->use_rans = (CRAM_MAJOR_VERS(fd->version) >= 3);
-    fd->use_lzma = 0;
-    fd->multi_seq = -1;
-    fd->unsorted   = 0;
-    fd->shared_ref = 0;
-
-    fd->index       = NULL;
-    fd->own_pool    = 0;
-    fd->pool        = NULL;
-    fd->rqueue      = NULL;
-    fd->job_pending = NULL;
-    fd->ooc         = 0;
-    fd->required_fields = INT_MAX;
-
-    for (i = 0; i < DS_END; i++)
-	fd->m[i] = cram_new_metrics();
-
-    fd->range.refid = -2; // no ref.
-    fd->eof = 1;          // See samtools issue #150
-    fd->ref_fn = NULL;
-
-    fd->bl = NULL;
-
-    /* Initialise dummy refs from the @SQ headers */
-    if (-1 == refs_from_header(fd->refs, fd, fd->header))
-	goto err;
-
-    return fd;
-
- err:
-    if (fd)
-	free(fd);
-
-    return NULL;
-}
-
-/*
- * Seek within a CRAM file.
- *
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_seek(cram_fd *fd, off_t offset, int whence) {
-    char buf[65536];
-
-    fd->ooc = 0;
-
-    if (hseek(fd->fp, offset, whence) >= 0)
-	return 0;
-
-    if (!(whence == SEEK_CUR && offset >= 0))
-	return -1;
-
-    /* Couldn't fseek, but we're in SEEK_CUR mode so read instead */
-    while (offset > 0) {
-	int len = MIN(65536, offset);
-	if (len != hread(fd->fp, buf, len))
-	    return -1;
-	offset -= len;
-    }
-
-    return 0;
-}
-
-/*
- * Flushes a CRAM file.
- * Useful for when writing to stdout without wishing to close the stream.
- *
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_flush(cram_fd *fd) {
-    if (!fd)
-	return -1;
-
-    if (fd->mode == 'w' && fd->ctr) {
-	if(fd->ctr->slice)
-	    fd->ctr->curr_slice++;
-	if (-1 == cram_flush_container_mt(fd, fd->ctr))
-	    return -1;
-    }
-
-    return 0;
-}
-
-/*
- * Closes a CRAM file.
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_close(cram_fd *fd) {
-    spare_bams *bl, *next;
-    int i;
-	
-    if (!fd)
-	return -1;
-
-    if (fd->mode == 'w' && fd->ctr) {
-	if(fd->ctr->slice)
-	    fd->ctr->curr_slice++;
-	if (-1 == cram_flush_container_mt(fd, fd->ctr))
-	    return -1;
-    }
-
-    if (fd->pool && fd->eof >= 0) {
-	t_pool_flush(fd->pool);
-
-	if (0 != cram_flush_result(fd))
-	    return -1;
-
-	pthread_mutex_destroy(&fd->metrics_lock);
-	pthread_mutex_destroy(&fd->ref_lock);
-	pthread_mutex_destroy(&fd->bam_list_lock);
-
-	fd->ctr = NULL; // prevent double freeing
-
-	//fprintf(stderr, "CRAM: destroy queue %p\n", fd->rqueue);
-
-	t_results_queue_destroy(fd->rqueue);
-    }
-
-    if (fd->mode == 'w') {
-	/* Write EOF block */
-	if (CRAM_MAJOR_VERS(fd->version) == 3) {
-	    if (38 != hwrite(fd->fp,
-			     "\x0f\x00\x00\x00\xff\xff\xff\xff" // Cont HDR
-			     "\x0f\xe0\x45\x4f\x46\x00\x00\x00" // Cont HDR
-			     "\x00\x01\x00"                     // Cont HDR
-			     "\x05\xbd\xd9\x4f"                 // CRC32
-			     "\x00\x01\x00\x06\x06"             // Comp.HDR blk
-			     "\x01\x00\x01\x00\x01\x00"         // Comp.HDR blk
-			     "\xee\x63\x01\x4b",                // CRC32
-			     38))
-		return -1;
-	} else {
-	    if (30 != hwrite(fd->fp,
-			     "\x0b\x00\x00\x00\xff\xff\xff\xff"
-			     "\x0f\xe0\x45\x4f\x46\x00\x00\x00"
-			     "\x00\x01\x00\x00\x01\x00\x06\x06"
-			     "\x01\x00\x01\x00\x01\x00", 30))
-		return -1;
-	}
-    }
-
-    for (bl = fd->bl; bl; bl = next) {
-	int i, max_rec = fd->seqs_per_slice * fd->slices_per_container;
-
-	next = bl->next;
-	for (i = 0; i < max_rec; i++) {
-	    if (bl->bams[i])
-		bam_free(bl->bams[i]);
-	}
-	free(bl->bams);
-	free(bl);
-    }
-
-    if (hclose(fd->fp) != 0)
-	return -1;
-
-    if (fd->file_def)
-	cram_free_file_def(fd->file_def);
-
-    if (fd->header)
-	sam_hdr_free(fd->header);
-
-    free(fd->prefix);
-
-    if (fd->ctr)
-	cram_free_container(fd->ctr);
-
-    if (fd->refs)
-	refs_free(fd->refs);
-    if (fd->ref_free)
-        free(fd->ref_free);
-
-    for (i = 0; i < DS_END; i++)
-	if (fd->m[i])
-	    free(fd->m[i]);
-
-    if (fd->index)
-	cram_index_free(fd);
-
-    if (fd->own_pool && fd->pool)
-	t_pool_destroy(fd->pool, 0);
-
-    free(fd);
-    return 0;
-}
-
-/*
- * Returns 1 if we hit an EOF while reading.
- */
-int cram_eof(cram_fd *fd) {
-    return fd->eof;
-}
-
-
-/* 
- * Sets options on the cram_fd. See CRAM_OPT_* definitions in cram_structs.h.
- * Use this immediately after opening.
- *
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...) {
-    int r;
-    va_list args;
-
-    va_start(args, opt);
-    r = cram_set_voption(fd, opt, args);
-    va_end(args);
-
-    return r;
-}
-
-/*
- * Sets options on the cram_fd. See CRAM_OPT_* definitions in cram_structs.h.
- * Use this immediately after opening.
- *
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args) {
-    refs_t *refs;
-
-    if (!fd) {
-	errno = EBADF;
-	return -1;
-    }
-
-    switch (opt) {
-    case CRAM_OPT_DECODE_MD:
-	fd->decode_md = va_arg(args, int);
-	break;
-
-    case CRAM_OPT_PREFIX:
-	if (fd->prefix)
-	    free(fd->prefix);
-	if (!(fd->prefix = strdup(va_arg(args, char *))))
-	    return -1;
-	break;
-
-    case CRAM_OPT_VERBOSITY:
-	fd->verbose = va_arg(args, int);
-	break;
-
-    case CRAM_OPT_SEQS_PER_SLICE:
-	fd->seqs_per_slice = va_arg(args, int);
-	break;
-
-    case CRAM_OPT_SLICES_PER_CONTAINER:
-	fd->slices_per_container = va_arg(args, int);
-	break;
-
-    case CRAM_OPT_EMBED_REF:
-	fd->embed_ref = va_arg(args, int);
-	break;
-
-    case CRAM_OPT_NO_REF:
-	fd->no_ref = va_arg(args, int);
-	break;
-
-    case CRAM_OPT_IGNORE_MD5:
-	fd->ignore_md5 = va_arg(args, int);
-	break;
-
-    case CRAM_OPT_USE_BZIP2:
-	fd->use_bz2 = va_arg(args, int);
-	break;
-
-    case CRAM_OPT_USE_RANS:
-	fd->use_rans = va_arg(args, int);
-	break;
-
-    case CRAM_OPT_USE_LZMA:
-	fd->use_lzma = va_arg(args, int);
-	break;
-
-    case CRAM_OPT_SHARED_REF:
-	fd->shared_ref = 1;
-	refs = va_arg(args, refs_t *);
-	if (refs != fd->refs) {
-	    if (fd->refs)
-		refs_free(fd->refs);
-	    fd->refs = refs;
-	    fd->refs->count++;
-	}
-	break;
-
-    case CRAM_OPT_RANGE:
-	fd->range = *va_arg(args, cram_range *);
-	return cram_seek_to_refpos(fd, &fd->range);
-
-    case CRAM_OPT_REFERENCE:
-	return cram_load_reference(fd, va_arg(args, char *));
-
-    case CRAM_OPT_VERSION: {
-	int major, minor;
-	char *s = va_arg(args, char *);
-	if (2 != sscanf(s, "%d.%d", &major, &minor)) {
-	    fprintf(stderr, "Malformed version string %s\n", s);
-	    return -1;
-	}
-	if (!((major == 1 &&  minor == 0) ||
-	      (major == 2 && (minor == 0 || minor == 1)) ||
-	      (major == 3 &&  minor == 0))) {
-	    fprintf(stderr, "Unknown version string; "
-		    "use 1.0, 2.0, 2.1 or 3.0\n");
-	    errno = EINVAL;
-	    return -1;
-	}
-	fd->version = major*256 + minor;
-
-	if (CRAM_MAJOR_VERS(fd->version) >= 3)
-	    fd->use_rans = 1;
-	break;
-    }
-
-    case CRAM_OPT_MULTI_SEQ_PER_SLICE:
-	fd->multi_seq = va_arg(args, int);
-	break;
-
-    case CRAM_OPT_NTHREADS: {
-	int nthreads =  va_arg(args, int);
-        if (nthreads > 1) {
-            if (!(fd->pool = t_pool_init(nthreads*2, nthreads)))
-                return -1;
-
-	    fd->rqueue = t_results_queue_init();
-	    pthread_mutex_init(&fd->metrics_lock, NULL);
-	    pthread_mutex_init(&fd->ref_lock, NULL);
-	    pthread_mutex_init(&fd->bam_list_lock, NULL);
-	    fd->shared_ref = 1;
-	    fd->own_pool = 1;
-        }
-	break;
-    }
-
-    case CRAM_OPT_THREAD_POOL:
-	fd->pool = va_arg(args, t_pool *);
-	if (fd->pool) {
-	    fd->rqueue = t_results_queue_init();
-	    pthread_mutex_init(&fd->metrics_lock, NULL);
-	    pthread_mutex_init(&fd->ref_lock, NULL);
-	    pthread_mutex_init(&fd->bam_list_lock, NULL);
-	}
-	fd->shared_ref = 1; // Needed to avoid clobbering ref between threads
-	fd->own_pool = 0;
-
-	//fd->qsize = 1;
-	//fd->decoded = calloc(fd->qsize, sizeof(cram_container *));
-	//t_pool_dispatch(fd->pool, cram_decoder_thread, fd);
-	break;
-
-    case CRAM_OPT_REQUIRED_FIELDS:
-	fd->required_fields = va_arg(args, int);
-	break;
-
-    case HTS_OPT_COMPRESSION_LEVEL:
-	fd->level = va_arg(args, int);
-	break;
-
-    default:
-	fprintf(stderr, "Unknown CRAM option code %d\n", opt);
-	errno = EINVAL;
-	return -1;
-    }
-
-    return 0;
-}