7df6e18265341f87a69fba808aa1f92f8ebca841
markd
  Wed Apr 15 13:39:42 2026 -0700
move copy of htslib

diff --git src/htslib/cram/cram_io.h src/htslib/cram/cram_io.h
deleted file mode 100644
index d3fe90e347e..00000000000
--- src/htslib/cram/cram_io.h
+++ /dev/null
@@ -1,669 +0,0 @@
-/*
-Copyright (c) 2012-2014 Genome Research Ltd.
-Author: James Bonfield <jkb@sanger.ac.uk>
-
-Redistribution and use in source and binary forms, with or without 
-modification, are permitted provided that the following conditions are met:
-
-   1. Redistributions of source code must retain the above copyright notice, 
-this list of conditions and the following disclaimer.
-
-   2. Redistributions in binary form must reproduce the above copyright notice, 
-this list of conditions and the following disclaimer in the documentation 
-and/or other materials provided with the distribution.
-
-   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
-Institute nor the names of its contributors may be used to endorse or promote
-products derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
-DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/*! \file
- * Include cram.h instead.
- *
- * This is an internal part of the CRAM system and is automatically included
- * when you #include cram.h.
- *
- * Implements the low level CRAM I/O primitives.
- * This includes basic data types such as byte, int, ITF-8,
- * maps, bitwise I/O, etc.
- */
-
-#ifndef _CRAM_IO_H_
-#define _CRAM_IO_H_
-
-#define ITF8_MACROS
-
-#include <stdint.h>
-#include <cram/misc.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/**@{ ----------------------------------------------------------------------
- * ITF8 encoding and decoding.
- *
- * Also see the itf8_get and itf8_put macros.
- */
-
-/*! INTERNAL: Converts two characters into an integer for use in switch{}.
- *
- * Expands to ((a)<<8)|(b): 'a' is shifted above the low byte and 'b' is
- * OR-ed in unchanged. Each argument is evaluated exactly once.
- */
-#define CRAM_KEY(a,b) (((a)<<8)|((b)))
-
-/*! Reads an integer in ITF-8 encoding from 'fd' and stores it in
- * *val.
- *
- * @return
- * Returns the number of bytes read on success;
- *        -1 on failure
- */
-int itf8_decode(cram_fd *fd, int32_t *val);
-
-#ifndef ITF8_MACROS
-/*! Reads an integer in ITF-8 encoding from 'cp' and stores it in
- * *val.
- *
- * @return
- * Returns the number of bytes read on success;
- *        -1 on failure
- */
-int itf8_get(char *cp, int32_t *val_p);
-
-/*! Stores a value to memory in ITF-8 format.
- *
- * @return
- * Returns the number of bytes required to store the number.
- * This is a maximum of 5 bytes.
- */
-int itf8_put(char *cp, int32_t val);
-
-#else
-
-/*
- * Macro implementations of the above, plus itf8_size.
- *
- * itf8_get(c,v)  decodes from c[0..4] into *v; evaluates to the number of
- *                bytes consumed (1 to 5), chosen from the value of c[0].
- * itf8_put(c,v)  encodes v into c[0..4]; evaluates to the number of bytes
- *                written (1 to 5).
- * itf8_size(v)   evaluates to the number of bytes itf8_put would write.
- *
- * All three evaluate their arguments several times, so arguments must not
- * have side effects. None of them checks how much room 'c' has.
- * NOTE(review): 'uc' is not defined in this header; presumably it is an
- * unsigned char typedef from cram/misc.h -- confirm.
- */
-#define itf8_get(c,v) (((uc)(c)[0]<0x80)?(*(v)=(uc)(c)[0],1):(((uc)(c)[0]<0xc0)?(*(v)=(((uc)(c)[0]<<8)|(uc)(c)[1])&0x3fff,2):(((uc)(c)[0]<0xe0)?(*(v)=(((uc)(c)[0]<<16)|((uc)(c)[1]<<8)|(uc)(c)[2])&0x1fffff,3):(((uc)(c)[0]<0xf0)?(*(v)=(((uc)(c)[0]<<24)|((uc)(c)[1]<<16)|((uc)(c)[2]<<8)|(uc)(c)[3])&0x0fffffff,4):(*(v)=(((uc)(c)[0]&0x0f)<<28)|((uc)(c)[1]<<20)|((uc)(c)[2]<<12)|((uc)(c)[3]<<4)|((uc)(c)[4]&0x0f),5)))))
-
-#define itf8_put(c,v) ((!((v)&~0x7f))?((c)[0]=(v),1):(!((v)&~0x3fff))?((c)[0]=((v)>>8)|0x80,(c)[1]=(v)&0xff,2):(!((v)&~0x1fffff))?((c)[0]=((v)>>16)|0xc0,(c)[1]=((v)>>8)&0xff,(c)[2]=(v)&0xff,3):(!((v)&~0xfffffff))?((c)[0]=((v)>>24)|0xe0,(c)[1]=((v)>>16)&0xff,(c)[2]=((v)>>8)&0xff,(c)[3]=(v)&0xff,4):((c)[0]=0xf0|(((v)>>28)&0xff),(c)[1]=((v)>>20)&0xff,(c)[2]=((v)>>12)&0xff,(c)[3]=((v)>>4)&0xff,(c)[4]=(v)&0xf,5))
-
-#define itf8_size(v) ((!((v)&~0x7f))?1:(!((v)&~0x3fff))?2:(!((v)&~0x1fffff))?3:(!((v)&~0xfffffff))?4:5)
-
-#endif
-
-int ltf8_get(char *cp, int64_t *val_p);
-int ltf8_put(char *cp, int64_t val);
-
-extern const int itf8_bytes[16];
-
-/*! Version of itf8_get that checks it hasn't run out of input.
- *
- * Decodes one ITF-8 integer starting at 'cp' without reading at or beyond
- * 'endp'. The bytes are combined as uint32_t because left-shifting a
- * promoted int into its sign bit (lead byte >= 0xe0) is undefined behaviour.
- *
- * @return
- * Returns the number of bytes consumed (1 to 5) with the value in *val_p;
- *         0, with *val_p set to 0, if the input ends before the value does.
- */
-static inline int safe_itf8_get(const char *cp, const char *endp,
-                                int32_t *val_p) {
-    const unsigned char *up = (const unsigned char *)cp;
-    uint32_t b0, v;
-    int len;
-
-    /* Five spare bytes cover any encoding. With fewer, the top nibble of the
-     * lead byte indexes itf8_bytes[] (defined elsewhere) and the result is
-     * compared against the bytes that remain. */
-    if (endp - cp < 5 &&
-        (cp >= endp || endp - cp < itf8_bytes[up[0]>>4])) {
-        *val_p = 0;
-        return 0;
-    }
-
-    b0 = up[0];
-    if (b0 < 0x80) {
-        v = b0;
-        len = 1;
-    } else if (b0 < 0xc0) {
-        v = ((b0 << 8) | up[1]) & 0x3fff;
-        len = 2;
-    } else if (b0 < 0xe0) {
-        v = ((b0 << 16) | (up[1] << 8) | up[2]) & 0x1fffff;
-        len = 3;
-    } else if (b0 < 0xf0) {
-        v = ((b0 << 24) | (up[1] << 16) | (up[2] << 8) | up[3]) & 0x0fffffff;
-        len = 4;
-    } else {
-        /* Only the low nibble of the fifth byte carries data. */
-        v = ((b0 & 0x0f) << 28) | (up[1] << 20) | (up[2] << 12)
-          | (up[3] << 4) | (up[4] & 0x0f);
-        len = 5;
-    }
-
-    *val_p = (int32_t)v;
-    return len;
-}
-
-/*! Pushes a value in ITF8 format onto the end of a block.
- *
- * This shouldn't be used for high-volume data as it is not the fastest
- * method.
- *
- * @return
- * Returns the number of bytes written
- */
-int itf8_put_blk(cram_block *blk, int val);
-
-/*! Pulls a literal 32-bit value from a block.
- *
- * @returns the number of bytes decoded;
- *         -1 on failure.
- */
-int int32_get_blk(cram_block *b, int32_t *val);
-
-/*! Pushes a literal 32-bit value onto the end of a block.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure.
- */
-int int32_put_blk(cram_block *blk, int32_t val);
-
-
-/**@}*/
-/**@{ ----------------------------------------------------------------------
- * CRAM blocks - the dynamically growable data block. We have code to
- * create, update, (un)compress and read/write.
- *
- * These are derived from the deflate_interlaced.c blocks, but with the
- * CRAM extension of content types and IDs.
- */
-
-/*! Allocates a new cram_block structure with a specified content_type and
- * id.
- *
- * @return
- * Returns block pointer on success;
- *         NULL on failure
- */
-cram_block *cram_new_block(enum cram_content_type content_type,
-			   int content_id);
-
-/*! Reads a block from a cram file.
- *
- * @return
- * Returns cram_block pointer on success;
- *         NULL on failure
- */
-cram_block *cram_read_block(cram_fd *fd);
-
-/*! Writes a CRAM block.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_write_block(cram_fd *fd, cram_block *b);
-
-/*! Frees a CRAM block, deallocating internal data too.
- */
-void cram_free_block(cram_block *b);
-
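-/*! Uncompresses a memory block using Zlib.
- *
- * @return
- * Returns a char pointer, as declared in the prototype below (presumably
- *         the uncompressed data, with its length stored in *size, and
- *         presumably NULL on failure -- confirm against the implementation).
- */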
-char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size);
-
-/*! Uncompresses a CRAM block, if compressed.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_uncompress_block(cram_block *b);
-
-/*! Compresses a block.
- *
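- * Compresses a block using one of two different zlib strategies. If we only
- * want one choice set strat2 to be -1. (NOTE: the prototype below has no
- * strat2 argument, only 'method' and 'level'; this sentence appears stale.)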
- *
- * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED
- * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is
- * significantly faster.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
-			int method, int level);
-
-cram_metrics *cram_new_metrics(void);
-char *cram_block_method2str(enum cram_block_method m);
-char *cram_content_type2str(enum cram_content_type t);
-
-/*! Finds an external block within a slice by its content_id.
- *
- * Ids 0 to 1023 are answered straight from slice->block_by_id when that
- * table is present. In every other case slice->block[] is searched in order
- * over the first slice->hdr->num_blocks entries.
- *
- * @return
- * Returns the table entry as stored, or the first block whose content_type
- *         is EXTERNAL and whose content_id equals 'id';
- *         NULL if the search finds no such block.
- */
-static inline cram_block *cram_get_block_by_id(cram_slice *slice, int id) {
-    int n;
-
-    /* Fast path: direct lookup. */
-    if (slice->block_by_id && id >= 0 && id < 1024)
-        return slice->block_by_id[id];
-
-    /* Slow path: linear scan, skipping empty slots and non-matching blocks. */
-    for (n = 0; n < slice->hdr->num_blocks; n++) {
-        cram_block *blk = slice->block[n];
-
-        if (!blk)
-            continue;
-        if (blk->content_type == EXTERNAL && blk->content_id == id)
-            return blk;
-    }
-
-    return NULL;
-}
-
-/* --- Accessor macros for manipulating blocks on a byte by byte basis ---
- *
- * Every macro here evaluates 'b' (and usually its other arguments) more
- * than once, so arguments must be free of side effects.
- */
-
-/* Block size (the 'byte' field) and data pointer; both expand to lvalues. */
-#define BLOCK_SIZE(b) ((b)->byte)
-#define BLOCK_DATA(b) ((b)->data)
-
-/* Returns the address one past the end of the block, i.e. &data[byte] */
-#define BLOCK_END(b) (&(b)->data[(b)->byte])
-
-/* Request block to be at least 'l' bytes long.
- * Grows 'alloc' (1024 when it is zero, otherwise by a factor of 1.5) until it
- * is strictly greater than 'l'. The realloc() result is stored unchecked. */
-#define BLOCK_RESIZE(b,l)					\
-    do {							\
-	while((b)->alloc <= (l)) {				\
-	    (b)->alloc = (b)->alloc ? (b)->alloc*1.5 : 1024;	\
-	    (b)->data = realloc((b)->data, (b)->alloc);		\
-	}							\
-     } while(0)
-
-/* Make block allocation exactly 'l' bytes long ('byte' is left untouched).
- * The realloc() result is stored unchecked. */
-#define BLOCK_RESIZE_EXACT(b,l)					\
-    do {							\
-        (b)->alloc = (l);                                       \
-        (b)->data = realloc((b)->data, (b)->alloc);		\
-     } while(0)
-
-/* Ensure the block can hold at least another 'l' bytes beyond BLOCK_SIZE */
-#define BLOCK_GROW(b,l) BLOCK_RESIZE((b), BLOCK_SIZE((b)) + (l))
-
-/* Append 'l' bytes from 's' (copied with memcpy; no terminator is added) */
-#define BLOCK_APPEND(b,s,l)		  \
-    do {				  \
-        BLOCK_GROW((b),(l));		  \
-        memcpy(BLOCK_END((b)), (s), (l)); \
-	BLOCK_SIZE((b)) += (l);		  \
-    } while (0)
-
-/* Append a single character 'c' */
-#define BLOCK_APPEND_CHAR(b,c)		  \
-    do {				  \
-        BLOCK_GROW((b),1);		  \
-	(b)->data[(b)->byte++] = (c);	  \
-    } while (0)
-
-/* Append a single unsigned integer as decimal text.
- * Reserves 11 bytes, then advances 'byte' by however many characters
- * append_uint32() wrote. */
-#define BLOCK_APPEND_UINT(b,i)		             \
-    do {					     \
-        unsigned char *cp;			     \
-        BLOCK_GROW((b),11);			     \
-	cp = &(b)->data[(b)->byte];		     \
-        (b)->byte += append_uint32(cp, (i)) - cp;	\
-    } while (0)
-
-/*! Writes 'i' at 'cp' as unsigned decimal text.
- *
- * No leading zeros are produced (zero itself is written as "0") and no NUL
- * terminator is added. At most 10 characters are written.
- *
- * @return
- * Returns a pointer to the byte following the last character written.
- */
-static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i) {
-    unsigned char digits[10];   /* a 32-bit value has at most ten digits */
-    int n = 0;
-
-    /* Peel the digits off least-significant first... */
-    do {
-        digits[n++] = (unsigned char)('0' + i % 10);
-        i /= 10;
-    } while (i != 0);
-
-    /* ...then emit them in reading order. */
-    while (n > 0)
-        *cp++ = digits[--n];
-
-    return cp;
-}
-
-/*! Writes 'i' at 'cp' as exactly nine characters, zero-padded on the left.
- *
- * Intended for values below 1000000000; no range check is made and no NUL
- * terminator is added.
- *
- * @return
- * Returns a pointer to the byte following the ninth character.
- */
-static inline unsigned char *append_sub32(unsigned char *cp, uint32_t i) {
-    uint32_t place;
-
-    /* Eight leading positions, from the 10^8 place down to the tens. */
-    for (place = 100000000; place > 1; place /= 10) {
-        *cp++ = i / place + '0';
-        i %= place;
-    }
-
-    /* Units. */
-    *cp++ = i + '0';
-
-    return cp;
-}
-
-/*! Writes 'i' at 'cp' as unsigned decimal text, without a NUL terminator.
- *
- * Values that fit in 32 bits are handed to append_uint32(). Larger values
- * are split into groups of nine decimal digits: the leading group goes
- * through append_uint32() (no padding) and each following group through
- * append_sub32() (zero-padded to nine characters).
- *
- * @return
- * Returns a pointer to the byte following the last character written.
- */
-static inline unsigned char *append_uint64(unsigned char *cp, uint64_t i) {
-    const uint64_t group = 1000000000;  /* 10^9: nine decimal digits */
-    uint64_t upper;
-
-    if (i <= 0xffffffff)
-        return append_uint32(cp, i);
-
-    upper = i / group;
-    if (upper > group) {
-        /* Three groups: leading digits, middle nine, last nine. */
-        cp = append_uint32(cp, upper / group);
-        cp = append_sub32(cp, upper % group);
-    } else {
-        /* Two groups: leading digits, last nine. */
-        cp = append_uint32(cp, upper);
-    }
-
-    return append_sub32(cp, i % group);
-}
-
-/* Sets both comp_size and uncomp_size of 'b' to its current BLOCK_SIZE.
- * The expansion is parenthesised so that it remains one expression wherever
- * it appears; 'b' is evaluated more than once. */
-#define BLOCK_UPLEN(b) \
-    ((b)->comp_size = (b)->uncomp_size = BLOCK_SIZE((b)))
-
-/**@}*/
-/**@{ ----------------------------------------------------------------------
- * Reference sequence handling
- */
-
-/*! Loads a reference set from fn and stores in the cram_fd.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_load_reference(cram_fd *fd, char *fn);
-
-/*! Generates a lookup table in refs based on the SQ headers in SAM_hdr.
- *
- * Indexes references by the order they appear in a BAM file. This may not
- * necessarily be the same order they appear in the fasta reference file.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int refs2id(refs_t *r, SAM_hdr *bfd);
-
-void refs_free(refs_t *r);
-
-/*! Returns a portion of a reference sequence from start to end inclusive.
- *
- * The returned pointer is owned by the cram_file fd and should not be freed
- * by the caller. It is valid only until the next cram_get_ref is called
- * with the same fd parameter (so is thread-safe if given multiple files).
- *
- * To return the entire reference sequence, specify start as 1 and end
- * as 0.
- *
- * @return
- * Returns reference on success;
- *         NULL on failure
- */
-char *cram_get_ref(cram_fd *fd, int id, int start, int end);
-void cram_ref_incr(refs_t *r, int id);
-void cram_ref_decr(refs_t *r, int id);
-/**@}*/
-/**@{ ----------------------------------------------------------------------
- * Containers
- */
-
-/*! Creates a new container, specifying the maximum number of slices
- * and records permitted.
- *
- * @return
- * Returns cram_container ptr on success;
- *         NULL on failure
- */
-cram_container *cram_new_container(int nrec, int nslice);
-void cram_free_container(cram_container *c);
-
-/*! Reads a container header.
- *
- * @return
- * Returns cram_container on success;
- *         NULL on failure or no container left (fd->err == 0).
- */
-cram_container *cram_read_container(cram_fd *fd);
-
-/*! Writes a container structure.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_write_container(cram_fd *fd, cram_container *h);
-
-/*! Flushes a container to disk.
- *
- * Flushes a completely or partially full container to disk, writing
- * container structure, header and blocks. This also calls the encoder
- * functions.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_flush_container(cram_fd *fd, cram_container *c);
-int cram_flush_container_mt(cram_fd *fd, cram_container *c);
-
-
-/**@}*/
-/**@{ ----------------------------------------------------------------------
- * Compression headers; the first part of the container
- */
-
-/*! Creates a new blank container compression header
- *
- * @return
- * Returns header ptr on success;
- *         NULL on failure
- */
-cram_block_compression_hdr *cram_new_compression_header(void);
-
-/*! Frees a cram_block_compression_hdr */
-void cram_free_compression_header(cram_block_compression_hdr *hdr);
-
-
-/**@}*/
-/**@{ ----------------------------------------------------------------------
- * Slices and slice headers
- */
-
-/*! Frees a slice header */
-void cram_free_slice_header(cram_block_slice_hdr *hdr);
-
-/*! Frees a slice */
-void cram_free_slice(cram_slice *s);
-
-/*! Creates a new empty slice in memory, for subsequent writing to
- * disk.
- *
- * @return
- * Returns cram_slice ptr on success;
- *         NULL on failure
- */
-cram_slice *cram_new_slice(enum cram_content_type type, int nrecs);
-
-/*! Loads an entire slice.
- *
- * FIXME: In 1.0 the native unit of slices within CRAM is broken
- * as slices contain references to objects in other slices.
- * To work around this while keeping the slice oriented outer loop
- * we read all slices and stitch them together into a fake large
- * slice instead.
- *
- * @return
- * Returns cram_slice ptr on success;
- *         NULL on failure
- */
-cram_slice *cram_read_slice(cram_fd *fd);
-
-
-
-/**@}*/
-/**@{ ----------------------------------------------------------------------
- * CRAM file definition (header)
- */
-
-/*! Reads a CRAM file definition structure.
- *
- * @return
- * Returns file_def ptr on success;
- *         NULL on failure
- */
-cram_file_def *cram_read_file_def(cram_fd *fd);
-
-/*! Writes a cram_file_def structure to cram_fd.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_write_file_def(cram_fd *fd, cram_file_def *def);
-
-/*! Frees a cram_file_def structure. */
-void cram_free_file_def(cram_file_def *def);
-
-
-/**@}*/
-/**@{ ----------------------------------------------------------------------
- * SAM header I/O
- */
-
-/*! Reads the SAM header from the first CRAM data block.
- *
- * Also performs minimal parsing to extract read-group
- * and sample information.
- *
- * @return
- * Returns SAM hdr ptr on success;
- *         NULL on failure
- */
-SAM_hdr *cram_read_SAM_hdr(cram_fd *fd);
-
-/*! Writes a CRAM SAM header.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_write_SAM_hdr(cram_fd *fd, SAM_hdr *hdr);
-
-
-/**@}*/
-/**@{ ----------------------------------------------------------------------
- * The top-level cram opening, closing and option handling
- */
-
-/*! Opens a CRAM file for read (mode "rb") or write ("wb").
- *
- * The filename may be "-" to indicate stdin or stdout.
- *
- * @return
- * Returns file handle on success;
- *         NULL on failure.
- */
-cram_fd *cram_open(const char *filename, const char *mode);
-
-/*! Opens an existing stream for reading or writing.
- *
- * @return
- * Returns file handle on success;
- *         NULL on failure.
- */
-cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode);
-
-/*! Closes a CRAM file.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_close(cram_fd *fd);
-
-/*! Seeks within a CRAM file.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_seek(cram_fd *fd, off_t offset, int whence);
-
-/*! Flushes a CRAM file.
- *
- * Useful when writing to stdout without wishing to close the stream.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_flush(cram_fd *fd);
-
-/*! Checks for end of file on a cram_fd stream.
- *
- * @return
- * Returns 0 if not at end of file
- *         1 if we hit an expected EOF (end of range or EOF block)
- *         2 for other EOF (end of stream without EOF block)
- */
-int cram_eof(cram_fd *fd);
-
-/*! Sets options on the cram_fd.
- *
- * See CRAM_OPT_* definitions in cram_structs.h.
- * Use this immediately after opening.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...);
-
-/*! Sets options on the cram_fd; takes a va_list where cram_set_option takes "...".
- *
- * See CRAM_OPT_* definitions in cram_structs.h.
- * Use this immediately after opening.
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args);
-
-/*!
- * Attaches a header to a cram_fd.
- *
- * This should be used when creating a new cram_fd for writing where
- * we have an SAM_hdr already constructed (eg from a file we've read
- * in).
- *
- * @return
- * Returns 0 on success;
- *        -1 on failure
- */
-int cram_set_header(cram_fd *fd, SAM_hdr *hdr);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _CRAM_IO_H_ */