7df6e18265341f87a69fba808aa1f92f8ebca841
markd
  Wed Apr 15 13:39:42 2026 -0700
move copy of htslib

diff --git src/htslib/cram/cram_external.c src/htslib/cram/cram_external.c
deleted file mode 100644
index 4219d71d65b..00000000000
--- src/htslib/cram/cram_external.c
+++ /dev/null
@@ -1,377 +0,0 @@
-/*
-Copyright (c) 2015 Genome Research Ltd.
-Author: James Bonfield <jkb@sanger.ac.uk>
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-   1. Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
-
-   2. Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
-Institute nor the names of its contributors may be used to endorse or promote
-products derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/*! \file
- * External CRAM interface.
- *
- * Internally we're happy to use macros and to grub around in the cram
- * structures.  This isn't very sustainable for an externally usable
- * ABI though, so we have anonymous structs and accessor functions too
- * to permit software such as samtools reheader to manipulate cram
- * containers and blocks in a robust manner.
- */
-
-#include <config.h>
-
-#include "htslib/hfile.h"
-#include "cram/cram.h"
-
-/*
- *-----------------------------------------------------------------------------
- * cram_fd
- */
-SAM_hdr *cram_fd_get_header(cram_fd *fd) { return fd->header; }
-void cram_fd_set_header(cram_fd *fd, SAM_hdr *hdr) { fd->header = hdr; }
-
-int cram_fd_get_version(cram_fd *fd) { return fd->version; }
-void cram_fd_set_version(cram_fd *fd, int vers) { fd->version = vers; }
-
-int cram_major_vers(cram_fd *fd) { return CRAM_MAJOR_VERS(fd->version); }
-int cram_minor_vers(cram_fd *fd) { return CRAM_MINOR_VERS(fd->version); }
-
-hFILE *cram_fd_get_fp(cram_fd *fd) { return fd->fp; }
-void cram_fd_set_fp(cram_fd *fd, hFILE *fp) { fd->fp = fp; }
-
-
-/*
- *-----------------------------------------------------------------------------
- * cram_container
- */
-int32_t cram_container_get_length(cram_container *c) {
-    return c->length;
-}
-
-void cram_container_set_length(cram_container *c, int32_t length) {
-    c->length = length;
-}
-
-
-int32_t cram_container_get_num_blocks(cram_container *c) {
-    return c->num_blocks;
-}
-
-void cram_container_set_num_blocks(cram_container *c, int32_t num_blocks) {
-    c->num_blocks = num_blocks;
-}
-
-
-/* Returns the landmarks[] array and the number of elements
- * in num_landmarks.
- */
-int32_t *cram_container_get_landmarks(cram_container *c, int32_t *num_landmarks) {
-    *num_landmarks = c->num_landmarks;
-    return c->landmark;
-}
-
-/* Sets the landmarks[] array (pointer copy, not a memory dup) and
- * num_landmarks value.
- */
-void cram_container_set_landmarks(cram_container *c, int32_t num_landmarks,
-				  int32_t *landmarks) {
-    c->num_landmarks = num_landmarks;
-    c->landmark = landmarks;
-}
-
-
-/* Returns true if the container is empty (EOF marker) */
-int cram_container_is_empty(cram_fd *fd) {
-    return fd->empty_container;
-}
-
-
-/*
- *-----------------------------------------------------------------------------
- * cram_block_compression_hdr
- */
-
-/*
- * Utility function to edit an RG id.
- * This is only possible if there is one single RG value used and it
- * is in the container compression header using HUFFMAN or BETA
- * codec.  In this case it is essentially hard coded and needs no
- * editing of external (or worse, CORE) blocks.
- *
- * Returns 0 on success
- *        -1 on failure
- */
-// Or arbitrary set compression header constant?
-
-static int cram_block_compression_hdr_set_DS(cram_block_compression_hdr *ch,
-					     int ds, int new_rg) {
-    if (!ch || !ch->codecs || !ch->codecs[ds])
-	return -1;
-
-    switch (ch->codecs[ds]->codec) {
-    case E_HUFFMAN:
-	if (ch->codecs[ds]->huffman.ncodes != 1)
-	    return -1;
-	ch->codecs[ds]->huffman.codes[0].symbol = new_rg;
-	return 0;
-
-    case E_BETA:
-	if (ch->codecs[ds]->beta.nbits != 0)
-	    return -1;
-	ch->codecs[ds]->beta.offset = -new_rg;
-	return 0;
-
-    default:
-	return -1;
-    }
-
-    return 0;
-}
-
-int cram_block_compression_hdr_set_rg(cram_block_compression_hdr *ch, int new_rg) {
-    return cram_block_compression_hdr_set_DS(ch, DS_RG, new_rg);
-}
-
-/*
- * Converts a cram_block_compression_hdr struct used for decoding to
- * one used for encoding.  Maybe this should be a transparent
- * operation applied on-demand.
- *
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_block_compression_hdr_decoder2encoder(cram_fd *fd,
-					       cram_block_compression_hdr *ch) {
-    int i;
-
-    if (!ch || !ch->codecs)
-	return -1;
-
-    for (i = 0; i < DS_END; i++) {
-	cram_codec *co = ch->codecs[i];
-	if (!co)
-	    continue;
-
-	if (-1 == cram_codec_decoder2encoder(fd, co))
-	    return -1;
-    }
-
-    return 0;
-}
-
-/*
- *-----------------------------------------------------------------------------
- * cram_slice
- */
-int32_t cram_slice_hdr_get_num_blocks(cram_block_slice_hdr *hdr) {
-    return hdr->num_blocks;
-}
-
-
-/*
- *-----------------------------------------------------------------------------
- * cram_block
- */
-int32_t cram_block_get_content_id(cram_block *b)  { return b->content_id; }
-int32_t cram_block_get_comp_size(cram_block *b)   { return b->comp_size; }
-int32_t cram_block_get_uncomp_size(cram_block *b) { return b->uncomp_size; }
-int32_t cram_block_get_crc32(cram_block *b)       { return b->crc32; }
-void *  cram_block_get_data(cram_block *b)        { return BLOCK_DATA(b); }
-int32_t cram_block_get_size(cram_block *b)        { return BLOCK_SIZE(b); }
-enum cram_content_type cram_block_get_content_type(cram_block *b) {
-    return b->content_type;
-}
-
-void cram_block_set_content_id(cram_block *b, int32_t id) { b->content_id = id; }
-void cram_block_set_comp_size(cram_block *b, int32_t size) { b->comp_size = size; }
-void cram_block_set_uncomp_size(cram_block *b, int32_t size) { b->uncomp_size = size; }
-void cram_block_set_crc32(cram_block *b, int32_t crc) { b->crc32 = crc; }
-void cram_block_set_data(cram_block *b, void *data) { BLOCK_DATA(b) = data; }
-void cram_block_set_size(cram_block *b, int32_t size) { BLOCK_SIZE(b) = size; }
-
-int cram_block_append(cram_block *b, void *data, int size) {
-    BLOCK_APPEND(b, data, size);
-    return BLOCK_DATA(b) ? 0 : -1; // It'll do for now...
-}
-void cram_block_update_size(cram_block *b) { BLOCK_UPLEN(b); }
-
-// Offset is known as "size" internally, but it can be confusing.
-size_t cram_block_get_offset(cram_block *b) { return BLOCK_SIZE(b); }
-void cram_block_set_offset(cram_block *b, size_t offset) { BLOCK_SIZE(b) = offset; }
-
-
-/*
- * Copies the blocks representing the next num_slice slices from a
- * container from 'in' to 'out'.  It is expected that the file pointer
- * is just after the read of the cram_container and cram compression
- * header.
- *
- * Returns 0 on success
- *        -1 on failure
- */
-int cram_copy_slice(cram_fd *in, cram_fd *out, int32_t num_slice) {
-    int32_t i, j;
-
-    for (i = 0; i < num_slice; i++) {
-	cram_block *blk;
-	cram_block_slice_hdr *hdr;
-
-	if (!(blk = cram_read_block(in)))
-	    return -1;
-	if (!(hdr = cram_decode_slice_header(in, blk))) {
-	    cram_free_block(blk);
-	    return -1;
-	}
-	if (cram_write_block(out, blk) != 0) {
-	    cram_free_block(blk);
-	    return -1;
-	}
-	cram_free_block(blk);
-
-	int num_blocks = cram_slice_hdr_get_num_blocks(hdr);
-	for (j = 0; j < num_blocks; j++) {
-	    blk = cram_read_block(in);
-	    if (!blk || cram_write_block(out, blk) != 0) {
-		if (blk) cram_free_block(blk);
-		return -1;
-	    }
-	    cram_free_block(blk);
-	}
-	cram_free_slice_header(hdr);
-    }
-
-    return 0;
-}
-
-/*
- * Renumbers RG numbers in a cram compression header.
- *
- * CRAM stores RG as the Nth number in the header, rather than a
- * string holding the ID: tag.  This is smaller in space, but means
- * "samtools cat" to join files together that contain single but
- * different RG lines needs a way of renumbering them.
- *
- * The file descriptor is expected to be immediately after the
- * cram_container structure (ie before the cram compression header).
- * Due to the nature of the CRAM format, this needs to read and write
- * the blocks itself.  Note that there may be multiple slices within
- * the container, meaning multiple compression headers to manipulate.
- * Changing RG may change the size of the compression header and
- * therefore the length field in the container.  Hence we rewrite all
- * blocks just incase and also emit the adjusted container.
- *
- * The current implementation can only cope with renumbering a single
- * RG (and only then if it is using HUFFMAN or BETA codecs).  In
- * theory it *may* be possible to renumber multiple RGs if they use
- * HUFFMAN to the CORE block or use an external block unshared by any
- * other data series.  So we have an API that can be upgraded to
- * support this, but do not implement it for now.  An example
- * implementation of RG as an EXTERNAL block would be to find that
- * block and rewrite it, returning the number of blocks consumed.
- *
- * Returns 0 on success;
- *        -1 if unable to edit;
- *        -2 on other errors (eg I/O).
- */
-int cram_transcode_rg(cram_fd *in, cram_fd *out,
-		      cram_container *c,
-		      int nrg, int *in_rg, int *out_rg) {
-    int new_rg = *out_rg, old_size, new_size;
-    cram_block *o_blk, *n_blk;
-    cram_block_compression_hdr *ch;
-
-    if (nrg != 1) {
-	fprintf(stderr, "[%s] ERROR: not implemented for nrg != 1\n",
-		__func__);
-	return -2;
-    }
-
-    // Produce a new block holding the updated compression header,
-    // with RG transcoded to a new value. (Single only supported.)
-    o_blk = cram_read_block(in);
-    old_size = cram_block_size(o_blk);
-    ch = cram_decode_compression_header(in, o_blk);
-    if (cram_block_compression_hdr_set_rg(ch, new_rg) != 0)
-	return -1;
-    cram_block_compression_hdr_decoder2encoder(in, ch);
-    n_blk = cram_encode_compression_header(in, c, ch);
-    cram_free_compression_header(ch);
-
-    /*
-     * Warning: this has internal knowledge of the cram compression
-     * header format.
-     *
-     * The decoder doesn't set c->tags_used, so the encoder puts a two
-     * byte blank segment.  This means n_blk is too short.  We skip
-     * through the decoded old block (o_blk) and copy from there.
-     */
-    char *cp = cram_block_get_data(o_blk);
-    char *op = cp;
-    char *endp = cp + cram_block_get_uncomp_size(o_blk);
-    //fprintf(stderr, "sz = %d\n", (int)(endp-cp));
-    int32_t i32;
-
-    cp += safe_itf8_get(cp, endp, &i32);
-    cp += i32;
-    cp += safe_itf8_get(cp, endp, &i32);
-    cp += i32;
-    op = cp;
-    cp += safe_itf8_get(cp, endp, &i32);
-    i32 += (cp-op);
-
-    //fprintf(stderr, "remaining %d bytes\n", i32);
-    cram_block_set_size(n_blk, cram_block_get_size(n_blk)-2);
-    cram_block_append(n_blk, op, i32);
-    cram_block_update_size(n_blk);
-
-    new_size = cram_block_size(n_blk);
-
-    //fprintf(stderr, "size %d -> %d\n", old_size, new_size);
-
-    // Now we've constructedthe updated compression header,
-    // amend the container too (it may have changed size).
-    int32_t *landmarks, num_landmarks;
-    landmarks = cram_container_get_landmarks(c, &num_landmarks);
-
-    if (old_size != new_size) {
-	int diff = new_size - old_size, j;
-
-	for (j = 0; j < num_landmarks; j++)
-	    landmarks[j] += diff;
-	//cram_container_set_landmarks(c, num_landmarks, landmarks);
-	cram_container_set_length(c, cram_container_get_length(c) + diff);
-    }
-
-    // Finally write it all out; container, compression header,
-    // and then all the remaining slice blocks.
-    if (cram_write_container(out, c) != 0)
-	return -2;
-
-    cram_write_block(out, n_blk);
-    cram_free_block(o_blk);
-    cram_free_block(n_blk);
-
-    // Container num_blocks can be invalid, due to a bug.
-    // Instead we iterate in slice context instead.
-    return cram_copy_slice(in, out, num_landmarks);
-}