7df6e18265341f87a69fba808aa1f92f8ebca841
markd
  Wed Apr 15 13:39:42 2026 -0700
move copy of htslib

diff --git src/htslib/cram/mFILE.c src/htslib/cram/mFILE.c
deleted file mode 100644
index 0d4bd72b647..00000000000
--- src/htslib/cram/mFILE.c
+++ /dev/null
@@ -1,694 +0,0 @@
-/*
-Copyright (c) 2005-2006, 2008-2009, 2013 Genome Research Ltd.
-Author: James Bonfield <jkb@sanger.ac.uk>
-
-Redistribution and use in source and binary forms, with or without 
-modification, are permitted provided that the following conditions are met:
-
-   1. Redistributions of source code must retain the above copyright notice, 
-this list of conditions and the following disclaimer.
-
-   2. Redistributions in binary form must reproduce the above copyright notice, 
-this list of conditions and the following disclaimer in the documentation 
-and/or other materials provided with the distribution.
-
-   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
-Institute nor the names of its contributors may be used to endorse or promote
-products derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
-DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
-FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include <config.h>
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <stdarg.h>
-
-#include "cram/os.h"
-#include "cram/mFILE.h"
-#include "cram/vlen.h"
-
-#ifdef HAVE_MMAP
-#include <sys/mman.h>
-#endif
-
-/*
- * This file contains memory-based versions of the most commonly used
- * (by io_lib) stdio functions.
- *
- * Actual file IO takes place either on opening or closing an mFILE.
- *
- * Coupled to this are a bunch of rather scary macros which can be obtained
- * by including stdio_hack.h. It is recommended though that you use mFILE.h
- * instead and replace fopen with mfopen (etc). This is more or less
- * mandatory if you wish to use both FILE and mFILE structs in a single file.
- */
-
-static mFILE *m_channel[3];  /* stdin, stdout and stderr fakes */
-
-/*
- * Reads the entirety of fp into memory. If 'fn' exists it is the filename
- * associated with fp. This will be used for more optimal reading (via a
- * stat to identify the size and a single read). Otherwise we use successive
- * reads until EOF.
- *
- * Returns a malloced buffer on success of length *size
- *         NULL on failure
- */
-static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) {
-    struct stat sb;
-    char *data = NULL;
-    size_t allocated = 0, used = 0;
-    int bufsize = 8192;
-
-#ifdef _WIN32
-    if (binary)
-	_setmode(_fileno(fp), _O_BINARY);
-    else 
-	_setmode(_fileno(fp), _O_TEXT);
-#endif
-
-    if (fn && -1 != stat(fn, &sb)) {
-	data = malloc(allocated = sb.st_size);
-	bufsize = sb.st_size;
-    } else {
-	fn = NULL;
-    }
-
-    do {
-	size_t len;
-	if (used + bufsize > allocated) {
-	    allocated += bufsize;
-	    data = realloc(data, allocated);
-	}
-	len = fread(data + used, 1, allocated - used, fp);
-	if (len > 0)
-	    used += len;
-    } while (!feof(fp) && (fn == NULL || used < sb.st_size));
-
-    *size = used;
-
-    return data;
-}
-
-
-#ifdef HAVE_MMAP
-/*
- * mmaps in the file, but only for reading currently.
- *
- * Returns 0 on success
- *        -1 on failure
- */
-int mfmmap(mFILE *mf, FILE *fp, const char *fn) {
-    struct stat sb;
-
-    if (stat(fn, &sb) != 0)
-	return -1;
-
-    mf->size = sb.st_size;
-    mf->data = mmap(NULL, mf->size, PROT_READ, MAP_SHARED,
-		    fileno(fp), 0);
-
-    if (!mf->data)
-	return -1;
-
-    mf->alloced = 0;
-    return 0;
-}
-#endif
-
-
-/*
- * Creates and returns m_channel[0].
- * We initialise this on the first attempted read, which then slurps in
- * all of stdin until EOF is met.
- */
-mFILE *mstdin(void) {
-    if (m_channel[0])
-	return m_channel[0];
-
-    m_channel[0] = mfcreate(NULL, 0);
-    if (NULL == m_channel[0]) return NULL;
-    m_channel[0]->fp = stdin;
-    return m_channel[0];
-}
-
-static void init_mstdin(void) {
-    static int done_stdin = 0;
-    if (done_stdin)
-	return;
-
-    m_channel[0]->data = mfload(stdin, NULL, &m_channel[0]->size, 1);
-    m_channel[0]->mode = MF_READ;
-    done_stdin = 1;
-}
-
-/*
- * Creates and returns m_channel[1]. This is the fake for stdout. It starts as
- * an empty buffer which is physically written out only when mfflush or
- * mfclose are called.
- */
-mFILE *mstdout(void) {
-    if (m_channel[1])
-	return m_channel[1];
-
-    m_channel[1] = mfcreate(NULL, 0);
-    if (NULL == m_channel[1]) return NULL;
-    m_channel[1]->fp = stdout;
-    m_channel[1]->mode = MF_WRITE;
-    return m_channel[1];
-}
-
-/*
- * Stderr as an mFILE.
- * The code handles stderr by returning m_channel[2], but also checking
- * for stderr in fprintf (the common usage of it) to auto-flush.
- */
-mFILE *mstderr(void) {
-    if (m_channel[2])
-	return m_channel[2];
-
-    m_channel[2] = mfcreate(NULL, 0);
-    if (NULL == m_channel[2]) return NULL;
-    m_channel[2]->fp = stderr;
-    m_channel[2]->mode = MF_WRITE;
-    return m_channel[2];
-}
-
-
-/*
- * For creating existing mFILE pointers directly from memory buffers.
- */
-mFILE *mfcreate(char *data, int size) {
-    mFILE *mf = (mFILE *)malloc(sizeof(*mf));
-    if (NULL == mf) return NULL;
-    mf->fp = NULL;
-    mf->data = data;
-    mf->alloced = size;
-    mf->size = size;
-    mf->eof = 0;
-    mf->offset = 0;
-    mf->flush_pos = 0;
-    mf->mode = MF_READ | MF_WRITE;
-    return mf;
-}
-
-/*
- * Recreate an existing mFILE to house new data/size.
- * It also rewinds the file.
- */
-void mfrecreate(mFILE *mf, char *data, int size) {
-    if (mf->data)
-	free(mf->data);
-    mf->data = data;
-    mf->size = size;
-    mf->alloced = size;
-    mf->eof = 0;
-    mf->offset = 0;
-    mf->flush_pos = 0;
-}
-
-
-/*
- * Creates a new mFILE to contain the contents of the FILE pointer.
- * This mFILE is purely for in-memory operations and has no links to the
- * original FILE* it came from. It also doesn't close the FILE pointer.
- * Consider using mfreopen() is you need different behaviour.
- *
- * Returns mFILE * on success
- *         NULL on failure.
- */ 
-mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp) {
-   mFILE *mf; 
-
-    /* Open using mfreopen() */
-    if (NULL == (mf = mfreopen(path, mode_str, fp)))
-	return NULL;
-    
-    /* Disassociate from the input stream */
-    mf->fp = NULL;
-
-    return mf;
-}
-
-/*
- * Converts a FILE * to an mFILE *.
- * Use this for wrapper functions to turn external prototypes requring
- * FILE * as an argument into internal code using mFILE *.
- */
-mFILE *mfreopen(const char *path, const char *mode_str, FILE *fp) {
-    mFILE *mf;
-    int r = 0, w = 0, a = 0, b = 0, x = 0, mode = 0;
-
-    /* Parse mode:
-     * r = read file contents (if truncated => don't read)
-     * w = write on close
-     * a = position at end of buffer
-     * x = position at same location as the original fp, don't seek on flush
-     * + = for update (read and write)
-     * m = mmap (read only)
-     */
-    if (strchr(mode_str, 'r'))
-	r = 1, mode |= MF_READ;
-    if (strchr(mode_str, 'w'))
-	w = 1, mode |= MF_WRITE | MF_TRUNC;
-    if (strchr(mode_str, 'a'))
-	w = a = 1, mode |= MF_WRITE | MF_APPEND;
-    if (strchr(mode_str, 'b'))
-	b = 1, mode |= MF_BINARY;
-    if (strchr(mode_str, 'x'))
-	x = 1;
-    if (strchr(mode_str, '+')) {
-        w = 1, mode |= MF_READ | MF_WRITE;
-	if (a)
-	    r = 1;
-    }
-#ifdef HAVE_MMAP
-    if (strchr(mode_str, 'm'))
-	if (!w) mode |= MF_MMAP;
-#endif
-
-    if (r) {
-	mf = mfcreate(NULL, 0);
-	if (NULL == mf) return NULL;
-	if (!(mode & MF_TRUNC)) {
-#ifdef HAVE_MMAP
-	    if (mode & MF_MMAP) {
-		if (mfmmap(mf, fp, path) == -1) {
-		    mf->data = NULL;
-		    mode &= ~MF_MMAP;
-		}
-	    }
-#endif
-	    if (!mf->data) {
-		mf->data = mfload(fp, path, &mf->size, b);
-		mf->alloced = mf->size;
-		if (!a)
-		    fseek(fp, 0, SEEK_SET);
-	    }
-	}
-    } else if (w) {
-	/* Write - initialise the data structures */
-	mf = mfcreate(NULL, 0);
-	if (NULL == mf) return NULL;
-    } else {
-        fprintf(stderr, "Must specify either r, w or a for mode\n");
-        return NULL;
-    }
-    mf->fp = fp;
-    mf->mode = mode;
-
-    if (x) {
-	mf->mode |= MF_MODEX;
-    }
-    
-    if (a) {
-	mf->flush_pos = mf->size;
-	fseek(fp, 0, SEEK_END);
-    }
-
-    return mf;
-}
-
-/*
- * Opens a file. If we have read access (r or a+) then it loads the entire
- * file into memory. If We have write access then the pathname is stored.
- * We do not actually write until an mfclose, which then checks this pathname.
- */
-mFILE *mfopen(const char *path, const char *mode) {
-    FILE *fp;
-
-    if (NULL == (fp = fopen(path, mode)))
-	return NULL;
-    return mfreopen(path, mode, fp);
-}
-
-/*
- * Closes an mFILE. If the filename is known (implying write access) then this
- * also writes the data to disk.
- *
- * Stdout is handled by calling mfflush which writes to stdout if appropriate.
- */
-int mfclose(mFILE *mf) {
-    if (!mf)
-	return -1;
-
-    mfflush(mf);
-
-#ifdef HAVE_MMAP
-    if ((mf->mode & MF_MMAP) && mf->data) {
-	/* Mmaped */
-	munmap(mf->data, mf->size);
-	mf->data = NULL;
-    }
-#endif
-
-    if (mf->fp)
-	fclose(mf->fp);
-
-    mfdestroy(mf);
-
-    return 0;
-}
-
-/*
- * Closes the file pointer contained within the mFILE without destroying
- * the in-memory data.
- *
- * Attempting to do this on an mmaped buffer is an error.
- */
-int mfdetach(mFILE *mf) {
-    if (!mf)
-	return -1;
-
-    mfflush(mf);
-    if (mf->mode & MF_MMAP)
-	return -1;
-
-    if (mf->fp) {
-	fclose(mf->fp);
-	mf->fp = NULL;
-    }
-
-    return 0;
-}
-
-/*
- * Destroys an mFILE structure but does not flush or close it
- */
-int mfdestroy(mFILE *mf) {
-    if (!mf)
-	return -1;
-
-    if (mf->data)
-	free(mf->data);
-    free(mf);
-
-    return 0;
-}
-
-/*
- * Steals that data out of an mFILE.  The mFILE itself will be closed.
- * It is up to the caller to free the stolen buffer.  If size_out is
- * not NULL, mf->size will be stored in it.
- * This is more-or-less the opposite of mfcreate().
- *
- * Note, we cannot steal the allocated buffer from an mmaped mFILE.
- */
-
-void *mfsteal(mFILE *mf, size_t *size_out) {
-    void *data;
-
-    if (!mf) return NULL;
-
-    data = mf->data;
-    
-    if (NULL != size_out) *size_out = mf->size;
-
-    if (mfdetach(mf) != 0)
-	return NULL;
-
-    mf->data = NULL;
-    mfdestroy(mf);
-
-    return data;
-}
-
-/*
- * Seek/tell functions. Nothing more than updating and reporting an
- * in-memory index. NB we can seek on stdin or stdout even provided we
- * haven't been flushing.
- */
-int mfseek(mFILE *mf, long offset, int whence) {
-    switch (whence) {
-    case SEEK_SET:
-	mf->offset = offset;
-	break;
-    case SEEK_CUR:
-	mf->offset += offset;
-	break;
-    case SEEK_END:
-	mf->offset = mf->size + offset;
-	break;
-    default:
-	errno = EINVAL;
-	return -1;
-    }
-
-    mf->eof = 0;
-    return 0;
-}
-
-long mftell(mFILE *mf) {
-    return mf->offset;
-}
-
-void mrewind(mFILE *mf) {
-    mf->offset = 0;
-    mf->eof = 0;
-}
-
-/*
- * mftruncate is not directly a translation of ftruncate as the latter
- * takes a file descriptor instead of a FILE *. It performs the analogous
- * role though.
- *
- * If offset is -1 then the file is truncated to be the current file
- * offset.
- */
-void mftruncate(mFILE *mf, long offset) {
-    mf->size = offset != -1 ? offset : mf->offset;
-    if (mf->offset > mf->size)
-	mf->offset = mf->size;
-}
-
-int mfeof(mFILE *mf) {
-    return mf->eof;
-}
-
-/*
- * mFILE read/write functions. Basically these turn fread/fwrite syntax
- * into memcpy statements, with appropriate memory handling for writing.
- */
-size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
-    size_t len;
-    char *cptr = (char *)ptr;
-    
-    if (mf == m_channel[0]) init_mstdin();
-
-    if (mf->size <= mf->offset)
-	return 0;
-
-    len = size * nmemb <= mf->size - mf->offset
-	? size * nmemb
-	: mf->size - mf->offset;
-    if (!size)
-	return 0;
-
-    memcpy(cptr, &mf->data[mf->offset], len);
-    mf->offset += len;
-    
-    if (len != size * nmemb) {
-	mf->eof = 1;
-    }
-
-    return len / size;
-}
-
-size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf) {
-    if (!(mf->mode & MF_WRITE))
-	return 0;
-
-    /* Append mode => forced all writes to end of file */
-    if (mf->mode & MF_APPEND)
-	mf->offset = mf->size;
-
-    /* Make sure we have enough room */
-    while (size * nmemb + mf->offset > mf->alloced) {
-	size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024;
-	void * new_data = realloc(mf->data, new_alloced);
-	if (NULL == new_data) return 0;
-	mf->alloced = new_alloced;
-	mf->data    = new_data;
-    }
-
-    /* Record where we need to reflush from */
-    if (mf->offset < mf->flush_pos)
-	mf->flush_pos = mf->offset;
-
-    /* Copy the data over */
-    memcpy(&mf->data[mf->offset], ptr, size * nmemb);
-    mf->offset += size * nmemb;
-    if (mf->size < mf->offset)
-	mf->size = mf->offset;
-
-    return nmemb;
-}
-
-int mfgetc(mFILE *mf) {
-    if (mf == m_channel[0]) init_mstdin();
-    if (mf->offset < mf->size) {
-	return (unsigned char)mf->data[mf->offset++];
-    }
-
-    mf->eof = 1;
-    return -1;
-}
-
-int mungetc(int c, mFILE *mf) {
-    if (mf->offset > 0) {
-	mf->data[--mf->offset] = c;
-	return c;
-    }
-    
-    mf->eof = 1;
-    return -1;
-}
-
-char *mfgets(char *s, int size, mFILE *mf) {
-    int i;
-
-    if (mf == m_channel[0]) init_mstdin();
-    *s = 0;
-    for (i = 0; i < size-1;) {
-	if (mf->offset < mf->size) {
-	    s[i] = mf->data[mf->offset++];
-	    if (s[i++] == '\n')
-		break;
-	} else {
-	    mf->eof = 1;
-	    break;
-	}
-    }
-
-    s[i] = 0;
-    return i ? s : NULL;
-}
-
-/*
- * Flushes an mFILE. If this is a real open of a file in write mode then
- * mFILE->fp will be set. We then write out any new data in mFILE since the
- * last flush. We cannot tell what may have been modified as we don't keep
- * track of that, so we typically rewrite out the entire file contents between
- * the last flush_pos and the end of file.
- *
- * For stderr/stdout we also reset the offsets so we cannot modify things
- * we've already output.
- */
-int mfflush(mFILE *mf) {
-    if (!mf->fp)
-	return 0;
-
-    /* FIXME: only do this when opened in write mode */
-    if (mf == m_channel[1] || mf == m_channel[2]) {
-	if (mf->flush_pos < mf->size) {
-	    size_t bytes = mf->size - mf->flush_pos;
-	    if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
-		return -1;
-	    if (0 != fflush(mf->fp))
-		return -1;
-	}
-
-	/* Stdout & stderr are non-seekable streams so throw away the data */
-	mf->offset = mf->size = mf->flush_pos = 0;
-    }
-
-    /* only flush when opened in write mode */
-    if (mf->mode & MF_WRITE) {
-	if (mf->flush_pos < mf->size) {
-	    size_t bytes = mf->size - mf->flush_pos;
-	    if (!(mf->mode & MF_MODEX)) {
-		fseek(mf->fp, mf->flush_pos, SEEK_SET);
-	    }
-	    if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)
-		return -1;
-	    if (0 != fflush(mf->fp))
-		return -1;
-	}
-	if (ftell(mf->fp) != -1 &&
-	    ftruncate(fileno(mf->fp), ftell(mf->fp)) == -1)
-		return -1;
-	mf->flush_pos = mf->size;
-    }
-
-    return 0;
-}
-
-/*
- * A wrapper around vsprintf() to write to an mFILE. This also uses vflen() to
- * estimate how many additional bytes of storage will be required for the
- * vsprintf to work.
- */
-int mfprintf(mFILE *mf, char *fmt, ...) {
-    int ret;
-    size_t est_length;
-    va_list args;
-
-    va_start(args, fmt);
-    est_length = vflen(fmt, args);
-    va_end(args);
-    while (est_length + mf->offset > mf->alloced) {
-	size_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024;
-	void * new_data    = realloc(mf->data, new_alloced);
-	if (NULL == new_data) return -1;
-	mf->alloced = new_alloced;
-	mf->data    = new_data;
-    }
-
-    va_start(args, fmt);
-    ret = vsprintf(&mf->data[mf->offset], fmt, args);
-    va_end(args);
-
-    if (ret > 0) {
-	mf->offset += ret;
-	if (mf->size < mf->offset)
-	    mf->size = mf->offset;
-    }
-
-    if (mf->fp == stderr) {
-	/* Auto-flush for stderr */
-	if (0 != mfflush(mf)) return -1;
-    }
-
-    return ret;
-}
-
-/*
- * Converts an mFILE from binary to ascii mode by replacing all
- * cr-nl with nl.
- *
- * Primarily used on windows when we've uncompressed a binary file which
- * happens to be a text file (eg Experiment File). Previously we would have
- * seeked back to the start and used _setmode(fileno(fp), _O_TEXT).
- *
- * Side effect: resets offset and flush_pos back to the start.
- */
-void mfascii(mFILE *mf) {
-    size_t p1, p2;
-
-    for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) {
-	if (mf->data[p1] == '\n' && mf->data[p1-1] == '\r') {
-	    p2--; /* delete the \r */
-	}
-	mf->data[p2] = mf->data[p1];
-    }
-    mf->size = p2;
-
-    mf->offset = mf->flush_pos = 0;
-}