src/htslib/cram/rANS_byte.h 7df6e18265341f87a69fba808aa1f92f8ebca841

7df6e18265341f87a69fba808aa1f92f8ebca841
markd
  Wed Apr 15 13:39:42 2026 -0700
move copy of htslib

diff --git src/htslib/cram/rANS_byte.h src/htslib/cram/rANS_byte.h
deleted file mode 100644
index c61ed9d10ed..00000000000
--- src/htslib/cram/rANS_byte.h
+++ /dev/null
@@ -1,336 +0,0 @@
-/* rans_byte.h originally from https://github.com/rygorous/ryg_rans
- *
- * This is a public-domain implementation of several rANS variants. rANS is an
- * entropy coder from the ANS family, as described in Jarek Duda's paper
- * "Asymmetric numeral systems" (http://arxiv.org/abs/1311.2540).
- */
-
-/*-------------------------------------------------------------------------- */
-
-// Simple byte-aligned rANS encoder/decoder - public domain - Fabian 'ryg' Giesen 2014
-//
-// Not intended to be "industrial strength"; just meant to illustrate the general
-// idea.
-
-#ifndef RANS_BYTE_HEADER
-#define RANS_BYTE_HEADER
-
-#include <stdint.h>
-
-#ifdef assert
-#define RansAssert assert
-#else
-#define RansAssert(x)
-#endif
-
-// READ ME FIRST:
-//
-// This is designed like a typical arithmetic coder API, but there's three
-// twists you absolutely should be aware of before you start hacking:
-//
-// 1. You need to encode data in *reverse* - last symbol first. rANS works
-//    like a stack: last in, first out.
-// 2. Likewise, the encoder outputs bytes *in reverse* - that is, you give
-//    it a pointer to the *end* of your buffer (exclusive), and it will
-//    slowly move towards the beginning as more bytes are emitted.
-// 3. Unlike basically any other entropy coder implementation you might
-//    have used, you can interleave data from multiple independent rANS
-//    encoders into the same bytestream without any extra signaling;
-//    you can also just write some bytes by yourself in the middle if
-//    you want to. This is in addition to the usual arithmetic encoder
-//    property of being able to switch models on the fly. Writing raw
-//    bytes can be useful when you have some data that you know is
-//    incompressible, and is cheaper than going through the rANS encode
-//    function. Using multiple rANS coders on the same byte stream wastes
-//    a few bytes compared to using just one, but execution of two
-//    independent encoders can happen in parallel on superscalar and
-//    Out-of-Order CPUs, so this can be *much* faster in tight decoding
-//    loops.
-//
-//    This is why all the rANS functions take the write pointer as an
-//    argument instead of just storing it in some context struct.
-
-// --------------------------------------------------------------------------
-
-// L ('l' in the paper) is the lower bound of our normalization interval.
-// Between this and our byte-aligned emission, we use 31 (not 32!) bits.
-// This is done intentionally because exact reciprocals for 31-bit uints
-// fit in 32-bit uints: this permits some optimizations during encoding.
-#define RANS_BYTE_L (1u << 23)  // lower bound of our normalization interval
-
-// State for a rANS encoder. Yep, that's all there is to it.
-typedef uint32_t RansState;
-
-// Initialize a rANS encoder.
-static inline void RansEncInit(RansState* r)
-{
-    *r = RANS_BYTE_L;
-}
-
-// Renormalize the encoder. Internal function.
-static inline RansState RansEncRenorm(RansState x, uint8_t** pptr, uint32_t freq, uint32_t scale_bits)
-{
-    uint32_t x_max = ((RANS_BYTE_L >> scale_bits) << 8) * freq; // this turns into a shift.
-    if (x >= x_max) {
-        uint8_t* ptr = *pptr;
-        do {
-            *--ptr = (uint8_t) (x & 0xff);
-            x >>= 8;
-        } while (x >= x_max);
-        *pptr = ptr;
-    }
-    return x;
-}
-
-// Encodes a single symbol with range start "start" and frequency "freq".
-// All frequencies are assumed to sum to "1 << scale_bits", and the
-// resulting bytes get written to ptr (which is updated).
-//
-// NOTE: With rANS, you need to encode symbols in *reverse order*, i.e. from
-// beginning to end! Likewise, the output bytestream is written *backwards*:
-// ptr starts pointing at the end of the output buffer and keeps decrementing.
-static inline void RansEncPut(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits)
-{
-    // renormalize
-    RansState x = RansEncRenorm(*r, pptr, freq, scale_bits);
-
-    // x = C(s,x)
-    *r = ((x / freq) << scale_bits) + (x % freq) + start;
-}
-
-// Flushes the rANS encoder.
-static inline void RansEncFlush(RansState* r, uint8_t** pptr)
-{
-    uint32_t x = *r;
-    uint8_t* ptr = *pptr;
-
-    ptr -= 4;
-    ptr[0] = (uint8_t) (x >> 0);
-    ptr[1] = (uint8_t) (x >> 8);
-    ptr[2] = (uint8_t) (x >> 16);
-    ptr[3] = (uint8_t) (x >> 24);
-
-    *pptr = ptr;
-}
-
-// Initializes a rANS decoder.
-// Unlike the encoder, the decoder works forwards as you'd expect.
-static inline void RansDecInit(RansState* r, uint8_t** pptr)
-{
-    uint32_t x;
-    uint8_t* ptr = *pptr;
-
-    x  = ptr[0] << 0;
-    x |= ptr[1] << 8;
-    x |= ptr[2] << 16;
-    x |= ptr[3] << 24;
-    ptr += 4;
-
-    *pptr = ptr;
-    *r = x;
-}
-
-// Returns the current cumulative frequency (map it to a symbol yourself!)
-static inline uint32_t RansDecGet(RansState* r, uint32_t scale_bits)
-{
-    return *r & ((1u << scale_bits) - 1);
-}
-
-// Advances in the bit stream by "popping" a single symbol with range start
-// "start" and frequency "freq". All frequencies are assumed to sum to "1 << scale_bits",
-// and the resulting bytes get written to ptr (which is updated).
-static inline void RansDecAdvance(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits)
-{
-    uint32_t mask = (1u << scale_bits) - 1;
-
-    // s, x = D(x)
-    uint32_t x = *r;
-    x = freq * (x >> scale_bits) + (x & mask) - start;
-
-    // renormalize
-    if (x < RANS_BYTE_L) {
-        uint8_t* ptr = *pptr;
-        do x = (x << 8) | *ptr++; while (x < RANS_BYTE_L);
-        *pptr = ptr;
-    }
-
-    *r = x;
-}
-
-// --------------------------------------------------------------------------
-
-// That's all you need for a full encoder; below here are some utility
-// functions with extra convenience or optimizations.
-
-// Encoder symbol description
-// This (admittedly odd) selection of parameters was chosen to make
-// RansEncPutSymbol as cheap as possible.
-typedef struct {
-    uint32_t x_max;     // (Exclusive) upper bound of pre-normalization interval
-    uint32_t rcp_freq;  // Fixed-point reciprocal frequency
-    uint32_t bias;      // Bias
-    uint16_t cmpl_freq; // Complement of frequency: (1 << scale_bits) - freq
-    uint16_t rcp_shift; // Reciprocal shift
-} RansEncSymbol;
-
-// Decoder symbols are straightforward.
-typedef struct {
-    uint16_t start;     // Start of range.
-    uint16_t freq;      // Symbol frequency.
-} RansDecSymbol;
-
-// Initializes an encoder symbol to start "start" and frequency "freq"
-static inline void RansEncSymbolInit(RansEncSymbol* s, uint32_t start, uint32_t freq, uint32_t scale_bits)
-{
-    RansAssert(scale_bits <= 16);
-    RansAssert(start <= (1u << scale_bits));
-    RansAssert(freq <= (1u << scale_bits) - start);
-
-    // Say M := 1 << scale_bits.
-    //
-    // The original encoder does:
-    //   x_new = (x/freq)*M + start + (x%freq)
-    //
-    // The fast encoder does (schematically):
-    //   q     = mul_hi(x, rcp_freq) >> rcp_shift   (division)
-    //   r     = x - q*freq                         (remainder)
-    //   x_new = q*M + bias + r                     (new x)
-    // plugging in r into x_new yields:
-    //   x_new = bias + x + q*(M - freq)
-    //        =: bias + x + q*cmpl_freq             (*)
-    //
-    // and we can just precompute cmpl_freq. Now we just need to
-    // set up our parameters such that the original encoder and
-    // the fast encoder agree.
-
-    s->x_max = ((RANS_BYTE_L >> scale_bits) << 8) * freq;
-    s->cmpl_freq = (uint16_t) ((1 << scale_bits) - freq);
-    if (freq < 2) {
-        // freq=0 symbols are never valid to encode, so it doesn't matter what
-        // we set our values to.
-        //
-        // freq=1 is tricky, since the reciprocal of 1 is 1; unfortunately,
-        // our fixed-point reciprocal approximation can only multiply by values
-        // smaller than 1.
-        //
-        // So we use the "next best thing": rcp_freq=0xffffffff, rcp_shift=0.
-        // This gives:
-        //   q = mul_hi(x, rcp_freq) >> rcp_shift
-        //     = mul_hi(x, (1<<32) - 1)) >> 0
-        //     = floor(x - x/(2^32))
-        //     = x - 1 if 1 <= x < 2^32
-        // and we know that x>0 (x=0 is never in a valid normalization interval).
-        //
-        // So we now need to choose the other parameters such that
-        //   x_new = x*M + start
-        // plug it in:
-        //     x*M + start                   (desired result)
-        //   = bias + x + q*cmpl_freq        (*)
-        //   = bias + x + (x - 1)*(M - 1)    (plug in q=x-1, cmpl_freq)
-        //   = bias + 1 + (x - 1)*M
-        //   = x*M + (bias + 1 - M)
-        //
-        // so we have start = bias + 1 - M, or equivalently
-        //   bias = start + M - 1.
-        s->rcp_freq = ~0u;
-        s->rcp_shift = 0;
-        s->bias = start + (1 << scale_bits) - 1;
-    } else {
-        // Alverson, "Integer Division using reciprocals"
-        // shift=ceil(log2(freq))
-        uint32_t shift = 0;
-        while (freq > (1u << shift))
-            shift++;
-
-        s->rcp_freq = (uint32_t) (((1ull << (shift + 31)) + freq-1) / freq);
-        s->rcp_shift = shift - 1;
-
-        // With these values, 'q' is the correct quotient, so we
-        // have bias=start.
-        s->bias = start;
-    }
-
-    s->rcp_shift += 32; // Avoid the extra >>32 in RansEncPutSymbol
-}
-
-// Initialize a decoder symbol to start "start" and frequency "freq"
-static inline void RansDecSymbolInit(RansDecSymbol* s, uint32_t start, uint32_t freq)
-{
-    RansAssert(start <= (1 << 16));
-    RansAssert(freq <= (1 << 16) - start);
-    s->start = (uint16_t) start;
-    s->freq = (uint16_t) freq;
-}
-
-// Encodes a given symbol. This is faster than straight RansEnc since we can do
-// multiplications instead of a divide.
-//
-// See RansEncSymbolInit for a description of how this works.
-static inline void RansEncPutSymbol(RansState* r, uint8_t** pptr, RansEncSymbol const* sym)
-{
-    RansAssert(sym->x_max != 0); // can't encode symbol with freq=0
-
-    // renormalize
-    uint32_t x = *r;
-    uint32_t x_max = sym->x_max;
-
-    if (x >= x_max) {
-	uint8_t* ptr = *pptr;
-	do {
-	    *--ptr = (uint8_t) (x & 0xff);
-	    x >>= 8;
-	} while (x >= x_max);
-	*pptr = ptr;
-    }
-
-    // x = C(s,x)
-    // NOTE: written this way so we get a 32-bit "multiply high" when
-    // available. If you're on a 64-bit platform with cheap multiplies
-    // (e.g. x64), just bake the +32 into rcp_shift.
-    //uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> 32) >> sym->rcp_shift;
-
-    // The extra >>32 has already been added to RansEncSymbolInit
-    uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> sym->rcp_shift);
-    *r = x + sym->bias + q * sym->cmpl_freq;
-}
-
-// Equivalent to RansDecAdvance that takes a symbol.
-static inline void RansDecAdvanceSymbol(RansState* r, uint8_t** pptr, RansDecSymbol const* sym, uint32_t scale_bits)
-{
-    RansDecAdvance(r, pptr, sym->start, sym->freq, scale_bits);
-}
-
-// Advances in the bit stream by "popping" a single symbol with range start
-// "start" and frequency "freq". All frequencies are assumed to sum to "1 << scale_bits".
-// No renormalization or output happens.
-static inline void RansDecAdvanceStep(RansState* r, uint32_t start, uint32_t freq, uint32_t scale_bits)
-{
-    uint32_t mask = (1u << scale_bits) - 1;
-
-    // s, x = D(x)
-    uint32_t x = *r;
-    *r = freq * (x >> scale_bits) + (x & mask) - start;
-}
-
-// Equivalent to RansDecAdvanceStep that takes a symbol.
-static inline void RansDecAdvanceSymbolStep(RansState* r, RansDecSymbol const* sym, uint32_t scale_bits)
-{
-    RansDecAdvanceStep(r, sym->start, sym->freq, scale_bits);
-}
-
-// Renormalize.
-static inline void RansDecRenorm(RansState* r, uint8_t** pptr)
-{
-    // renormalize
-    uint32_t x = *r;
-
-    if (x < RANS_BYTE_L) {
-        uint8_t* ptr = *pptr;
-        do x = (x << 8) | *ptr++; while (x < RANS_BYTE_L);
-        *pptr = ptr;
-    }
-
-    *r = x;
-}
-
-#endif // RANS_BYTE_HEADER