7df6e18265341f87a69fba808aa1f92f8ebca841 markd Wed Apr 15 13:39:42 2026 -0700 move copy of htslib diff --git src/htslib/cram/cram_codecs.c src/htslib/cram/cram_codecs.c deleted file mode 100644 index b859b9b01af..00000000000 --- src/htslib/cram/cram_codecs.c +++ /dev/null @@ -1,1949 +0,0 @@ -/* -Copyright (c) 2012-2013 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -/* - * FIXME: add checking of cram_external_type to return NULL on unsupported - * {codec,type} tuples. - */ - -#include - -#include -#include -#include -#include - -#include "cram/cram.h" - -static char *codec2str(enum cram_encoding codec) { - switch (codec) { - case E_NULL: return "NULL"; - case E_EXTERNAL: return "EXTERNAL"; - case E_GOLOMB: return "GOLOMB"; - case E_HUFFMAN: return "HUFFMAN"; - case E_BYTE_ARRAY_LEN: return "BYTE_ARRAY_LEN"; - case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP"; - case E_BETA: return "BETA"; - case E_SUBEXP: return "SUBEXP"; - case E_GOLOMB_RICE: return "GOLOMB_RICE"; - case E_GAMMA: return "GAMMA"; - case E_NUM_CODECS: - default: return "(unknown)"; - } -} - -/* - * --------------------------------------------------------------------------- - * Block bit-level I/O functions. - * All defined static here to promote easy inlining by the compiler. - */ - -#if 0 -/* Get a single bit, MSB first */ -static signed int get_bit_MSB(cram_block *block) { - unsigned int val; - - if (block->byte > block->alloc) - return -1; - - val = block->data[block->byte] >> block->bit; - if (--block->bit == -1) { - block->bit = 7; - block->byte++; - //printf("(%02X)", block->data[block->byte]); - } - - //printf("-B%d-", val&1); - - return val & 1; -} -#endif - -/* - * Count number of successive 0 and 1 bits - */ -static int get_one_bits_MSB(cram_block *block) { - int n = 0, b; - if (block->byte >= block->uncomp_size) - return -1; - do { - b = block->data[block->byte] >> block->bit; - if (--block->bit == -1) { - block->bit = 7; - block->byte++; - if (block->byte == block->uncomp_size && (b&1)) - return -1; - } - n++; - } while (b&1); - - return n-1; -} - -static int get_zero_bits_MSB(cram_block *block) { - int n = 0, b; - if (block->byte >= block->uncomp_size) - return -1; - do { - b = block->data[block->byte] >> block->bit; - if (--block->bit == -1) { - block->bit = 7; - block->byte++; - if (block->byte == block->uncomp_size && !(b&1)) - return -1; - } - n++; - } while (!(b&1)); - - return n-1; -} - -#if 0 -/* Stores a single bit */ -static void store_bit_MSB(cram_block *block, unsigned int bit) { - if (block->byte >= block->alloc) { - block->alloc = block->alloc ? block->alloc*2 : 1024; - block->data = realloc(block->data, block->alloc); - } - - if (bit) - block->data[block->byte] |= (1 << block->bit); - - if (--block->bit == -1) { - block->bit = 7; - block->byte++; - block->data[block->byte] = 0; - } -} -#endif - -#if 0 -/* Rounds to the next whole byte boundary first */ -static void store_bytes_MSB(cram_block *block, char *bytes, int len) { - if (block->bit != 7) { - block->bit = 7; - block->byte++; - } - - while (block->byte + len >= block->alloc) { - block->alloc = block->alloc ? block->alloc*2 : 1024; - block->data = realloc(block->data, block->alloc); - } - - memcpy(&block->data[block->byte], bytes, len); - block->byte += len; -} -#endif - -/* Local optimised copy for inlining */ -static inline unsigned int get_bits_MSB(cram_block *block, int nbits) { - unsigned int val = 0; - int i; - -#if 0 - // Fits within the current byte */ - if (nbits <= block->bit+1) { - val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<bit -= nbits) == -1) { - block->bit = 7; - block->byte++; - } - return val; - } - - // partial first byte - val = block->data[block->byte] & ((1<<(block->bit+1))-1); - nbits -= block->bit+1; - block->bit = 7; - block->byte++; - - // whole middle bytes - while (nbits >= 8) { - val = (val << 8) | block->data[block->byte++]; - nbits -= 8; - } - - val <<= nbits; - val |= (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<bit -= nbits; - return val; -#endif - -#if 0 - /* Inefficient implementation! */ - //printf("{"); - for (i = 0; i < nbits; i++) - //val = (val << 1) | get_bit_MSB(block); - GET_BIT_MSB(block, val); -#endif - -#if 1 - /* Combination of 1st two methods */ - if (nbits <= block->bit+1) { - val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<bit -= nbits) == -1) { - block->bit = 7; - block->byte++; - } - return val; - } - - switch(nbits) { -// case 15: GET_BIT_MSB(block, val); -// case 14: GET_BIT_MSB(block, val); -// case 13: GET_BIT_MSB(block, val); -// case 12: GET_BIT_MSB(block, val); -// case 11: GET_BIT_MSB(block, val); -// case 10: GET_BIT_MSB(block, val); -// case 9: GET_BIT_MSB(block, val); - case 8: GET_BIT_MSB(block, val); - case 7: GET_BIT_MSB(block, val); - case 6: GET_BIT_MSB(block, val); - case 5: GET_BIT_MSB(block, val); - case 4: GET_BIT_MSB(block, val); - case 3: GET_BIT_MSB(block, val); - case 2: GET_BIT_MSB(block, val); - case 1: GET_BIT_MSB(block, val); - break; - - default: - for (i = 0; i < nbits; i++) - //val = (val << 1) | get_bit_MSB(block); - GET_BIT_MSB(block, val); - } -#endif - - //printf("=0x%x}", val); - - return val; -} - -/* - * Can store up to 24-bits worth of data encoded in an integer value - * Possibly we'd want to have a less optimal store_bits function when dealing - * with nbits > 24, but for now we assume the codes generated are never - * that big. (Given this is only possible with 121392 or more - * characters with exactly the correct frequency distribution we check - * for it elsewhere.) - */ -static int store_bits_MSB(cram_block *block, unsigned int val, int nbits) { - /* fprintf(stderr, " store_bits: %02x %d\n", val, nbits); */ - - /* - * Use slow mode until we tweak the huffman generator to never generate - * codes longer than 24-bits. - */ - unsigned int mask; - - if (block->byte+4 >= block->alloc) { - if (block->byte) { - block->alloc *= 2; - block->data = realloc(block->data, block->alloc + 4); - if (!block->data) - return -1; - } else { - block->alloc = 1024; - block->data = realloc(block->data, block->alloc + 4); - if (!block->data) - return -1; - block->data[0] = 0; // initialise first byte of buffer - } - } - - /* fits in current bit-field */ - if (nbits <= block->bit+1) { - block->data[block->byte] |= (val << (block->bit+1-nbits)); - if ((block->bit-=nbits) == -1) { - block->bit = 7; - block->byte++; - block->data[block->byte] = 0; - } - return 0; - } - - block->data[block->byte] |= (val >> (nbits -= block->bit+1)); - block->bit = 7; - block->byte++; - block->data[block->byte] = 0; - - mask = 1<<(nbits-1); - do { - if (val & mask) - block->data[block->byte] |= (1 << block->bit); - if (--block->bit == -1) { - block->bit = 7; - block->byte++; - block->data[block->byte] = 0; - } - mask >>= 1; - } while(--nbits); - - return 0; -} - -/* - * Returns the next 'size' bytes from a block, or NULL if insufficient - * data left.This is just a pointer into the block data and not an - * allocated object, so do not free the result. - */ -static char *cram_extract_block(cram_block *b, int size) { - char *cp = (char *)b->data + b->idx; - b->idx += size; - if (b->idx > b->uncomp_size) - return NULL; - - return cp; -} - -/* - * --------------------------------------------------------------------------- - * EXTERNAL - */ -int cram_external_decode_int(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int l; - char *cp; - cram_block *b; - - /* Find the external block */ - b = cram_get_block_by_id(slice, c->external.content_id); - if (!b) - return *out_size?-1:0; - - cp = (char *)b->data + b->idx; - // E_INT and E_LONG are guaranteed single item queries - l = safe_itf8_get(cp, (char *)b->data + b->uncomp_size, (int32_t *)out); - b->idx += l; - *out_size = 1; - - return l > 0 ? 0 : -1; -} - -int cram_external_decode_char(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, - int *out_size) { - char *cp; - cram_block *b; - - /* Find the external block */ - b = cram_get_block_by_id(slice, c->external.content_id); - if (!b) - return *out_size?-1:0; - - cp = cram_extract_block(b, *out_size); - if (!cp) - return -1; - - memcpy(out, cp, *out_size); - return 0; -} - -static int cram_external_decode_block(cram_slice *slice, cram_codec *c, - cram_block *in, char *out_, - int *out_size) { - char *cp; - cram_block *b = NULL; - cram_block *out = (cram_block *)out_; - - /* Find the external block */ - b = cram_get_block_by_id(slice, c->external.content_id); - if (!b) - return *out_size?-1:0; - - cp = cram_extract_block(b, *out_size); - if (!cp) - return -1; - - BLOCK_APPEND(out, cp, *out_size); - return 0; -} - -void cram_external_decode_free(cram_codec *c) { - if (c) - free(c); -} - -cram_codec *cram_external_decode_init(char *data, int size, - enum cram_external_type option, - int version) { - cram_codec *c; - char *cp = data; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_EXTERNAL; - if (option == E_INT || option == E_LONG) - c->decode = cram_external_decode_int; - else if (option == E_BYTE_ARRAY || option == E_BYTE) - c->decode = cram_external_decode_char; - else - c->decode = cram_external_decode_block; - c->free = cram_external_decode_free; - - cp += itf8_get(cp, &c->external.content_id); - - if (cp - data != size) { - fprintf(stderr, "Malformed external header stream\n"); - free(c); - return NULL; - } - - c->external.type = option; - - return c; -} - -int cram_external_encode_int(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - uint32_t *i32 = (uint32_t *)in; - - itf8_put_blk(c->out, *i32); - return 0; -} - -int cram_external_encode_char(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - BLOCK_APPEND(c->out, in, in_size); - return 0; -} - -void cram_external_encode_free(cram_codec *c) { - if (!c) - return; - free(c); -} - -int cram_external_encode_store(cram_codec *c, cram_block *b, char *prefix, - int version) { - char tmp[99], *tp = tmp; - int len = 0; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - tp += itf8_put(tp, c->e_external.content_id); - len += itf8_put_blk(b, c->codec); - len += itf8_put_blk(b, tp-tmp); - BLOCK_APPEND(b, tmp, tp-tmp); - len += tp-tmp; - - return len; -} - -cram_codec *cram_external_encode_init(cram_stats *st, - enum cram_external_type option, - void *dat, - int version) { - cram_codec *c; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->codec = E_EXTERNAL; - c->free = cram_external_encode_free; - if (option == E_INT || option == E_LONG) - c->encode = cram_external_encode_int; - else if (option == E_BYTE_ARRAY || option == E_BYTE) - c->encode = cram_external_encode_char; - else - abort(); - c->store = cram_external_encode_store; - - c->e_external.content_id = (size_t)dat; - - return c; -} - -/* - * --------------------------------------------------------------------------- - * BETA - */ -int cram_beta_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int i, n; - - if (c->beta.nbits) { - if (cram_not_enough_bits(in, c->beta.nbits)) - return -1; - - for (i = 0, n = *out_size; i < n; i++) - out_i[i] = get_bits_MSB(in, c->beta.nbits) - c->beta.offset; - } else { - for (i = 0, n = *out_size; i < n; i++) - out_i[i] = -c->beta.offset; - } - - return 0; -} - -int cram_beta_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - int i, n; - - - if (c->beta.nbits) { - if (cram_not_enough_bits(in, c->beta.nbits)) - return -1; - - for (i = 0, n = *out_size; i < n; i++) - out[i] = get_bits_MSB(in, c->beta.nbits) - c->beta.offset; - } else { - for (i = 0, n = *out_size; i < n; i++) - out[i] = -c->beta.offset; - } - - return 0; -} - -void cram_beta_decode_free(cram_codec *c) { - if (c) - free(c); -} - -cram_codec *cram_beta_decode_init(char *data, int size, - enum cram_external_type option, - int version) { - cram_codec *c; - char *cp = data; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_BETA; - if (option == E_INT || option == E_LONG) - c->decode = cram_beta_decode_int; - else if (option == E_BYTE_ARRAY || option == E_BYTE) - c->decode = cram_beta_decode_char; - else - abort(); - c->free = cram_beta_decode_free; - - cp += itf8_get(cp, &c->beta.offset); - cp += itf8_get(cp, &c->beta.nbits); - - if (cp - data != size - || c->beta.nbits < 0 || c->beta.nbits > 8 * sizeof(int)) { - fprintf(stderr, "Malformed beta header stream\n"); - free(c); - return NULL; - } - - return c; -} - -int cram_beta_encode_store(cram_codec *c, cram_block *b, - char *prefix, int version) { - int len = 0; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - len += itf8_put_blk(b, c->codec); - len += itf8_put_blk(b, itf8_size(c->e_beta.offset) - + itf8_size(c->e_beta.nbits)); // codec length - len += itf8_put_blk(b, c->e_beta.offset); - len += itf8_put_blk(b, c->e_beta.nbits); - - return len; -} - -int cram_beta_encode_int(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int *syms = (int *)in; - int i, r = 0; - - for (i = 0; i < in_size; i++) - r |= store_bits_MSB(c->out, syms[i] + c->e_beta.offset, - c->e_beta.nbits); - - return r; -} - -int cram_beta_encode_char(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - unsigned char *syms = (unsigned char *)in; - int i, r = 0; - - for (i = 0; i < in_size; i++) - r |= store_bits_MSB(c->out, syms[i] + c->e_beta.offset, - c->e_beta.nbits); - - return r; -} - -void cram_beta_encode_free(cram_codec *c) { - if (c) free(c); -} - -cram_codec *cram_beta_encode_init(cram_stats *st, - enum cram_external_type option, - void *dat, - int version) { - cram_codec *c; - int min_val, max_val, len = 0; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->codec = E_BETA; - c->free = cram_beta_encode_free; - if (option == E_INT) - c->encode = cram_beta_encode_int; - else - c->encode = cram_beta_encode_char; - c->store = cram_beta_encode_store; - - if (dat) { - min_val = ((int *)dat)[0]; - max_val = ((int *)dat)[1]; - } else { - min_val = INT_MAX; - max_val = INT_MIN; - int i; - for (i = 0; i < MAX_STAT_VAL; i++) { - if (!st->freqs[i]) - continue; - if (min_val > i) - min_val = i; - max_val = i; - } - if (st->h) { - khint_t k; - - for (k = kh_begin(st->h); k != kh_end(st->h); k++) { - if (!kh_exist(st->h, k)) - continue; - - i = kh_key(st->h, k); - if (min_val > i) - min_val = i; - if (max_val < i) - max_val = i; - } - } - } - - assert(max_val >= min_val); - c->e_beta.offset = -min_val; - max_val -= min_val; - while (max_val) { - len++; - max_val >>= 1; - } - c->e_beta.nbits = len; - - return c; -} - -/* - * --------------------------------------------------------------------------- - * SUBEXP - */ -int cram_subexp_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int n, count; - int k = c->subexp.k; - - for (count = 0, n = *out_size; count < n; count++) { - int i = 0, tail; - int val; - - /* Get number of 1s */ - //while (get_bit_MSB(in) == 1) i++; - i = get_one_bits_MSB(in); - if (i < 0 || cram_not_enough_bits(in, i > 0 ? i + k - 1 : k)) - return -1; - /* - * Val is - * i > 0: 2^(k+i-1) + k+i-1 bits - * i = 0: k bits - */ - if (i) { - tail = i + k-1; - val = 0; - while (tail) { - //val = val<<1; val |= get_bit_MSB(in); - GET_BIT_MSB(in, val); - tail--; - } - val += 1 << (i + k-1); - } else { - tail = k; - val = 0; - while (tail) { - //val = val<<1; val |= get_bit_MSB(in); - GET_BIT_MSB(in, val); - tail--; - } - } - - out_i[count] = val - c->subexp.offset; - } - - return 0; -} - -void cram_subexp_decode_free(cram_codec *c) { - if (c) - free(c); -} - -cram_codec *cram_subexp_decode_init(char *data, int size, - enum cram_external_type option, - int version) { - cram_codec *c; - char *cp = data; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_SUBEXP; - c->decode = cram_subexp_decode; - c->free = cram_subexp_decode_free; - c->subexp.k = -1; - - cp += safe_itf8_get(cp, data + size, &c->subexp.offset); - cp += safe_itf8_get(cp, data + size, &c->subexp.k); - - if (cp - data != size || c->subexp.k < 0) { - fprintf(stderr, "Malformed subexp header stream\n"); - free(c); - return NULL; - } - - return c; -} - -/* - * --------------------------------------------------------------------------- - * GAMMA - */ -int cram_gamma_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int i, n; - - for (i = 0, n = *out_size; i < n; i++) { - int nz = 0; - int val; - //while (get_bit_MSB(in) == 0) nz++; - nz = get_zero_bits_MSB(in); - if (cram_not_enough_bits(in, nz)) - return -1; - val = 1; - while (nz > 0) { - //val <<= 1; val |= get_bit_MSB(in); - GET_BIT_MSB(in, val); - nz--; - } - - out_i[i] = val - c->gamma.offset; - } - - return 0; -} - -void cram_gamma_decode_free(cram_codec *c) { - if (c) - free(c); -} - -cram_codec *cram_gamma_decode_init(char *data, int size, - enum cram_external_type option, - int version) { - cram_codec *c; - char *cp = data; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_GAMMA; - c->decode = cram_gamma_decode; - c->free = cram_gamma_decode_free; - - cp += itf8_get(cp, &c->gamma.offset); - - if (cp - data != size) { - fprintf(stderr, "Malformed gamma header stream\n"); - free(c); - return NULL; - } - - return c; -} - -/* - * --------------------------------------------------------------------------- - * HUFFMAN - */ - -static int code_sort(const void *vp1, const void *vp2) { - const cram_huffman_code *c1 = (const cram_huffman_code *)vp1; - const cram_huffman_code *c2 = (const cram_huffman_code *)vp2; - - if (c1->len != c2->len) - return c1->len - c2->len; - else - return c1->symbol - c2->symbol; -} - -void cram_huffman_decode_free(cram_codec *c) { - if (!c) - return; - - if (c->huffman.codes) - free(c->huffman.codes); - free(c); -} - -int cram_huffman_decode_null(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - return -1; -} - -int cram_huffman_decode_char0(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int i, n; - - /* Special case of 0 length codes */ - for (i = 0, n = *out_size; i < n; i++) { - out[i] = c->huffman.codes[0].symbol; - } - return 0; -} - -int cram_huffman_decode_char(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int i, n, ncodes = c->huffman.ncodes; - const cram_huffman_code * const codes = c->huffman.codes; - - for (i = 0, n = *out_size; i < n; i++) { - int idx = 0; - int val = 0, len = 0, last_len = 0; - - for (;;) { - int dlen = codes[idx].len - last_len; - if (cram_not_enough_bits(in, dlen)) - return -1; - - //val <<= dlen; - //val |= get_bits_MSB(in, dlen); - //last_len = (len += dlen); - - last_len = (len += dlen); - for (; dlen; dlen--) GET_BIT_MSB(in, val); - - idx = val - codes[idx].p; - if (idx >= ncodes || idx < 0) - return -1; - - if (codes[idx].code == val && codes[idx].len == len) { - out[i] = codes[idx].symbol; - break; - } - } - } - - return 0; -} - -int cram_huffman_decode_int0(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int i, n; - const cram_huffman_code * const codes = c->huffman.codes; - - /* Special case of 0 length codes */ - for (i = 0, n = *out_size; i < n; i++) { - out_i[i] = codes[0].symbol; - } - return 0; -} - -int cram_huffman_decode_int(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, int *out_size) { - int32_t *out_i = (int32_t *)out; - int i, n, ncodes = c->huffman.ncodes; - const cram_huffman_code * const codes = c->huffman.codes; - - for (i = 0, n = *out_size; i < n; i++) { - int idx = 0; - int val = 0, len = 0, last_len = 0; - - // Now one bit at a time for remaining checks - for (;;) { - int dlen = codes[idx].len - last_len; - if (cram_not_enough_bits(in, dlen)) - return -1; - - //val <<= dlen; - //val |= get_bits_MSB(in, dlen); - //last_len = (len += dlen); - - last_len = (len += dlen); - for (; dlen; dlen--) GET_BIT_MSB(in, val); - - idx = val - codes[idx].p; - if (idx >= ncodes || idx < 0) - return -1; - - if (codes[idx].code == val && codes[idx].len == len) { - out_i[i] = codes[idx].symbol; - break; - } - } - } - - return 0; -} - -/* - * Initialises a huffman decoder from an encoding data stream. - */ -cram_codec *cram_huffman_decode_init(char *data, int size, - enum cram_external_type option, - int version) { - int32_t ncodes = 0, i, j; - char *cp = data, *data_end = &data[size]; - cram_codec *h; - cram_huffman_code *codes; - int32_t val, last_len, max_len = 0; - int l; - - cp += safe_itf8_get(cp, data_end, &ncodes); - h = calloc(1, sizeof(*h)); - if (!h) - return NULL; - - h->codec = E_HUFFMAN; - h->free = cram_huffman_decode_free; - - h->huffman.ncodes = ncodes; - codes = h->huffman.codes = malloc(ncodes * sizeof(*codes)); - if (!codes) { - free(h); - return NULL; - } - - /* Read symbols and bit-lengths */ - for (i = 0, l = 1; i < ncodes && l > 0; i++, cp += l) { - l = safe_itf8_get(cp, data_end, &codes[i].symbol); - } - - if (l < 1) { - fprintf(stderr, "Malformed huffman header stream\n"); - free(h); - return NULL; - } - cp += safe_itf8_get(cp, data_end, &i); - if (i != ncodes) { - fprintf(stderr, "Malformed huffman header stream\n"); - free(h); - return NULL; - } - - if (ncodes == 0) { - /* NULL huffman stream. Ensure it returns an error if - anything tries to use it. */ - h->decode = cram_huffman_decode_null; - return h; - } - - for (i = 0, l = 1; i < ncodes; i++, cp += l) { - l = safe_itf8_get(cp, data_end, &codes[i].len); - if (l < 1) - break; - if (max_len < codes[i].len) - max_len = codes[i].len; - } - if (l < 1 || cp - data != size || max_len >= ncodes) { - fprintf(stderr, "Malformed huffman header stream\n"); - free(h); - return NULL; - } - - /* Sort by bit length and then by symbol value */ - qsort(codes, ncodes, sizeof(*codes), code_sort); - - /* Assign canonical codes */ - val = -1, last_len = 0; - for (i = 0; i < ncodes; i++) { - val++; - if (codes[i].len > last_len) { - while (codes[i].len > last_len) { - val <<= 1; - last_len++; - } - } - codes[i].code = val; - } - - /* - * Compute the next starting point, offset by the i'th value. - * For example if codes 10, 11, 12, 13 are 30, 31, 32, 33 then - * codes[10..13].p = 30 - 10. - */ - last_len = 0; - for (i = j = 0; i < ncodes; i++) { - if (codes[i].len > last_len) { - j = codes[i].code - i; - last_len = codes[i].len; - } - codes[i].p = j; - } - -// puts("==HUFF LEN=="); -// for (i = 0; i <= last_len+1; i++) { -// printf("len %d=%d prefix %d\n", i, h->huffman.lengths[i], h->huffman.prefix[i]); -// } -// puts("===HUFFMAN CODES==="); -// for (i = 0; i < ncodes; i++) { -// int j; -// printf("%d: %d %d %d ", i, codes[i].symbol, codes[i].len, codes[i].code); -// j = codes[i].len; -// while (j) { -// putchar(codes[i].code & (1 << --j) ? '1' : '0'); -// } -// printf(" %d\n", codes[i].code); -// } - - if (option == E_BYTE || option == E_BYTE_ARRAY) { - if (h->huffman.codes[0].len == 0) - h->decode = cram_huffman_decode_char0; - else - h->decode = cram_huffman_decode_char; - } else if (option == E_BYTE_ARRAY_BLOCK) { - abort(); - } else { - if (h->huffman.codes[0].len == 0) - h->decode = cram_huffman_decode_int0; - else - h->decode = cram_huffman_decode_int; - } - - return (cram_codec *)h; -} - -int cram_huffman_encode_char0(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - return 0; -} - -int cram_huffman_encode_char(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int i, code, len, r = 0; - unsigned char *syms = (unsigned char *)in; - - while (in_size--) { - int sym = *syms++; - if (sym >= -1 && sym < MAX_HUFF) { - i = c->e_huffman.val2code[sym+1]; - assert(c->e_huffman.codes[i].symbol == sym); - code = c->e_huffman.codes[i].code; - len = c->e_huffman.codes[i].len; - } else { - /* Slow - use a lookup table for when sym < MAX_HUFF? */ - for (i = 0; i < c->e_huffman.nvals; i++) { - if (c->e_huffman.codes[i].symbol == sym) - break; - } - if (i == c->e_huffman.nvals) - return -1; - - code = c->e_huffman.codes[i].code; - len = c->e_huffman.codes[i].len; - } - - r |= store_bits_MSB(c->out, code, len); - } - - return r; -} - -int cram_huffman_encode_int0(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - return 0; -} - -int cram_huffman_encode_int(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int i, code, len, r = 0; - int *syms = (int *)in; - - while (in_size--) { - int sym = *syms++; - - if (sym >= -1 && sym < MAX_HUFF) { - i = c->e_huffman.val2code[sym+1]; - assert(c->e_huffman.codes[i].symbol == sym); - code = c->e_huffman.codes[i].code; - len = c->e_huffman.codes[i].len; - } else { - /* Slow - use a lookup table for when sym < MAX_HUFFMAN_SYM? */ - for (i = 0; i < c->e_huffman.nvals; i++) { - if (c->e_huffman.codes[i].symbol == sym) - break; - } - if (i == c->e_huffman.nvals) - return -1; - - code = c->e_huffman.codes[i].code; - len = c->e_huffman.codes[i].len; - } - - r |= store_bits_MSB(c->out, code, len); - } - - return r; -} - -void cram_huffman_encode_free(cram_codec *c) { - if (!c) - return; - - if (c->e_huffman.codes) - free(c->e_huffman.codes); - free(c); -} - -/* - * Encodes a huffman tree. - * Returns number of bytes written. - */ -int cram_huffman_encode_store(cram_codec *c, cram_block *b, char *prefix, - int version) { - int i, len = 0; - cram_huffman_code *codes = c->e_huffman.codes; - /* - * Up to code length 127 means 2.5e+26 bytes of data required (worst - * case huffman tree needs symbols with freqs matching the Fibonacci - * series). So guaranteed 1 byte per code. - * - * Symbols themselves could be 5 bytes (eg -1 is 5 bytes in itf8). - * - * Therefore 6*ncodes + 5 + 5 + 1 + 5 is max memory - */ - char *tmp = malloc(6*c->e_huffman.nvals+16); - char *tp = tmp; - - if (!tmp) - return -1; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - tp += itf8_put(tp, c->e_huffman.nvals); - for (i = 0; i < c->e_huffman.nvals; i++) { - tp += itf8_put(tp, codes[i].symbol); - } - - tp += itf8_put(tp, c->e_huffman.nvals); - for (i = 0; i < c->e_huffman.nvals; i++) { - tp += itf8_put(tp, codes[i].len); - } - - len += itf8_put_blk(b, c->codec); - len += itf8_put_blk(b, tp-tmp); - BLOCK_APPEND(b, tmp, tp-tmp); - len += tp-tmp; - - free(tmp); - - return len; -} - -cram_codec *cram_huffman_encode_init(cram_stats *st, - enum cram_external_type option, - void *dat, - int version) { - int *vals = NULL, *freqs = NULL, vals_alloc = 0, *lens, code, len; - int nvals, i, ntot = 0, max_val = 0, min_val = INT_MAX, k; - cram_codec *c; - cram_huffman_code *codes; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->codec = E_HUFFMAN; - - /* Count number of unique symbols */ - for (nvals = i = 0; i < MAX_STAT_VAL; i++) { - if (!st->freqs[i]) - continue; - if (nvals >= vals_alloc) { - vals_alloc = vals_alloc ? vals_alloc*2 : 1024; - vals = realloc(vals, vals_alloc * sizeof(int)); - freqs = realloc(freqs, vals_alloc * sizeof(int)); - if (!vals || !freqs) { - if (vals) free(vals); - if (freqs) free(freqs); - free(c); - return NULL; - } - } - vals[nvals] = i; - freqs[nvals] = st->freqs[i]; - assert(st->freqs[i] > 0); - ntot += freqs[nvals]; - if (max_val < i) max_val = i; - if (min_val > i) min_val = i; - nvals++; - } - if (st->h) { - khint_t k; - - for (k = kh_begin(st->h); k != kh_end(st->h); k++) { - if (!kh_exist(st->h, k)) - continue; - if (nvals >= vals_alloc) { - vals_alloc = vals_alloc ? vals_alloc*2 : 1024; - vals = realloc(vals, vals_alloc * sizeof(int)); - freqs = realloc(freqs, vals_alloc * sizeof(int)); - if (!vals || !freqs) - return NULL; - } - vals[nvals]= kh_key(st->h, k); - freqs[nvals] = kh_val(st->h, k); - assert(freqs[nvals] > 0); - ntot += freqs[nvals]; - if (max_val < i) max_val = i; - if (min_val > i) min_val = i; - nvals++; - } - } - - assert(nvals > 0); - - freqs = realloc(freqs, 2*nvals*sizeof(*freqs)); - lens = calloc(2*nvals, sizeof(*lens)); - if (!lens || !freqs) - return NULL; - - /* Inefficient, use pointers to form chain so we can insert and maintain - * a sorted list? This is currently O(nvals^2) complexity. - */ - for (;;) { - int low1 = INT_MAX, low2 = INT_MAX; - int ind1 = 0, ind2 = 0; - for (i = 0; i < nvals; i++) { - if (freqs[i] < 0) - continue; - if (low1 > freqs[i]) - low2 = low1, ind2 = ind1, low1 = freqs[i], ind1 = i; - else if (low2 > freqs[i]) - low2 = freqs[i], ind2 = i; - } - if (low2 == INT_MAX) - break; - - freqs[nvals] = low1 + low2; - lens[ind1] = nvals; - lens[ind2] = nvals; - freqs[ind1] *= -1; - freqs[ind2] *= -1; - nvals++; - } - nvals = nvals/2+1; - - /* Assign lengths */ - for (i = 0; i < nvals; i++) { - int code_len = 0; - for (k = lens[i]; k; k = lens[k]) - code_len++; - lens[i] = code_len; - freqs[i] *= -1; - //fprintf(stderr, "%d / %d => %d\n", vals[i], freqs[i], lens[i]); - } - - - /* Sort, need in a struct */ - if (!(codes = malloc(nvals * sizeof(*codes)))) - return NULL; - for (i = 0; i < nvals; i++) { - codes[i].symbol = vals[i]; - codes[i].len = lens[i]; - } - qsort(codes, nvals, sizeof(*codes), code_sort); - - /* - * Generate canonical codes from lengths. - * Sort by length. - * Start with 0. - * Every new code of same length is +1. - * Every new code of new length is +1 then <<1 per extra length. - * - * /\ - * a/\ - * /\/\ - * bcd/\ - * ef - * - * a 1 0 - * b 3 4 (0+1)<<2 - * c 3 5 - * d 3 6 - * e 4 14 (6+1)<<1 - * f 5 15 - */ - code = 0; len = codes[0].len; - for (i = 0; i < nvals; i++) { - while (len != codes[i].len) { - code<<=1; - len++; - } - codes[i].code = code++; - - if (codes[i].symbol >= -1 && codes[i].symbol < MAX_HUFF) - c->e_huffman.val2code[codes[i].symbol+1] = i; - - //fprintf(stderr, "sym %d, code %d, len %d\n", - // codes[i].symbol, codes[i].code, codes[i].len); - } - - free(lens); - free(vals); - free(freqs); - - c->e_huffman.codes = codes; - c->e_huffman.nvals = nvals; - - c->free = cram_huffman_encode_free; - if (option == E_BYTE || option == E_BYTE_ARRAY) { - if (c->e_huffman.codes[0].len == 0) - c->encode = cram_huffman_encode_char0; - else - c->encode = cram_huffman_encode_char; - } else { - if (c->e_huffman.codes[0].len == 0) - c->encode = cram_huffman_encode_int0; - else - c->encode = cram_huffman_encode_int; - } - c->store = cram_huffman_encode_store; - - return c; -} - -/* - * --------------------------------------------------------------------------- - * BYTE_ARRAY_LEN - */ -int cram_byte_array_len_decode(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, - int *out_size) { - /* Fetch length */ - int32_t len = 0, one = 1; - int r; - - r = c->byte_array_len.len_codec->decode(slice, c->byte_array_len.len_codec, - in, (char *)&len, &one); - //printf("ByteArray Len=%d\n", len); - - if (!r && c->byte_array_len.val_codec && len >= 0) { - r = c->byte_array_len.val_codec->decode(slice, - c->byte_array_len.val_codec, - in, out, &len); - } else { - return -1; - } - - *out_size = len; - - return r; -} - -void cram_byte_array_len_decode_free(cram_codec *c) { - if (!c) return; - - if (c->byte_array_len.len_codec) - c->byte_array_len.len_codec->free(c->byte_array_len.len_codec); - - if (c->byte_array_len.val_codec) - c->byte_array_len.val_codec->free(c->byte_array_len.val_codec); - - free(c); -} - -cram_codec *cram_byte_array_len_decode_init(char *data, int size, - enum cram_external_type option, - int version) { - cram_codec *c; - char *cp = data; - char *endp = data + size; - int32_t encoding = 0; - int32_t sub_size = -1; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_BYTE_ARRAY_LEN; - c->decode = cram_byte_array_len_decode; - c->free = cram_byte_array_len_decode_free; - - cp += safe_itf8_get(cp, endp, &encoding); - cp += safe_itf8_get(cp, endp, &sub_size); - if (sub_size < 0 || endp - cp < sub_size) - goto malformed; - c->byte_array_len.len_codec = cram_decoder_init(encoding, cp, sub_size, - E_INT, version); - if (c->byte_array_len.len_codec == NULL) - goto no_codec; - cp += sub_size; - - sub_size = -1; - cp += safe_itf8_get(cp, endp, &encoding); - cp += safe_itf8_get(cp, endp, &sub_size); - if (sub_size < 0 || endp - cp < sub_size) - goto malformed; - c->byte_array_len.val_codec = cram_decoder_init(encoding, cp, sub_size, - option, version); - if (c->byte_array_len.val_codec == NULL) - goto no_codec; - cp += sub_size; - - if (cp - data != size) - goto malformed; - - return c; - - malformed: - fprintf(stderr, "Malformed byte_array_len header stream\n"); - no_codec: - free(c); - return NULL; -} - -int cram_byte_array_len_encode(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - int32_t i32 = in_size; - int r = 0; - - r |= c->e_byte_array_len.len_codec->encode(slice, - c->e_byte_array_len.len_codec, - (char *)&i32, 1); - r |= c->e_byte_array_len.val_codec->encode(slice, - c->e_byte_array_len.val_codec, - in, in_size); - return r; -} - -void cram_byte_array_len_encode_free(cram_codec *c) { - if (!c) - return; - - if (c->e_byte_array_len.len_codec) - c->e_byte_array_len.len_codec->free(c->e_byte_array_len.len_codec); - - if (c->e_byte_array_len.val_codec) - c->e_byte_array_len.val_codec->free(c->e_byte_array_len.val_codec); - - free(c); -} - -int cram_byte_array_len_encode_store(cram_codec *c, cram_block *b, - char *prefix, int version) { - int len = 0, len2, len3; - cram_codec *tc; - cram_block *b_len, *b_val; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - tc = c->e_byte_array_len.len_codec; - b_len = cram_new_block(0, 0); - len2 = tc->store(tc, b_len, NULL, version); - - tc = c->e_byte_array_len.val_codec; - b_val = cram_new_block(0, 0); - len3 = tc->store(tc, b_val, NULL, version); - - len += itf8_put_blk(b, c->codec); - len += itf8_put_blk(b, len2+len3); - BLOCK_APPEND(b, BLOCK_DATA(b_len), BLOCK_SIZE(b_len)); - BLOCK_APPEND(b, BLOCK_DATA(b_val), BLOCK_SIZE(b_val)); - - cram_free_block(b_len); - cram_free_block(b_val); - - return len + len2 + len3; -} - -cram_codec *cram_byte_array_len_encode_init(cram_stats *st, - enum cram_external_type option, - void *dat, - int version) { - cram_codec *c; - cram_byte_array_len_encoder *e = (cram_byte_array_len_encoder *)dat; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->codec = E_BYTE_ARRAY_LEN; - c->free = cram_byte_array_len_encode_free; - c->encode = cram_byte_array_len_encode; - c->store = cram_byte_array_len_encode_store; - - c->e_byte_array_len.len_codec = cram_encoder_init(e->len_encoding, - NULL, E_INT, - e->len_dat, - version); - c->e_byte_array_len.val_codec = cram_encoder_init(e->val_encoding, - NULL, E_BYTE_ARRAY, - e->val_dat, - version); - - return c; -} - -/* - * --------------------------------------------------------------------------- - * BYTE_ARRAY_STOP - */ -static int cram_byte_array_stop_decode_char(cram_slice *slice, cram_codec *c, - cram_block *in, char *out, - int *out_size) { - cram_block *b = NULL; - char *cp, ch; - - b = cram_get_block_by_id(slice, c->byte_array_stop.content_id); - if (!b) - return *out_size?-1:0; - - if (b->idx >= b->uncomp_size) - return -1; - - cp = (char *)b->data + b->idx; - while ((ch = *cp) != (char)c->byte_array_stop.stop) { - if (cp - (char *)b->data >= b->uncomp_size) - return -1; - *out++ = ch; - cp++; - } - - *out_size = cp - (char *)(b->data + b->idx); - b->idx = cp - (char *)b->data + 1; - - return 0; -} - -int cram_byte_array_stop_decode_block(cram_slice *slice, cram_codec *c, - cram_block *in, char *out_, - int *out_size) { - cram_block *b; - cram_block *out = (cram_block *)out_; - char *cp, *out_cp, *cp_end; - char stop; - - b = cram_get_block_by_id(slice, c->byte_array_stop.content_id); - if (!b) - return *out_size?-1:0; - - if (b->idx >= b->uncomp_size) - return -1; - cp = (char *)b->data + b->idx; - cp_end = (char *)b->data + b->uncomp_size; - out_cp = (char *)BLOCK_END(out); - - stop = c->byte_array_stop.stop; - if (cp_end - cp < out->alloc - out->byte) { - while (cp != cp_end && *cp != stop) - *out_cp++ = *cp++; - BLOCK_SIZE(out) = out_cp - (char *)BLOCK_DATA(out); - } else { - char *cp_start; - for (cp_start = cp; cp != cp_end && *cp != stop; cp++) - ; - BLOCK_APPEND(out, cp_start, cp - cp_start); - BLOCK_GROW(out, cp - cp_start); - } - - *out_size = cp - (char *)(b->data + b->idx); - b->idx = cp - (char *)b->data + 1; - - return 0; -} - -void cram_byte_array_stop_decode_free(cram_codec *c) { - if (!c) return; - - free(c); -} - -cram_codec *cram_byte_array_stop_decode_init(char *data, int size, - enum cram_external_type option, - int version) { - cram_codec *c; - unsigned char *cp = (unsigned char *)data; - - if (!(c = malloc(sizeof(*c)))) - return NULL; - - c->codec = E_BYTE_ARRAY_STOP; - switch (option) { - case E_BYTE_ARRAY_BLOCK: - c->decode = cram_byte_array_stop_decode_block; - break; - case E_BYTE_ARRAY: - c->decode = cram_byte_array_stop_decode_char; - break; - default: - fprintf(stderr, "byte_array_stop codec only supports BYTE_ARRAYs.\n"); - free(c); - return NULL; - } - c->free = cram_byte_array_stop_decode_free; - - c->byte_array_stop.stop = *cp++; - if (CRAM_MAJOR_VERS(version) == 1) { - c->byte_array_stop.content_id = cp[0] + (cp[1]<<8) + (cp[2]<<16) - + (cp[3]<<24); - cp += 4; - } else { - cp += itf8_get(cp, &c->byte_array_stop.content_id); - } - - if ((char *)cp - data != size) { - fprintf(stderr, "Malformed byte_array_stop header stream\n"); - free(c); - return NULL; - } - - return c; -} - -int cram_byte_array_stop_encode(cram_slice *slice, cram_codec *c, - char *in, int in_size) { - BLOCK_APPEND(c->out, in, in_size); - BLOCK_APPEND_CHAR(c->out, c->e_byte_array_stop.stop); - return 0; -} - -void cram_byte_array_stop_encode_free(cram_codec *c) { - if (!c) - return; - free(c); -} - -int cram_byte_array_stop_encode_store(cram_codec *c, cram_block *b, - char *prefix, int version) { - int len = 0; - char buf[20], *cp = buf; - - if (prefix) { - size_t l = strlen(prefix); - BLOCK_APPEND(b, prefix, l); - len += l; - } - - cp += itf8_put(cp, c->codec); - - if (CRAM_MAJOR_VERS(version) == 1) { - cp += itf8_put(cp, 5); - *cp++ = c->e_byte_array_stop.stop; - *cp++ = (c->e_byte_array_stop.content_id >> 0) & 0xff; - *cp++ = (c->e_byte_array_stop.content_id >> 8) & 0xff; - *cp++ = (c->e_byte_array_stop.content_id >> 16) & 0xff; - *cp++ = (c->e_byte_array_stop.content_id >> 24) & 0xff; - } else { - cp += itf8_put(cp, 1 + itf8_size(c->e_byte_array_stop.content_id)); - *cp++ = c->e_byte_array_stop.stop; - cp += itf8_put(cp, c->e_byte_array_stop.content_id); - } - - BLOCK_APPEND(b, buf, cp-buf); - len += cp-buf; - - return len; -} - -cram_codec *cram_byte_array_stop_encode_init(cram_stats *st, - enum cram_external_type option, - void *dat, - int version) { - cram_codec *c; - - c = malloc(sizeof(*c)); - if (!c) - return NULL; - c->codec = E_BYTE_ARRAY_STOP; - c->free = cram_byte_array_stop_encode_free; - c->encode = cram_byte_array_stop_encode; - c->store = cram_byte_array_stop_encode_store; - - c->e_byte_array_stop.stop = ((int *)dat)[0]; - c->e_byte_array_stop.content_id = ((int *)dat)[1]; - - return c; -} - -/* - * --------------------------------------------------------------------------- - */ - -char *cram_encoding2str(enum cram_encoding t) { - switch (t) { - case E_NULL: return "NULL"; - case E_EXTERNAL: return "EXTERNAL"; - case E_GOLOMB: return "GOLOMB"; - case E_HUFFMAN: return "HUFFMAN"; - case E_BYTE_ARRAY_LEN: return "BYTE_ARRAY_LEN"; - case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP"; - case E_BETA: return "BETA"; - case E_SUBEXP: return "SUBEXP"; - case E_GOLOMB_RICE: return "GOLOMB_RICE"; - case E_GAMMA: return "GAMMA"; - case E_NUM_CODECS: - default: return "?"; - } -} - -static cram_codec *(*decode_init[])(char *data, - int size, - enum cram_external_type option, - int version) = { - NULL, - cram_external_decode_init, - NULL, - cram_huffman_decode_init, - cram_byte_array_len_decode_init, - cram_byte_array_stop_decode_init, - cram_beta_decode_init, - cram_subexp_decode_init, - NULL, - cram_gamma_decode_init, -}; - -cram_codec *cram_decoder_init(enum cram_encoding codec, - char *data, int size, - enum cram_external_type option, - int version) { - if (codec >= E_NULL && codec < E_NUM_CODECS && decode_init[codec]) { - return decode_init[codec](data, size, option, version); - } else { - fprintf(stderr, "Unimplemented codec of type %s\n", codec2str(codec)); - return NULL; - } -} - -static cram_codec *(*encode_init[])(cram_stats *stx, - enum cram_external_type option, - void *opt, - int version) = { - NULL, - cram_external_encode_init, - NULL, - cram_huffman_encode_init, - cram_byte_array_len_encode_init, - cram_byte_array_stop_encode_init, - cram_beta_encode_init, - NULL, //cram_subexp_encode_init, - NULL, - NULL, //cram_gamma_encode_init, -}; - -cram_codec *cram_encoder_init(enum cram_encoding codec, - cram_stats *st, - enum cram_external_type option, - void *dat, - int version) { - if (st && !st->nvals) - return NULL; - - if (encode_init[codec]) { - cram_codec *r; - if ((r = encode_init[codec](st, option, dat, version))) - r->out = NULL; - return r; - } else { - fprintf(stderr, "Unimplemented codec of type %s\n", codec2str(codec)); - abort(); - } -} - -/* - * Returns the content_id used by this codec, also in id2 if byte_array_len. - * Returns -1 for the CORE block and -2 for unneeded. - * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs. - */ -int cram_codec_to_id(cram_codec *c, int *id2) { - int bnum1, bnum2 = -2; - - switch (c->codec) { - case E_HUFFMAN: - bnum1 = c->huffman.ncodes == 1 ? -2 : -1; - break; - case E_GOLOMB: - case E_BETA: - case E_SUBEXP: - case E_GOLOMB_RICE: - case E_GAMMA: - bnum1 = -1; - break; - case E_EXTERNAL: - bnum1 = c->external.content_id; - break; - case E_BYTE_ARRAY_LEN: - bnum1 = cram_codec_to_id(c->byte_array_len.len_codec, NULL); - bnum2 = cram_codec_to_id(c->byte_array_len.val_codec, NULL); - break; - case E_BYTE_ARRAY_STOP: - bnum1 = c->byte_array_stop.content_id; - break; - case E_NULL: - bnum1 = -2; - break; - default: - fprintf(stderr, "Unknown codec type %d\n", c->codec); - bnum1 = -1; - } - - if (id2) - *id2 = bnum2; - return bnum1; -} - - -/* - * cram_codec structures are specialised for decoding or encoding. - * Unfortunately this makes turning a decoder into an encoder (such as - * when transcoding files) problematic. - * - * This function converts a cram decoder codec into an encoder version - * in-place (ie it modifiers the codec itself). - * - * Returns 0 on success; - * -1 on failure. - */ -int cram_codec_decoder2encoder(cram_fd *fd, cram_codec *c) { - int j; - - switch (c->codec) { - case E_EXTERNAL: - // shares struct with decode - c->free = cram_external_encode_free; - c->store = cram_external_encode_store; - if (c->decode == cram_external_decode_int) - c->encode = cram_external_encode_int; - else if (c->decode == cram_external_decode_char) - c->encode = cram_external_encode_char; - else - return -1; - break; - - case E_HUFFMAN: { - // New structure, so switch. - // FIXME: we huffman and e_huffman structs amended, we could - // unify this. - cram_codec *t = malloc(sizeof(*t)); - t->codec = E_HUFFMAN; - t->free = cram_huffman_encode_free; - t->store = cram_huffman_encode_store; - t->e_huffman.codes = c->huffman.codes; - t->e_huffman.nvals = c->huffman.ncodes; - for (j = 0; j < t->e_huffman.nvals; j++) { - int32_t sym = t->e_huffman.codes[j].symbol; - if (sym >= -1 && sym < MAX_HUFF) - t->e_huffman.val2code[sym+1] = j; - } - - if (c->decode == cram_huffman_decode_char0) - t->encode = cram_huffman_encode_char0; - else if (c->decode == cram_huffman_decode_char) - t->encode = cram_huffman_encode_char; - else if (c->decode == cram_huffman_decode_int0) - t->encode = cram_huffman_encode_int0; - else if (c->decode == cram_huffman_decode_int) - t->encode = cram_huffman_encode_int; - else { - free(t); - return -1; - } - *c = *t; - free(t); - break; - } - - case E_BETA: - // shares struct with decode - c->free = cram_beta_encode_free; - c->store = cram_beta_encode_store; - if (c->decode == cram_beta_decode_int) - c->encode = cram_beta_encode_int; - else if (c->decode == cram_beta_decode_char) - c->encode = cram_beta_encode_char; - else - return -1; - break; - - case E_BYTE_ARRAY_LEN: { - cram_codec *t = malloc(sizeof(*t)); - t->codec = E_BYTE_ARRAY_LEN; - t->free = cram_byte_array_len_encode_free; - t->store = cram_byte_array_len_encode_store; - t->encode = cram_byte_array_len_encode; - t->e_byte_array_len.len_codec = c->byte_array_len.len_codec; - t->e_byte_array_len.val_codec = c->byte_array_len.val_codec; - if (cram_codec_decoder2encoder(fd, t->e_byte_array_len.len_codec) == -1 || - cram_codec_decoder2encoder(fd, t->e_byte_array_len.val_codec) == -1) { - t->free(t); - return -1; - } - - // {len,val}_{encoding,dat} are undefined, but unused. - // Leaving them unset here means we can test that assertion. - *c = *t; - free(t); - break; - } - - case E_BYTE_ARRAY_STOP: - // shares struct with decode - c->free = cram_byte_array_stop_encode_free; - c->store = cram_byte_array_stop_encode_store; - c->encode = cram_byte_array_stop_encode; - break; - - default: - return -1; - } - - return 0; -}