mirror of
https://github.com/reactos/reactos.git
synced 2025-01-05 22:12:46 +00:00
1251 lines
37 KiB
C
1251 lines
37 KiB
C
/*
|
|
File lzx_layer.c, part of lzxcomp library
|
|
Copyright (C) 2002 Matthew T. Russotto
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU Lesser General Public License as published by
|
|
the Free Software Foundation; version 2.1 only
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU Lesser General Public License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <stdint.h>
|
|
#include <string.h> /* for memset on Linux */
|
|
#include <assert.h>
|
|
#include <math.h>
|
|
#include "lzx_config.h"
|
|
#ifdef NONSLIDE
|
|
#include "lz_nonslide.h"
|
|
#else
|
|
#include "hash_slide.h"
|
|
#include "lz_slide.h"
|
|
#endif
|
|
#include "lzx_compress.h"
|
|
#include "lzx_constants.h"
|
|
|
|
/* Debugging defines useful during development. All add diagnostic output
|
|
at various points in the system */
|
|
|
|
/*#define DEBUG_MATCHES *//* When matches come in from the LZ engine */
|
|
/*#define DEBUG_MATCHES_2 *//* When matches are being output */
|
|
/*#define DEBUG_HUFFMAN *//* When huffman trees are built */
|
|
/*#define DEBUG_ENTROPY *//* In entropy calculation */
|
|
/*#define DEBUG_LZ *//* Uncompressed input reconstructed from
|
|
LZ engine */
|
|
/*#define DEBUG_BITBUF *//* Raw output to upper layer */
|
|
/*#define DEBUG_EXTRA_BITS *//* Savings due to extra bits huffman tree */
|
|
/*#define DEBUG_POSITION_SLOT_LOOKUP */
|
|
/*#define DEBUG_TREE_COMPRESSION *//* During RLE compression of trees */
|
|
|
|
/* number of position slots given window_size-5 */
|
|
/* as corrected by Caie */
|
|
short num_position_slots[] = {30, 32, 34, 36, 38, 42, 50};
|
|
unsigned long position_base[51];
|
|
u_char extra_bits[52];
|
|
double rloge2;
|
|
|
|
typedef struct ih_elem {
|
|
int freq;
|
|
short sym;
|
|
short pathlength;
|
|
struct ih_elem *parent;
|
|
struct ih_elem *left;
|
|
struct ih_elem *right;
|
|
} ih_elem;
|
|
|
|
typedef struct h_elem {
|
|
int freq;
|
|
short sym;
|
|
short pathlength;
|
|
struct ih_elem *parent;
|
|
unsigned short code;
|
|
} h_elem;
|
|
|
|
typedef struct huff_entry {
|
|
short codelength;
|
|
unsigned short code;
|
|
} huff_entry;
|
|
|
|
static int cmp_leaves(const void *in_a, const void *in_b)
|
|
{
|
|
const struct h_elem *a = in_a;
|
|
const struct h_elem *b = in_b;
|
|
|
|
if (!a->freq && b->freq)
|
|
return 1;
|
|
if (a->freq && !b->freq)
|
|
return -1;
|
|
|
|
if (a->freq == b->freq)
|
|
return a->sym - b->sym;
|
|
|
|
return a->freq - b->freq;
|
|
}
|
|
|
|
static int
|
|
cmp_pathlengths(const void *in_a, const void *in_b)
|
|
{
|
|
const struct h_elem *a = in_a;
|
|
const struct h_elem *b = in_b;
|
|
|
|
if (a->pathlength == b->pathlength)
|
|
#if 0
|
|
return a->sym - b->sym;
|
|
#else
|
|
/* see note on canonical pathlengths */
|
|
return b->sym - a->sym;
|
|
#endif
|
|
return b->pathlength - a->pathlength;
|
|
}
|
|
|
|
/* standard huffman building algorithm */
|
|
static void
|
|
build_huffman_tree(int nelem, int max_code_length, int *freq, huff_entry *tree)
|
|
{
|
|
h_elem *leaves = malloc(nelem * sizeof(h_elem));
|
|
ih_elem *inodes;
|
|
ih_elem *next_inode;
|
|
ih_elem *cur_inode;
|
|
h_elem *cur_leaf;
|
|
int leaves_left;
|
|
int nleaves;
|
|
int pathlength;
|
|
unsigned short cur_code;
|
|
short codes_too_long = 0;
|
|
ih_elem *f1, *f2;
|
|
int i;
|
|
|
|
for (i = 0; i < nelem; i++) {
|
|
leaves[i].freq = freq[i];
|
|
leaves[i].sym = i;
|
|
leaves[i].pathlength = 0;
|
|
}
|
|
qsort(leaves, nelem, sizeof(h_elem), cmp_leaves);
|
|
for (leaves_left = 0; leaves_left < nelem; leaves_left++) {
|
|
#ifdef DEBUG_HUFFMAN
|
|
fprintf(stderr, "%3d: %3d '%c'\n", leaves_left, leaves[leaves_left].freq,
|
|
leaves[leaves_left].sym);
|
|
#endif
|
|
if (!leaves[leaves_left].freq) break;
|
|
}
|
|
nleaves = leaves_left;
|
|
|
|
if (nleaves >= 2) {
|
|
inodes = malloc((nelem-1) * sizeof(ih_elem));
|
|
do {
|
|
if (codes_too_long) {
|
|
for (leaves_left = 0; leaves_left < nelem; leaves_left++) {
|
|
if (!leaves[leaves_left].freq) break;
|
|
if (leaves[leaves_left].freq != 1) {
|
|
leaves[leaves_left].freq >>= 1;
|
|
codes_too_long = 0;
|
|
}
|
|
}
|
|
assert (!codes_too_long);
|
|
}
|
|
|
|
cur_leaf = leaves;
|
|
next_inode = cur_inode = inodes;
|
|
|
|
do {
|
|
f1 = f2 = NULL;
|
|
if (leaves_left &&
|
|
((cur_inode == next_inode) ||
|
|
(cur_leaf->freq <= cur_inode->freq))) {
|
|
f1 = (ih_elem *)cur_leaf++;
|
|
leaves_left--;
|
|
}
|
|
else if (cur_inode != next_inode) {
|
|
f1 = cur_inode++;
|
|
}
|
|
|
|
if (leaves_left &&
|
|
((cur_inode == next_inode) ||
|
|
(cur_leaf->freq <= cur_inode->freq))) {
|
|
f2 = (ih_elem *)cur_leaf++;
|
|
leaves_left--;
|
|
}
|
|
else if (cur_inode != next_inode) {
|
|
f2 = cur_inode++;
|
|
}
|
|
|
|
#ifdef DEBUG_HUFFMAN
|
|
fprintf(stderr, "%d %d\n", f1, f2);
|
|
#endif
|
|
if (f1 && f2) {
|
|
next_inode->freq = f1->freq + f2->freq;
|
|
next_inode->sym = -1;
|
|
next_inode->left = f1;
|
|
next_inode->right = f2;
|
|
next_inode->parent = NULL;
|
|
f1->parent = next_inode;
|
|
f2->parent = next_inode;
|
|
if (f1->pathlength > f2->pathlength)
|
|
next_inode->pathlength = f1->pathlength + 1;
|
|
else
|
|
next_inode->pathlength = f2->pathlength + 1;
|
|
if (next_inode->pathlength > max_code_length) {
|
|
codes_too_long = 1;
|
|
break;
|
|
}
|
|
next_inode++;
|
|
}
|
|
}
|
|
while (f1 && f2);
|
|
}
|
|
while (codes_too_long);
|
|
|
|
#ifdef DEBUG_HUFFMAN
|
|
cur_inode = inodes;
|
|
while (cur_inode < next_inode) {
|
|
fprintf(stderr, "%d l: %3d%c r: %3d%c freq: %8d\n",
|
|
cur_inode - inodes,
|
|
(cur_inode->left->sym!=-1)?(((struct h_elem *)cur_inode->left)-leaves):(cur_inode->left-inodes),
|
|
(cur_inode->left->sym!=-1)?'l':'i',
|
|
(cur_inode->right->sym!=-1)?(((struct h_elem *)cur_inode->right)-leaves):(cur_inode->right-inodes),
|
|
(cur_inode->right->sym!=-1)?'l':'i',
|
|
(cur_inode->freq)
|
|
);
|
|
cur_inode++;
|
|
}
|
|
#endif
|
|
|
|
/* now traverse tree depth-first */
|
|
cur_inode = next_inode - 1;
|
|
pathlength = 0;
|
|
cur_inode->pathlength = -1;
|
|
do {
|
|
/* precondition: at unmarked node*/
|
|
if (cur_inode->sym == -1) /*&& (cur_inode->left)*/ {
|
|
/* left node of unmarked node is unmarked */
|
|
cur_inode = cur_inode->left;
|
|
cur_inode->pathlength = -1;
|
|
pathlength++;
|
|
}
|
|
else {
|
|
/* mark node */
|
|
cur_inode->pathlength = pathlength;
|
|
#if 0
|
|
if (cur_inode->right) {
|
|
/* right node of previously unmarked node is unmarked */
|
|
cur_inode = cur_inode->right;
|
|
cur_inode->pathlength = -1;
|
|
pathlength++;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
|
|
/* time to come up. Keep coming up until an unmarked node is reached */
|
|
/* or the tree is exhausted */
|
|
do {
|
|
cur_inode = cur_inode->parent;
|
|
pathlength--;
|
|
}
|
|
while (cur_inode && (cur_inode->pathlength != -1));
|
|
if (cur_inode) {
|
|
/* found unmarked node; mark it and go right */
|
|
cur_inode->pathlength = pathlength;
|
|
cur_inode = cur_inode->right;
|
|
cur_inode->pathlength = -1;
|
|
pathlength++;
|
|
/* would be complex if cur_inode could be null here. It can't */
|
|
}
|
|
}
|
|
}
|
|
}
|
|
while (cur_inode);
|
|
|
|
#ifdef DEBUG_HUFFMAN
|
|
cur_inode = inodes;
|
|
while (cur_inode < next_inode) {
|
|
fprintf(stderr, "%d l: %3d%c r: %3d%c freq: %8d pathlength %4d\n",
|
|
cur_inode - inodes,
|
|
(cur_inode->left->sym!=-1)?(((struct h_elem *)cur_inode->left)-leaves):(cur_inode->left-inodes),
|
|
(cur_inode->left->sym!=-1)?'l':'i',
|
|
(cur_inode->right->sym!=-1)?(((struct h_elem *)cur_inode->right)-leaves):(cur_inode->right-inodes),
|
|
(cur_inode->right->sym!=-1)?'l':'i',
|
|
(cur_inode->freq),
|
|
(cur_inode->pathlength)
|
|
);
|
|
cur_inode++;
|
|
}
|
|
#endif
|
|
free(inodes);
|
|
|
|
/* the pathlengths are already in order, so this sorts by symbol */
|
|
qsort(leaves, nelem, sizeof(h_elem), cmp_pathlengths);
|
|
|
|
/**
|
|
Microsoft's second condition on its canonical huffman codes is:
|
|
|
|
For each level, starting at the deepest level of the tree and then
|
|
moving upwards, leaf nodes must start as far left as possible. An
|
|
alternative way of stating this constraint is that if any tree node
|
|
has children then all tree nodes to the left of it with the same path
|
|
length must also have children.
|
|
|
|
These 'alternatives' are not equivalent. The latter alternative gives
|
|
the common canonical code where the longest code is all zeros. The former
|
|
gives an opposite code where the longest code is all ones. Microsoft uses the
|
|
former alternative.
|
|
**/
|
|
|
|
#if 0
|
|
pathlength = leaves[0].pathlength;
|
|
cur_code = 0;
|
|
for (i = 0; i < nleaves; i++) {
|
|
while (leaves[i].pathlength < pathlength) {
|
|
assert(!(cur_code & 1));
|
|
cur_code >>= 1;
|
|
pathlength--;
|
|
}
|
|
leaves[i].code = cur_code;
|
|
cur_code++;
|
|
}
|
|
#else
|
|
pathlength = leaves[nleaves-1].pathlength;
|
|
assert(leaves[0].pathlength <= 16); /* this method cannot deal with bigger codes, though
|
|
the other canonical method can in some cases
|
|
(because it starts with zeros ) */
|
|
cur_code = 0;
|
|
for (i = nleaves - 1; i >= 0; i--) {
|
|
while (leaves[i].pathlength > pathlength) {
|
|
cur_code <<= 1;
|
|
pathlength++;
|
|
}
|
|
leaves[i].code = cur_code;
|
|
cur_code++;
|
|
}
|
|
#endif
|
|
|
|
#ifdef DEBUG_HUFFMAN
|
|
for (i = 0; i < nleaves; i++) {
|
|
char code[18];
|
|
int j;
|
|
|
|
cur_code = leaves[i].code;
|
|
code[leaves[i].pathlength] = 0;
|
|
for (j = leaves[i].pathlength-1; j >= 0; j--) {
|
|
if (cur_code & 1) code[j] = '1';
|
|
else code[j] = '0';
|
|
cur_code >>= 1;
|
|
}
|
|
fprintf(stderr, "%3d: %3d %3d %-16.16s '%c'\n", i, leaves[i].freq, leaves[i].pathlength, code,
|
|
leaves[i].sym);
|
|
}
|
|
#endif
|
|
}
|
|
else if (nleaves == 1) {
|
|
/* 0 symbols is OK (not according to doc, but according to Caie) */
|
|
/* but if only one symbol is present, two symbols are required */
|
|
nleaves = 2;
|
|
leaves[0].pathlength = leaves[1].pathlength = 1;
|
|
if (leaves[1].sym > leaves[0].sym) {
|
|
leaves[1].code = 1;
|
|
leaves[0].code = 0;
|
|
}
|
|
else {
|
|
leaves[0].code = 1;
|
|
leaves[1].code = 0;
|
|
}
|
|
}
|
|
|
|
memset(tree, 0, nelem * sizeof(huff_entry));
|
|
for (i = 0; i < nleaves; i++) {
|
|
tree[leaves[i].sym].codelength = leaves[i].pathlength;
|
|
tree[leaves[i].sym].code = leaves[i].code;
|
|
}
|
|
|
|
free(leaves);
|
|
}
|
|
|
|
/* from Stuart Caie's code -- I'm hoping this code is too small to encumber
|
|
this file. If not, you could rip it out and hard-code the tables */
|
|
|
|
static void lzx_init_static(void)
|
|
{
|
|
int i, j;
|
|
|
|
if (extra_bits[49]) return;
|
|
|
|
rloge2 = 1.0/log(2);
|
|
for (i=0, j=0; i <= 50; i += 2) {
|
|
extra_bits[i] = extra_bits[i+1] = j; /* 0,0,0,0,1,1,2,2,3,3... */
|
|
if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */
|
|
}
|
|
|
|
for (i=0, j=0; i <= 50; i++) {
|
|
position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */
|
|
j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */
|
|
}
|
|
}
|
|
|
|
struct lzx_data
|
|
{
|
|
void *in_arg;
|
|
void *out_arg;
|
|
void *mark_frame_arg;
|
|
lzx_get_bytes_t get_bytes;
|
|
lzx_at_eof_t at_eof;
|
|
lzx_put_bytes_t put_bytes;
|
|
lzx_mark_frame_t mark_frame;
|
|
struct lz_info *lzi;
|
|
/* a 'frame' is an 0x8000 byte thing. Called that because otherwise
|
|
I'd confuse myself overloading 'block' */
|
|
int left_in_frame;
|
|
int left_in_block;
|
|
int R0, R1, R2;
|
|
int num_position_slots;
|
|
/* this is the LZX block size */
|
|
int block_size;
|
|
int *main_freq_table;
|
|
int length_freq_table[NUM_SECONDARY_LENGTHS];
|
|
int aligned_freq_table[LZX_ALIGNED_SIZE];
|
|
uint32_t *block_codes;
|
|
uint32_t *block_codesp;
|
|
huff_entry *main_tree;
|
|
huff_entry length_tree[NUM_SECONDARY_LENGTHS];
|
|
huff_entry aligned_tree[LZX_ALIGNED_SIZE];
|
|
int main_tree_size;
|
|
uint16_t bit_buf;
|
|
int bits_in_buf;
|
|
double main_entropy;
|
|
double last_ratio;
|
|
uint8_t *prev_main_treelengths;
|
|
uint8_t prev_length_treelengths[NUM_SECONDARY_LENGTHS];
|
|
uint32_t len_uncompressed_input;
|
|
uint32_t len_compressed_output;
|
|
short need_1bit_header;
|
|
short subdivide; /* 0 = don't subdivide, 1 = allowed, -1 = requested */
|
|
};
|
|
|
|
static int
|
|
lzx_get_chars(lz_info *lzi, int n, u_char *buf)
|
|
{
|
|
/* force lz compression to stop after every block */
|
|
int chars_read;
|
|
int chars_pad;
|
|
|
|
lzx_data *lzud = (lzx_data *)lzi->user_data;
|
|
#ifdef OLDFRAMING
|
|
if (lzud->subdivide < 0) return 0;
|
|
if (n > lzud->left_in_frame)
|
|
n = lzud->left_in_frame;
|
|
if (n > lzud->left_in_block)
|
|
n = lzud->left_in_block;
|
|
#endif
|
|
chars_read = lzud->get_bytes(lzud->in_arg, n, buf);
|
|
#ifdef OLDFRAMING
|
|
lzud->left_in_frame -= chars_read;
|
|
lzud->left_in_block -= chars_read;
|
|
#else
|
|
lzud->left_in_frame -= chars_read % LZX_FRAME_SIZE;
|
|
if (lzud->left_in_frame < 0)
|
|
lzud->left_in_frame += LZX_FRAME_SIZE;
|
|
#endif
|
|
if ((chars_read < n) && (lzud->left_in_frame)) {
|
|
chars_pad = n - chars_read;
|
|
if (chars_pad > lzud->left_in_frame) chars_pad = lzud->left_in_frame;
|
|
/* never emit a full frame of padding. This prevents silliness when
|
|
lzx_compress is called when at EOF but EOF not yet detected */
|
|
if (chars_pad == LZX_FRAME_SIZE) chars_pad = 0;
|
|
#ifdef OLDFRAMING
|
|
if (chars_pad > lzud->left_in_block) chars_pad = lzud->left_in_block;
|
|
#endif
|
|
memset(buf + chars_read, 0, chars_pad);
|
|
lzud->left_in_frame -= chars_pad;
|
|
#ifdef OLDFRAMING
|
|
lzud->left_in_block -= chars_pad;
|
|
#endif
|
|
chars_read += chars_pad;
|
|
}
|
|
return chars_read;
|
|
}
|
|
|
|
#ifdef NONSLIDE
|
|
static int find_match_at(lz_info *lzi, int loc, int match_len, int *match_locp)
|
|
{
|
|
u_char *matchb;
|
|
u_char *nmatchb;
|
|
u_char *c1, *c2;
|
|
int j;
|
|
|
|
if (-*match_locp == loc) return -1;
|
|
if (loc < match_len) return -1;
|
|
|
|
matchb = lzi->block_buf + lzi->block_loc + *match_locp;
|
|
nmatchb = lzi->block_buf + lzi->block_loc - loc;
|
|
c1 = matchb;
|
|
c2 = nmatchb;
|
|
for (j = 0; j < match_len; j++) {
|
|
if (*c1++ != *c2++) break;
|
|
}
|
|
if (j == match_len) {
|
|
#ifdef DEBUG_MATCHES
|
|
fprintf(stderr, "match found %d, old = %d new = %d len = %d\n", lzi->cur_loc, -*match_locp, loc, match_len);
|
|
#endif
|
|
*match_locp = -loc;
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
#else
|
|
static int find_match_at(lz_info *lzi, int loc, int match_len, int *match_locp)
|
|
{
|
|
u_char *matchb;
|
|
u_char *nmatchb;
|
|
u_char *c1, *c2;
|
|
int j;
|
|
|
|
if (-*match_locp == loc) return -1;
|
|
if (loc < match_len) return -1;
|
|
|
|
matchb = lzi->slide_bufp + *match_locp;
|
|
if (matchb < lzi->slide_buf) matchb += lzi->slide_buf_size;
|
|
nmatchb = lzi->slide_bufp - loc;
|
|
if (nmatchb < lzi->slide_buf) nmatchb += lzi->slide_buf_size;
|
|
c1 = matchb;
|
|
c2 = nmatchb;
|
|
for (j = 0; j < match_len; j++) {
|
|
if (*c1++ != *c2++) break;
|
|
if (c1 == lzi->slide_bufe) c1 = lzi->slide_buf;
|
|
if (c2 == lzi->slide_bufe) c2 = lzi->slide_buf;
|
|
}
|
|
if (j == match_len) {
|
|
#ifdef DEBUG_MATCHES
|
|
fprintf(stderr, "match found %d, old = %d new = %d len = %d\n", lzi->cur_loc, -*match_locp, loc, match_len);
|
|
#endif
|
|
*match_locp = -loc;
|
|
return 0;
|
|
}
|
|
return -1;
|
|
}
|
|
#endif
|
|
static void check_entropy(lzx_data *lzud, int main_index)
|
|
{
|
|
/* entropy = - sum_alphabet P(x) * log2 P(x) */
|
|
/* entropy = - sum_alphabet f(x)/N * log2 (f(x)/N) */
|
|
/* entropy = - 1/N sum_alphabet f(x) * (log2 f(x) - log2 N) */
|
|
/* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - sum_alphabet f(x) log2 N */
|
|
/* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - log2 N sum_alphabet f(x) */
|
|
/* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - N * log2 N */
|
|
|
|
/* entropy = - 1/N ((sum_alphabet f(x) * log2 f(x) ) - N * log2 N) */
|
|
/* entropy = - 1/N ((sum_alphabet f(x) * ln f(x) * 1/ln 2) - N * ln N * 1/ln 2) */
|
|
/* entropy = 1/(N ln 2) (N * ln N - (sum_alphabet f(x) * ln f(x))) */
|
|
/* entropy = 1/(N ln 2) (N * ln N + (sum_alphabet -f(x) * ln f(x))) */
|
|
|
|
/* entropy = 1/(N ln 2) ( sum_alphabet ln N * f(x) + (sum_alphabet -f(x) * ln f(x))) */
|
|
/* entropy = 1/(N ln 2) ( sum_alphabet ln N * f(x) + (-f(x) * ln f(x))) */
|
|
/* entropy = -1/(N ln 2) ( sum_alphabet -ln N * f(x) + (f(x) * ln f(x))) */
|
|
/* entropy = -1/(N ln 2) ( sum_alphabet f(x)(- ln N + ln f(x))) */
|
|
/* entropy = -1/(N ln 2) ( sum_alphabet f(x)(ln f(x)/N)) */
|
|
/* entropy = -1/N ( sum_alphabet (1/(ln 2))f(x)(ln f(x)/N)) */
|
|
/* entropy = -1/N ( sum_alphabet f(x)(log2 f(x)/N)) */
|
|
/* entropy = - ( sum_alphabet f(x)/N(log2 f(x)/N)) */
|
|
/* entropy = - ( sum_alphabet P(x)(log2 P(x))) */
|
|
|
|
|
|
double freq;
|
|
double n_ln_n;
|
|
double rn_ln2;
|
|
double cur_ratio;
|
|
int n;
|
|
|
|
/* delete old entropy accumulation */
|
|
if (lzud->main_freq_table[main_index] != 1) {
|
|
freq = (double)lzud->main_freq_table[main_index]-1;
|
|
lzud->main_entropy += freq * log(freq);
|
|
}
|
|
/* add new entropy accumulation */
|
|
freq = (double)lzud->main_freq_table[main_index];
|
|
lzud->main_entropy -= freq * log(freq);
|
|
n = lzud->block_codesp - lzud->block_codes;
|
|
|
|
if (((n & 0xFFF) == 0) && (lzud->left_in_block >= 0x1000)) {
|
|
n_ln_n = (double)n * log((double)n);
|
|
rn_ln2 = rloge2 / (double)n;
|
|
cur_ratio = (n * rn_ln2 *(n_ln_n + lzud->main_entropy) + 24 + 3 * 80 + NUM_CHARS + (lzud->main_tree_size-NUM_CHARS)*3 + NUM_SECONDARY_LENGTHS ) / (double)n;
|
|
#ifdef DEBUG_ENTROPY
|
|
fprintf(stderr, "n = %d\n", n);
|
|
fprintf(stderr, "main entropy = %f\n", rn_ln2 *(n_ln_n + lzud->main_entropy) );
|
|
fprintf(stderr, "compression ratio (raw) = %f\n", 100.0 * rn_ln2 *(n_ln_n + lzud->main_entropy) /9.0 );
|
|
fprintf(stderr, "compression ratio (ovh) = %f\n", 100.0 * cur_ratio/9.0);
|
|
#endif
|
|
if (cur_ratio > lzud->last_ratio) {
|
|
#ifdef DEBUG_ENTROPY
|
|
fprintf(stderr, "resetting huffman tables at %d\n", n);
|
|
#endif
|
|
lzud->subdivide = -1;
|
|
lz_stop_compressing(lzud->lzi);
|
|
}
|
|
lzud->last_ratio = cur_ratio;
|
|
}
|
|
}
|
|
|
|
static int
|
|
lzx_output_match(lz_info *lzi, int match_pos, int match_len)
|
|
{
|
|
lzx_data *lzud = (lzx_data *)lzi->user_data;
|
|
uint32_t formatted_offset;
|
|
uint32_t position_footer;
|
|
uint8_t length_footer;
|
|
uint8_t length_header;
|
|
uint16_t len_pos_header;
|
|
int position_slot;
|
|
short btdt;
|
|
|
|
#ifdef DEBUG_LZ
|
|
{
|
|
int i;
|
|
int pos;
|
|
for (i = 0; i < match_len; i++) {
|
|
|
|
#ifdef NONSLIDE
|
|
pos = match_pos + lzi->block_loc + i;
|
|
fprintf(stderr, "%c", lzi->block_buf[pos]);
|
|
#else
|
|
pos = match_pos + lzi->front_offset + i;
|
|
if (pos > lzi->slide_buf_size)
|
|
pos -= lzi->slide_buf_size;
|
|
fprintf(stderr, "%c", lzi->slide_buf[pos]);
|
|
#endif
|
|
}
|
|
}
|
|
#endif
|
|
position_footer = 0;
|
|
btdt = 0;
|
|
testforr:
|
|
if (match_pos == -lzud->R0) {
|
|
match_pos = 0;
|
|
formatted_offset = 0;
|
|
position_slot = 0;
|
|
}
|
|
else if (match_pos == -lzud->R1) {
|
|
lzud->R1 = lzud->R0;
|
|
lzud->R0 = -match_pos;
|
|
match_pos = 1;
|
|
formatted_offset = 1;
|
|
position_slot = 1;
|
|
}
|
|
else if (match_pos == -lzud->R2) {
|
|
lzud->R2 = lzud->R0;
|
|
lzud->R0 = -match_pos;
|
|
match_pos = 2;
|
|
formatted_offset = 2;
|
|
position_slot = 2;
|
|
}
|
|
else {
|
|
if (!btdt) {
|
|
btdt = 1;
|
|
if (find_match_at(lzi, lzud->R0, match_len, &match_pos) == 0)
|
|
goto testforr;
|
|
if (find_match_at(lzi, lzud->R1, match_len, &match_pos) == 0)
|
|
goto testforr;
|
|
if (find_match_at(lzi, lzud->R2, match_len, &match_pos) == 0)
|
|
goto testforr;
|
|
}
|
|
|
|
formatted_offset = -match_pos + 2;
|
|
|
|
if ((match_len < 3) ||
|
|
((formatted_offset >= 64) && (match_len < 4)) ||
|
|
((formatted_offset >= 2048) && (match_len < 5)) ||
|
|
((formatted_offset >= 65536) && (match_len < 6))) {
|
|
/* reject matches where extra_bits will likely be bigger than just outputting
|
|
literals. The numbers are basically derived through guessing
|
|
and trial and error */
|
|
return -1; /* reject the match */
|
|
}
|
|
|
|
lzud->R2 = lzud->R1;
|
|
lzud->R1 = lzud->R0;
|
|
lzud->R0 = -match_pos;
|
|
|
|
/* calculate position base using binary search of table; if log2 can be
|
|
done in hardware, approximation might work;
|
|
trunc(log2(formatted_offset*formatted_offset)) gets either the proper
|
|
position slot or the next one, except for slots 0, 1, and 39-49
|
|
|
|
Slots 0-1 are handled by the R0-R1 procedures
|
|
|
|
Slots 36-49 (formatted_offset >= 262144) can be found by
|
|
(formatted_offset/131072) + 34 ==
|
|
(formatted_offset >> 17) + 34;
|
|
*/
|
|
if (formatted_offset >= 262144) {
|
|
position_slot = (formatted_offset >> 17) + 34;
|
|
}
|
|
else {
|
|
int left, right, mid;
|
|
|
|
left = 3;
|
|
right = lzud->num_position_slots - 1;
|
|
position_slot = -1;
|
|
while (left <= right) {
|
|
mid = (left + right)/2;
|
|
if ((position_base[mid] <= formatted_offset) &&
|
|
position_base[mid+1] > formatted_offset) {
|
|
position_slot = mid;
|
|
break;
|
|
}
|
|
#if 0
|
|
fprintf(stderr, "BEFORE: %06x %06x %06x %06x\n",
|
|
position_base[left], position_base[mid],
|
|
formatted_offset, position_base[right]);
|
|
#endif
|
|
if (formatted_offset > position_base[mid])
|
|
/* too low */
|
|
left = mid + 1;
|
|
else /* too high */
|
|
right = mid;
|
|
#if 0
|
|
fprintf(stderr, "AFTER : %06x %06x %06x %06x\n",
|
|
position_base[left], position_base[mid],
|
|
formatted_offset, position_base[right]);
|
|
#endif
|
|
}
|
|
#ifdef DEBUG_POSITION_SLOT_LOOKUP
|
|
if (position_slot < 0) {
|
|
fprintf(stderr, "lmr npr: %d %d %d %d\n", left, mid, right, lzud->num_position_slots);
|
|
fprintf(stderr, "AFTER : %07d %07d %07d %07d\n",
|
|
position_base[left], position_base[mid],
|
|
formatted_offset, position_base[right]);
|
|
fprintf(stderr, "(%d, %d, %d, %d, %d)\n", match_pos, match_len, formatted_offset, position_slot, position_footer);
|
|
}
|
|
#endif
|
|
assert(position_slot >= 0);
|
|
/* FIXME precalc extra_mask table */
|
|
}
|
|
position_footer = ((1UL << extra_bits[position_slot]) - 1) & formatted_offset;
|
|
}
|
|
#ifdef DEBUG_MATCHES
|
|
#ifdef NONSLIDE
|
|
fprintf(stderr, "(%08x, %d, %d, %d, %d, %d)\n", lzud->lzi->cur_loc , match_pos, match_len, formatted_offset, position_slot, position_footer);
|
|
#else
|
|
fprintf(stderr, "(%08x, %d, %d, %d, %d, %d)\n", lzud->lzi->cur_loc - lzud->lzi->chars_in_match , match_pos, match_len, formatted_offset, position_slot, position_footer);
|
|
#endif
|
|
#endif
|
|
/* match length = 8 bits */
|
|
/* position_slot = 6 bits */
|
|
/* position_footer = 17 bits */
|
|
/* total = 31 bits */
|
|
/* plus one to say whether it's a literal or not */
|
|
*lzud->block_codesp++ = 0x80000000 | /* bit 31 in intelligent bit ordering */
|
|
(position_slot << 25) | /* bits 30-25 */
|
|
(position_footer << 8) | /* bits 8-24 */
|
|
(match_len - MIN_MATCH); /* bits 0-7 */
|
|
|
|
if (match_len < (NUM_PRIMARY_LENGTHS + MIN_MATCH)) {
|
|
length_header = match_len - MIN_MATCH;
|
|
/* length_footer = 255; */ /* not necessary */
|
|
}
|
|
else {
|
|
length_header = NUM_PRIMARY_LENGTHS;
|
|
length_footer = match_len - (NUM_PRIMARY_LENGTHS + MIN_MATCH);
|
|
lzud->length_freq_table[length_footer]++;
|
|
}
|
|
len_pos_header = (position_slot << 3) | length_header;
|
|
lzud->main_freq_table[len_pos_header + NUM_CHARS]++;
|
|
if (extra_bits[position_slot] >= 3) {
|
|
lzud->aligned_freq_table[position_footer & 7]++;
|
|
}
|
|
#ifndef OLDFRAMING
|
|
lzud->left_in_block -= match_len;
|
|
#endif
|
|
if (lzud->subdivide)
|
|
check_entropy(lzud, len_pos_header + NUM_CHARS);
|
|
return 0; /* accept the match */
|
|
}
|
|
|
|
static void
|
|
lzx_output_literal(lz_info *lzi, u_char ch)
|
|
{
|
|
lzx_data *lzud = (lzx_data *)lzi->user_data;
|
|
|
|
#ifndef OLDFRAMING
|
|
lzud->left_in_block--;
|
|
#endif
|
|
*lzud->block_codesp++ = ch;
|
|
#ifdef DEBUG_LZ
|
|
fprintf(stderr, "%c", ch);
|
|
#endif
|
|
lzud->main_freq_table[ch]++;
|
|
if (lzud->subdivide)
|
|
check_entropy(lzud, ch);
|
|
}
|
|
|
|
static void lzx_write_bits(lzx_data *lzxd, int nbits, uint32_t bits)
|
|
{
|
|
int cur_bits;
|
|
int shift_bits;
|
|
int rshift_bits;
|
|
uint16_t mask_bits;
|
|
|
|
#ifdef DEBUG_BITBUF
|
|
fprintf(stderr, "WB: %2d %08x\n", nbits, bits);
|
|
#endif
|
|
cur_bits = lzxd->bits_in_buf;
|
|
while ((cur_bits + nbits) >= 16) {
|
|
shift_bits = 16 - cur_bits;
|
|
rshift_bits = nbits - shift_bits;
|
|
if (shift_bits == 16) {
|
|
lzxd->bit_buf = (bits>>rshift_bits) & 0xFFFF;
|
|
}
|
|
else {
|
|
mask_bits = (1U << shift_bits) - 1;
|
|
lzxd->bit_buf <<= shift_bits;
|
|
lzxd->bit_buf |= (bits>>rshift_bits) & mask_bits;
|
|
}
|
|
#ifdef DEBUG_BITBUF
|
|
fprintf(stderr, "WBB: %04x\n", lzxd->bit_buf);
|
|
#endif
|
|
#ifdef LZX_BIG_ENDIAN
|
|
lzxd->bit_buf = ((lzxd->bit_buf & 0xFF)<<8) | (lzxd->bit_buf >> 8);
|
|
#endif
|
|
lzxd->put_bytes(lzxd->out_arg, sizeof(lzxd->bit_buf), &lzxd->bit_buf);
|
|
lzxd->len_compressed_output += sizeof(lzxd->bit_buf);
|
|
lzxd->bit_buf = 0;
|
|
nbits -= shift_bits;
|
|
cur_bits = 0;
|
|
}
|
|
/* (cur_bits + nbits) < 16. If nbits = 0, we're done.
|
|
otherwise move bits in */
|
|
shift_bits = nbits;
|
|
mask_bits = (1U << shift_bits) - 1;
|
|
lzxd->bit_buf <<= shift_bits;
|
|
lzxd->bit_buf |= bits & mask_bits;
|
|
cur_bits += nbits;
|
|
|
|
#ifdef DEBUG_BITBUF
|
|
fprintf(stderr, "OBB: %2d %04x\n", cur_bits, lzxd->bit_buf);
|
|
#endif
|
|
lzxd->bits_in_buf = cur_bits;
|
|
}
|
|
|
|
static void lzx_align_output(lzx_data *lzxd)
|
|
{
|
|
if (lzxd->bits_in_buf) {
|
|
lzx_write_bits(lzxd, 16 - lzxd->bits_in_buf, 0);
|
|
}
|
|
if (lzxd->mark_frame)
|
|
lzxd->mark_frame(lzxd->mark_frame_arg, lzxd->len_uncompressed_input, lzxd->len_compressed_output);
|
|
}
|
|
|
|
static void
|
|
lzx_write_compressed_literals(lzx_data *lzxd, int block_type)
|
|
{
|
|
uint32_t *cursor = lzxd->block_codes;
|
|
uint32_t *endp = lzxd->block_codesp;
|
|
uint16_t position_slot;
|
|
uint32_t position_footer;
|
|
uint32_t match_len_m2; /* match length minus 2, which is MIN_MATCH */
|
|
uint32_t verbatim_bits;
|
|
uint32_t block_code;
|
|
uint16_t length_header;
|
|
uint16_t length_footer;
|
|
uint16_t len_pos_header;
|
|
huff_entry *huffe;
|
|
int frame_count = (lzxd->len_uncompressed_input % LZX_FRAME_SIZE);
|
|
|
|
lzxd->len_uncompressed_input -= frame_count; /* will be added back in later */
|
|
while (cursor < endp) {
|
|
block_code = *cursor++;
|
|
if (block_code & 0x80000000) {
|
|
/*
|
|
* 0x80000000 | bit 31 in intelligent bit ordering
|
|
* (position_slot << 25) | bits 30-25
|
|
* (position_footer << 8) | bits 8-24
|
|
* (match_len - MIN_MATCH); bits 0-7
|
|
*
|
|
*/
|
|
|
|
match_len_m2 = block_code & 0xFF; /* 8 bits */
|
|
position_footer = (block_code >> 8)& 0x1FFFF; /* 17 bits */
|
|
position_slot = (block_code >> 25) & 0x3F; /* 6 bits */
|
|
|
|
#ifdef DEBUG_MATCHES_2
|
|
fprintf(stderr, "%08x, %3d %2d %d\n", lzxd->len_uncompressed_input + frame_count, match_len_m2, position_slot, position_footer);
|
|
#endif
|
|
if (match_len_m2 < NUM_PRIMARY_LENGTHS) {
|
|
length_header = match_len_m2;
|
|
length_footer = 255; /* personal encoding for NULL */
|
|
}
|
|
else {
|
|
length_header = NUM_PRIMARY_LENGTHS;
|
|
length_footer = match_len_m2 - NUM_PRIMARY_LENGTHS;
|
|
}
|
|
len_pos_header = (position_slot << 3) | length_header;
|
|
huffe = &lzxd->main_tree[len_pos_header+NUM_CHARS];
|
|
lzx_write_bits(lzxd, huffe->codelength, huffe->code);
|
|
if (length_footer != 255) {
|
|
huffe = &lzxd->length_tree[length_footer];
|
|
lzx_write_bits(lzxd, huffe->codelength, huffe->code);
|
|
}
|
|
if ((block_type == LZX_ALIGNED_OFFSET_BLOCK) && (extra_bits[position_slot] >= 3)) {
|
|
/* aligned offset block and code */
|
|
verbatim_bits = position_footer >> 3;
|
|
lzx_write_bits(lzxd, extra_bits[position_slot] - 3, verbatim_bits);
|
|
huffe = &lzxd->aligned_tree[position_footer&7];
|
|
lzx_write_bits(lzxd, huffe->codelength, huffe->code);
|
|
}
|
|
else {
|
|
verbatim_bits = position_footer;
|
|
lzx_write_bits(lzxd, extra_bits[position_slot], verbatim_bits);
|
|
}
|
|
frame_count += match_len_m2 + 2;
|
|
}
|
|
else {
|
|
/* literal */
|
|
assert(block_code < NUM_CHARS);
|
|
huffe = &lzxd->main_tree[block_code];
|
|
lzx_write_bits(lzxd, huffe->codelength, huffe->code);
|
|
frame_count++;
|
|
}
|
|
if (frame_count == LZX_FRAME_SIZE) {
|
|
lzxd->len_uncompressed_input += frame_count;
|
|
lzx_align_output(lzxd);
|
|
frame_count = 0;
|
|
}
|
|
#ifdef DEBUG_MATCHES_2
|
|
if (frame_count > LZX_FRAME_SIZE) {
|
|
fprintf(stderr, "uncomp_len = %x, frame_count = %x, block_code = %08x, match_len_m2 = %d", lzxd->len_uncompressed_input, frame_count, block_code, match_len_m2);
|
|
}
|
|
#endif
|
|
assert (frame_count < LZX_FRAME_SIZE);
|
|
}
|
|
lzxd->len_uncompressed_input += frame_count;
|
|
}
|
|
|
|
static int
|
|
lzx_write_compressed_tree(struct lzx_data *lzxd,
|
|
struct huff_entry *tree, uint8_t *prevlengths,
|
|
int treesize)
|
|
{
|
|
u_char *codes;
|
|
u_char *runs;
|
|
int freqs[LZX_PRETREE_SIZE];
|
|
int cur_run;
|
|
int last_len;
|
|
huff_entry pretree[20];
|
|
u_char *codep;
|
|
u_char *codee;
|
|
u_char *runp;
|
|
int excess;
|
|
int i;
|
|
int cur_code;
|
|
|
|
codep = codes = malloc(treesize*sizeof(char));
|
|
runp = runs = malloc(treesize*sizeof(char));
|
|
memset(freqs, 0, sizeof(freqs));
|
|
cur_run = 1;
|
|
last_len = tree[0].codelength;
|
|
for (i = 1; i <= treesize; i++) {
|
|
if ((i == treesize) || (tree[i].codelength != last_len)) {
|
|
if (last_len == 0) {
|
|
while (cur_run >= 20) {
|
|
excess = cur_run - 20;
|
|
if (excess > 31) excess = 31;
|
|
*codep++ = 18;
|
|
*runp++ = excess;
|
|
cur_run -= excess + 20;
|
|
freqs[18]++;
|
|
}
|
|
while (cur_run >= 4) {
|
|
excess = cur_run - 4;
|
|
if (excess > 15) excess = 15;
|
|
*codep++ = 17;
|
|
*runp++ = excess;
|
|
cur_run -= excess + 4;
|
|
freqs[17]++;
|
|
}
|
|
while (cur_run > 0) {
|
|
*codep = prevlengths[i - cur_run];
|
|
freqs[*codep++]++;
|
|
*runp++ = 0; /* not necessary */
|
|
cur_run--;
|
|
}
|
|
}
|
|
else {
|
|
while (cur_run >= 4) {
|
|
if (cur_run == 4) excess = 0;
|
|
else excess = 1;
|
|
*codep++ = 19;
|
|
*runp++ = excess;
|
|
freqs[19]++;
|
|
/* right, MS lies again. Code is NOT
|
|
prev_len + len (mod 17), it's prev_len - len (mod 17)*/
|
|
*codep = prevlengths[i-cur_run] - last_len;
|
|
if (*codep > 16) *codep += 17;
|
|
freqs[*codep++]++;
|
|
*runp++ = 0; /* not necessary */
|
|
cur_run -= excess+4;
|
|
}
|
|
while (cur_run > 0) {
|
|
*codep = prevlengths[i-cur_run] - last_len;
|
|
if (*codep > 16) *codep += 17;
|
|
*runp++ = 0; /* not necessary */
|
|
cur_run--;
|
|
freqs[*codep++]++;
|
|
}
|
|
}
|
|
if (i != treesize)
|
|
last_len = tree[i].codelength;
|
|
cur_run = 0;
|
|
}
|
|
cur_run++;
|
|
}
|
|
codee = codep;
|
|
#ifdef DEBUG_TREE_COMPRESSION
|
|
*codep++ = 255;
|
|
*runp++ = 255;
|
|
fprintf(stderr, "num: len code run\n");
|
|
for (i = 0; i < treesize; i++) {
|
|
fprintf(stderr, "%3d: %2d %2d %2d\n", i, tree[i].codelength, codes[i], runs[i]);
|
|
}
|
|
#endif
|
|
/* now create the huffman table and write out the pretree */
|
|
build_huffman_tree(LZX_PRETREE_SIZE, 16, freqs, pretree);
|
|
for (i = 0; i < LZX_PRETREE_SIZE; i++) {
|
|
lzx_write_bits(lzxd, 4, pretree[i].codelength);
|
|
}
|
|
codep = codes;
|
|
runp = runs;
|
|
cur_run = 0;
|
|
while (codep < codee) {
|
|
cur_code = *codep++;
|
|
lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code);
|
|
if (cur_code == 17) {
|
|
cur_run += *runp + 4;
|
|
lzx_write_bits(lzxd, 4, *runp);
|
|
}
|
|
else if (cur_code == 18) {
|
|
cur_run += *runp + 20;
|
|
lzx_write_bits(lzxd, 5, *runp);
|
|
}
|
|
else if (cur_code == 19) {
|
|
cur_run += *runp + 4;
|
|
lzx_write_bits(lzxd, 1, *runp);
|
|
cur_code = *codep++;
|
|
lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code);
|
|
runp++;
|
|
}
|
|
else {
|
|
cur_run++;
|
|
}
|
|
runp++;
|
|
}
|
|
free(codes);
|
|
free(runs);
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
lzx_reset(lzx_data *lzxd)
|
|
{
|
|
lzxd->need_1bit_header = 1;
|
|
lzxd->R0 = lzxd->R1 = lzxd->R2 = 1;
|
|
memset(lzxd->prev_main_treelengths, 0, lzxd->main_tree_size * sizeof(uint8_t));
|
|
memset(lzxd->prev_length_treelengths, 0, NUM_SECONDARY_LENGTHS * sizeof(uint8_t));
|
|
lz_reset(lzxd->lzi);
|
|
}
|
|
|
|
int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide)
|
|
{
|
|
int i;
|
|
uint32_t written_sofar = 0;
|
|
int block_type;
|
|
long uncomp_bits;
|
|
long comp_bits;
|
|
long comp_bits_ovh;
|
|
long uncomp_length;
|
|
|
|
if ((lzxd->block_size != block_size) || (lzxd->block_codes == NULL)) {
|
|
if (lzxd->block_codes != NULL) free(lzxd->block_codes);
|
|
lzxd->block_size = block_size;
|
|
lzxd->block_codes = malloc(block_size * sizeof(uint32_t));
|
|
}
|
|
lzxd->subdivide = subdivide?1:0;
|
|
|
|
lzxd->left_in_block = block_size;
|
|
lzxd->left_in_frame = LZX_FRAME_SIZE;
|
|
lzxd->main_entropy = 0.0;
|
|
lzxd->last_ratio = 9999999.0;
|
|
lzxd->block_codesp = lzxd->block_codes;
|
|
|
|
memset(lzxd->length_freq_table, 0, NUM_SECONDARY_LENGTHS * sizeof(int));
|
|
memset(lzxd->main_freq_table, 0, lzxd->main_tree_size * sizeof(int));
|
|
memset(lzxd->aligned_freq_table, 0, LZX_ALIGNED_SIZE * sizeof(int));
|
|
do {
|
|
lz_compress(lzxd->lzi, lzxd->left_in_block);
|
|
if (lzxd->left_in_frame == 0)
|
|
lzxd->left_in_frame = LZX_FRAME_SIZE;
|
|
|
|
if ((lzxd->subdivide<0) || !lzxd->left_in_block ||
|
|
(!lz_left_to_process(lzxd->lzi) && lzxd->at_eof(lzxd->in_arg))) {
|
|
/* now one block is LZ-analyzed. */
|
|
/* time to write it out */
|
|
uncomp_length = lzxd->block_size - lzxd->left_in_block - written_sofar;
|
|
/* uncomp_length will sometimes be 0 when input length is
|
|
an exact multiple of frame size */
|
|
if (uncomp_length == 0)
|
|
continue;
|
|
if (lzxd->subdivide < 0) {
|
|
#ifdef DEBUG_ENTROPY
|
|
fprintf(stderr, "subdivided\n");
|
|
#endif
|
|
lzxd->subdivide = 1;
|
|
}
|
|
|
|
if (lzxd->need_1bit_header) {
|
|
/* one bit Intel preprocessing header */
|
|
/* always 0 because this implementation doesn't do Intel preprocessing */
|
|
lzx_write_bits(lzxd, 1, 0);
|
|
lzxd->need_1bit_header = 0;
|
|
}
|
|
|
|
/* handle extra bits */
|
|
uncomp_bits = comp_bits = 0;
|
|
build_huffman_tree(LZX_ALIGNED_SIZE, 7, lzxd->aligned_freq_table, lzxd->aligned_tree);
|
|
for (i = 0; i < LZX_ALIGNED_SIZE; i++) {
|
|
uncomp_bits += lzxd->aligned_freq_table[i]* 3;
|
|
comp_bits += lzxd->aligned_freq_table[i]* lzxd->aligned_tree[i].codelength;
|
|
}
|
|
comp_bits_ovh = comp_bits + LZX_ALIGNED_SIZE * 3;
|
|
if (comp_bits_ovh < uncomp_bits)
|
|
block_type = LZX_ALIGNED_OFFSET_BLOCK;
|
|
else
|
|
block_type = LZX_VERBATIM_BLOCK;
|
|
|
|
#ifdef DEBUG_EXTRA_BITS
|
|
fprintf(stderr, "Extra bits uncompressed: %5d compressed: %5d compressed w/overhead %5d gain/loss %5d\n", uncomp_bits, comp_bits, comp_bits_ovh, uncomp_bits - comp_bits_ovh);
|
|
#endif
|
|
|
|
/* block type */
|
|
lzx_write_bits(lzxd, 3, block_type);
|
|
/* uncompressed length */
|
|
lzx_write_bits(lzxd, 24, uncomp_length);
|
|
|
|
written_sofar = lzxd->block_size - lzxd->left_in_block;
|
|
|
|
/* now write out the aligned offset trees if present */
|
|
if (block_type == LZX_ALIGNED_OFFSET_BLOCK) {
|
|
for (i = 0; i < LZX_ALIGNED_SIZE; i++) {
|
|
lzx_write_bits(lzxd, 3, lzxd->aligned_tree[i].codelength);
|
|
}
|
|
}
|
|
/* end extra bits */
|
|
build_huffman_tree(lzxd->main_tree_size, LZX_MAX_CODE_LENGTH,
|
|
lzxd->main_freq_table, lzxd->main_tree);
|
|
build_huffman_tree(NUM_SECONDARY_LENGTHS, 16,
|
|
lzxd->length_freq_table, lzxd->length_tree);
|
|
|
|
|
|
|
|
/* now write the pre-tree and tree for main 1 */
|
|
lzx_write_compressed_tree(lzxd, lzxd->main_tree, lzxd->prev_main_treelengths, NUM_CHARS);
|
|
|
|
/* now write the pre-tree and tree for main 2*/
|
|
lzx_write_compressed_tree(lzxd, lzxd->main_tree + NUM_CHARS,
|
|
lzxd->prev_main_treelengths + NUM_CHARS,
|
|
lzxd->main_tree_size - NUM_CHARS);
|
|
|
|
/* now write the pre tree and tree for length */
|
|
lzx_write_compressed_tree(lzxd, lzxd->length_tree, lzxd->prev_length_treelengths,
|
|
NUM_SECONDARY_LENGTHS);
|
|
|
|
/* now write literals */
|
|
lzx_write_compressed_literals(lzxd, block_type);
|
|
|
|
/* copy treelengths somewhere safe to do delta compression */
|
|
for (i = 0; i < lzxd->main_tree_size; i++) {
|
|
lzxd->prev_main_treelengths[i] = lzxd->main_tree[i].codelength;
|
|
}
|
|
for (i = 0; i < NUM_SECONDARY_LENGTHS; i++) {
|
|
lzxd->prev_length_treelengths[i] = lzxd->length_tree[i].codelength;
|
|
}
|
|
lzxd->main_entropy = 0.0;
|
|
lzxd->last_ratio = 9999999.0;
|
|
lzxd->block_codesp = lzxd->block_codes;
|
|
|
|
memset(lzxd->length_freq_table, 0, NUM_SECONDARY_LENGTHS * sizeof(int));
|
|
memset(lzxd->main_freq_table, 0, lzxd->main_tree_size * sizeof(int));
|
|
memset(lzxd->aligned_freq_table, 0, LZX_ALIGNED_SIZE * sizeof(int));
|
|
}
|
|
}
|
|
while (lzxd->left_in_block && (lz_left_to_process(lzxd->lzi) || !lzxd->at_eof(lzxd->in_arg)));
|
|
return 0;
|
|
}
|
|
|
|
int lzx_init(struct lzx_data **lzxdp, int wsize_code,
|
|
lzx_get_bytes_t get_bytes, void *get_bytes_arg,
|
|
lzx_at_eof_t at_eof,
|
|
lzx_put_bytes_t put_bytes, void *put_bytes_arg,
|
|
lzx_mark_frame_t mark_frame, void *mark_frame_arg)
|
|
{
|
|
int wsize;
|
|
struct lzx_data *lzxd;
|
|
|
|
if ((wsize_code < 15) || (wsize_code > 21)) {
|
|
return -1;
|
|
}
|
|
lzx_init_static();
|
|
|
|
*lzxdp = lzxd = malloc(sizeof(*lzxd));
|
|
if (lzxd == 0)
|
|
return -2;
|
|
|
|
lzxd->in_arg = get_bytes_arg;
|
|
lzxd->out_arg = put_bytes_arg;
|
|
lzxd->mark_frame_arg = mark_frame_arg;
|
|
lzxd->get_bytes = get_bytes;
|
|
lzxd->put_bytes = put_bytes;
|
|
lzxd->at_eof = at_eof;
|
|
lzxd->mark_frame = mark_frame;
|
|
|
|
wsize = 1 << (wsize_code);
|
|
|
|
lzxd->bits_in_buf = 0;
|
|
lzxd->block_codes = NULL;
|
|
lzxd->num_position_slots = num_position_slots[wsize_code-15];
|
|
lzxd->main_tree_size = (NUM_CHARS + 8 * lzxd->num_position_slots);
|
|
|
|
lzxd->main_freq_table = malloc(sizeof(int) * lzxd->main_tree_size);
|
|
lzxd->main_tree = malloc(sizeof(huff_entry)* lzxd->main_tree_size);
|
|
lzxd->prev_main_treelengths = malloc(sizeof(uint8_t)*lzxd->main_tree_size);
|
|
|
|
lzxd->lzi = malloc(sizeof (*lzxd->lzi));
|
|
/* the -3 prevents matches at wsize, wsize-1, wsize-2, all of which are illegal */
|
|
lz_init(lzxd->lzi, wsize, wsize - 3, MAX_MATCH, MIN_MATCH, LZX_FRAME_SIZE,
|
|
lzx_get_chars, lzx_output_match, lzx_output_literal,lzxd);
|
|
lzxd->len_uncompressed_input = 0;
|
|
lzxd->len_compressed_output = 0;
|
|
lzx_reset(lzxd);
|
|
return 0;
|
|
}
|
|
|
|
int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr)
|
|
{
|
|
/* lzx_align_output(lzxd); Not needed as long as frame padding is in place */
|
|
if (lzxr) {
|
|
lzxr->len_compressed_output = lzxd->len_compressed_output;
|
|
lzxr->len_uncompressed_input = lzxd->len_uncompressed_input;
|
|
}
|
|
lz_release(lzxd->lzi);
|
|
free(lzxd->lzi);
|
|
free(lzxd->prev_main_treelengths);
|
|
free(lzxd->main_tree);
|
|
free(lzxd->main_freq_table);
|
|
free(lzxd);
|
|
return 0;
|
|
}
|
|
|