diff --git a/reactos/tools/CMakeLists.txt b/reactos/tools/CMakeLists.txt index 7c2981610ce..1c886e8061c 100644 --- a/reactos/tools/CMakeLists.txt +++ b/reactos/tools/CMakeLists.txt @@ -15,6 +15,7 @@ add_executable(utf16le utf16le/utf16le.cpp) add_subdirectory(cabman) add_subdirectory(cdmake) +add_subdirectory(hhpcomp) add_subdirectory(hpp) add_subdirectory(kbdtool) add_subdirectory(mkhive) diff --git a/reactos/tools/hhpcomp/CMakeLists.txt b/reactos/tools/hhpcomp/CMakeLists.txt new file mode 100644 index 00000000000..d86fed1d0f5 --- /dev/null +++ b/reactos/tools/hhpcomp/CMakeLists.txt @@ -0,0 +1,15 @@ + +list(APPEND SOURCE + hhpcomp.cpp + hhp_reader.cpp + utils.cpp + chmc/chmc.c + chmc/err.c + lzx_compress/lz_nonslide.c + lzx_compress/lzx_layer.c) + +# used by lzx_compress +add_definitions(-DNONSLIDE) + +add_executable(hhpcomp ${SOURCE}) +target_link_libraries(hhpcomp m) diff --git a/reactos/tools/hhpcomp/COPYING b/reactos/tools/hhpcomp/COPYING new file mode 100644 index 00000000000..354b21f9b18 --- /dev/null +++ b/reactos/tools/hhpcomp/COPYING @@ -0,0 +1,11 @@ +LICENSING CLEARIFICATION + +1. files unique to hhpcomp: LGPL version 2.1 or later +2. files borrowed from chmc: GPL version 3 or later +3. files borrowed from lzxcomp: LGPL version 2.1 only + +whole project: GPL version 3 or later (via implicit relicensing of 1. and 3.) + +copies of the respective license texts can be found in the top level directory + +see http://sourceforge.net/projects/chmc for unmodified sources of 2. and 3. diff --git a/reactos/tools/hhpcomp/chmc/chm.h b/reactos/tools/hhpcomp/chmc/chm.h new file mode 100644 index 00000000000..0c59feee358 --- /dev/null +++ b/reactos/tools/hhpcomp/chmc/chm.h @@ -0,0 +1,177 @@ +/* + + Copyright (C) 2010 Alex Andreotti + + This file is part of chmc. 
+ + chmc is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + chmc is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with chmc. If not, see . + + NOTE this file is mainly based on chm_lib.c from chmLib by Jed Wing + http://www.jedrea.com/chmlib/ + +*/ +#ifndef CHMC_CHM_H +#define CHMC_CHM_H + +/* + * architecture specific defines + * + * Note: as soon as C99 is more widespread, the below defines should + * probably just use the C99 sized-int types. + * + * The following settings will probably work for many platforms. The sizes + * don't have to be exactly correct, but the types must accommodate at least as + * many bits as they specify. + */ + +/* i386, 32-bit, Windows */ +#ifdef WIN32 +typedef unsigned char UChar; +typedef __int16 Int16; +typedef unsigned __int16 UInt16; +typedef __int32 Int32; +typedef unsigned __int32 UInt32; +typedef __int64 Int64; +typedef unsigned __int64 UInt64; + +/* I386, 32-bit, non-Windows */ +/* Sparc */ +/* MIPS */ +/* PPC */ +#elif __i386__ || __sun || __sgi || __ppc__ +typedef unsigned char UChar; +typedef short Int16; +typedef unsigned short UInt16; +typedef long Int32; +typedef unsigned long UInt32; +typedef long long Int64; +typedef unsigned long long UInt64; + +/* x86-64 */ +/* Note that these may be appropriate for other 64-bit machines. 
*/ +#elif __x86_64__ || __ia64__ +typedef unsigned char UChar; +typedef short Int16; +typedef unsigned short UInt16; +typedef int Int32; +typedef unsigned int UInt32; +typedef long Int64; +typedef unsigned long UInt64; + +#else + +/* yielding an error is preferable to yielding incorrect behavior */ +#error "Please define the sized types for your platform" +#endif + +/* GCC */ +#ifdef __GNUC__ +#define memcmp __builtin_memcmp +#define memset __builtin_memset +#define memcpy __builtin_memcpy +#define strlen __builtin_strlen +#endif + +#define _CHMC_ITSF_V3_LEN (0x60) +struct chmcItsfHeader { + char signature[4]; /* 0 (ITSF) */ + Int32 version; /* 4 */ + Int32 header_len; /* 8 */ + Int32 unknown_000c; /* c */ + UInt32 last_modified; /* 10 */ + UInt32 lang_id; /* 14 */ + UChar dir_uuid[16]; /* 18 */ + UChar stream_uuid[16]; /* 28 */ + UInt64 sect0_offset; /* 38 */ + UInt64 sect0_len; /* 40 */ + UInt64 dir_offset; /* 48 */ + UInt64 dir_len; /* 50 */ + UInt64 data_offset; /* 58 (Not present before V3) */ +}; /* __attribute__ ((aligned (1))); */ + +#define _CHMC_SECT0_LEN (0x18) +struct chmcSect0 { + Int32 unknown_0000; /* 0 */ + Int32 unknown_0004; /* 4 */ + UInt64 file_len; /* 8 */ + Int32 unknown_0010; /* 10 */ + Int32 unknown_0014; /* 14 */ +}; + +#define CHM_IDX_INTVL 2 + +/* structure of ITSP headers */ +#define _CHMC_ITSP_V1_LEN (0x54) +struct chmcItspHeader { + char signature[4]; /* 0 (ITSP) */ + Int32 version; /* 4 */ + Int32 header_len; /* 8 */ + Int32 unknown_000c; /* c */ + UInt32 block_len; /* 10 */ + Int32 blockidx_intvl; /* 14 */ + Int32 index_depth; /* 18 */ + Int32 index_root; /* 1c */ + Int32 index_head; /* 20 */ + Int32 index_last; /* 24 */ + Int32 unknown_0028; /* 28 */ + UInt32 num_blocks; /* 2c */ + UInt32 lang_id; /* 30 */ + UChar system_uuid[16]; /* 34 */ + UInt32 header_len2; /* 44 */ + UChar unknown_0048[12]; /* 48 */ +}; /* __attribute__ ((aligned (1))); */ + +/* structure of PMGL headers */ +#define _CHMC_PMGL_LEN (0x14) +struct chmcPmglHeader 
+{ + char signature[4]; /* 0 (PMGL) */ + UInt32 free_space; /* 4 */ + UInt32 unknown_0008; /* 8 */ + Int32 block_prev; /* c */ + Int32 block_next; /* 10 */ +}; /* __attribute__ ((aligned (1))); */ + +#define _CHMC_PMGI_LEN (0x08) +struct chmcPmgiHeader { + char signature[4]; /* 0 (PMGI) */ + UInt32 free_space; /* 4 */ +}; /* __attribute__ ((aligned (1))); */ + +/* structure of LZXC reset table */ +#define _CHMC_LZXC_RESETTABLE_V1_LEN (0x28) +struct chmcLzxcResetTable { + UInt32 version; + UInt32 block_count; + UInt32 entry_size; + UInt32 table_offset; + UInt64 uncompressed_len; + UInt64 compressed_len; + UInt64 block_len; +}; /* __attribute__ ((aligned (1))); */ + +/* structure of LZXC control data block */ +#define _CHMC_LZXC_MIN_LEN (0x18) +#define _CHMC_LZXC_V2_LEN (0x1c) +struct chmcLzxcControlData { + UInt32 size; /* 0 */ + char signature[4]; /* 4 (LZXC) */ + UInt32 version; /* 8 */ + UInt32 resetInterval; /* c */ + UInt32 windowSize; /* 10 */ + UInt32 windowsPerReset; /* 14 */ + UInt32 unknown_18; /* 18 */ +}; + +#endif /* CHMC_CHM_H */ diff --git a/reactos/tools/hhpcomp/chmc/chmc.c b/reactos/tools/hhpcomp/chmc/chmc.c new file mode 100644 index 00000000000..cf1c83baf64 --- /dev/null +++ b/reactos/tools/hhpcomp/chmc/chmc.c @@ -0,0 +1,1659 @@ +/* + + Copyright(C) 2010 Alex Andreotti + + This file is part of chmc. + + chmc is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + chmc is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with chmc. If not, see . 
+ +*/ +#include "chmc.h" + +#include + +#include +#include +#include +#include +#include "err.h" + + +#include "encint.h" + +#include +#include "../lzx_compress/lzx_config.h" +#include "../lzx_compress/lzx_compress.h" + +#define PACKAGE_STRING "hhpcomp development version" + +int chmc_section_add(struct chmcFile *chm, const char *name); +struct chmcSection * chmc_section_create(struct chmcFile *chm, + const char *name); +void chmc_reset_table_init(struct chmcLzxcResetTable *reset_table); +void chmc_control_data_init(struct chmcLzxcControlData *control_data); +int chmc_namelist_create(struct chmcFile *chm, int len); +struct chmcTreeNode * chmc_add_meta(struct chmcFile *chm, + const char *metaname, int sect_id, + UChar *buf, UInt64 len); +struct chmcTreeNode *chmc_add_entry(struct chmcFile *chm, const char *name, + UInt16 prefixlen, int sect_id, + UChar *buf, UInt64 offset, UInt64 len); +void chmc_sections_free(struct chmcFile *chm); +void chmc_section_destroy(struct chmcSection *section); +void chmc_pmgi_free(struct chmcFile *chm); +void chmc_pmgl_free(struct chmcFile *chm); +void chmc_pmgl_destroy(struct chmcPmglChunkNode *node); +void chmc_pmgi_destroy(struct chmcPmgiChunkNode *node); +void chmc_entries_free(struct chmcFile *chm); +void chmc_entry_destroy(struct chmcTreeNode *node); +int chmc_add_tree(struct chmcFile *chm, const char *dir); +static int _add_tree_file( struct dir_tree_global *dtg, + struct dir_tree_local *dtl ); +static int _add_tree_dir(struct dir_tree_global *dtg, + struct dir_tree_local *dtl); +struct chmcTreeNode *chmc_add_file(struct chmcFile *chm, const char *filename, + UInt16 prefixlen, int sect_id, UChar *buf, + UInt64 len); +struct chmcTreeNode *chmc_add_dir(struct chmcFile *chm, const char *dir); +struct chmcTreeNode *chmc_add_empty(struct chmcFile *chm, const char *file); + +int chmc_crunch_lzx(struct chmcFile *chm, int sect_id); +static int _lzx_at_eof(void *arg); +static int _lzx_put_bytes(void *arg, int n, void *buf); +static void 
_lzx_mark_frame(void *arg, uint32_t uncomp, uint32_t comp); +static int _lzx_get_bytes(void *arg, int n, void *buf); + +int chmc_compressed_add_mark(struct chmcFile *chm, UInt64 at); +int chmc_control_data_done(struct chmcFile *chm); +int chmc_reset_table_done(struct chmcFile *chm); +void chmc_pmgl_done(struct chmcFile *chm); + +void chmc_entries_qsort(struct chmcFile *chm); +static int _entry_cmp(struct chmcTreeNode **pa, struct chmcTreeNode **pb); + +struct chmcSection *chmc_section_lookup(struct chmcFile *chm, int id); + +struct chmcPmglChunkNode *chmc_pmgl_create(void); +void chmc_pmgl_add(struct chmcFile *chm, struct chmcPmglChunkNode *pmgl); +void chmc_pmgl_init(struct chmcPmglChunkNode *node); +int chmc_pmgi_add_entry(struct chmcFile *chm, const char *name, int pmgl_id); +void chmc_pmgi_add(struct chmcFile *chm, struct chmcPmgiChunkNode *pmgi); +void chmc_string_init(struct chmcStringChunk *node); + +struct chmcLzxInfo +{ + struct chmcFile *chm; + struct chmcSection *section; + int fd; + UInt32 fd_offset; + UInt32 done; + UInt32 todo; + struct list_head *pos; + int error; + int eof; +}; + +static const short chmc_transform_list[] = { + 0x7b, 0x37, 0x46, 0x43, 0x32, 0x38, 0x39, + 0x34, 0x30, 0x2d, 0x39, 0x44, 0x33, 0x31, + 0x2d, 0x31, 0x31, 0x44, 0x30 }; + +int chmc_init(struct chmcFile *chm, const char *filename, + struct chmcConfig *config) +{ + struct chmcItsfHeader *itsf = &chm->itsf; + struct chmcSect0 *sect0 = &chm->sect0; + struct chmcItspHeader *itsp = &chm->itsp; + struct chmcSystem *system = &chm->system; + struct chmcSystemInfo *sysinfo = &chm->system.info; + struct chmcIndexHeader *idxhdr = &chm->idxhdr; + + assert(chm); + assert(filename); + + chmcerr_clean(); + + memset(chm, 0, sizeof(struct chmcFile)); + + chm->config = config; + + if (strcmp(filename, "-") != 0) { + chm->fd = creat(filename, 0644); + if (chm->fd < 0) { + chmcerr_set(errno, strerror(errno)); + chmcerr_return_msg("creat file '%s'", filename); + } + } else { + chm->fd = 
fileno(stdout); + } + + memcpy(itsf->signature, "ITSF", 4); + itsf->version = 3; + itsf->header_len = _CHMC_ITSF_V3_LEN; + itsf->unknown_000c = 1; + + itsf->lang_id = chm->config->language; + memcpy(itsf->dir_uuid, CHMC_DIR_UUID, 16); + memcpy(itsf->stream_uuid, CHMC_STREAM_UUID, 16); + itsf->dir_offset = _CHMC_ITSF_V3_LEN + _CHMC_SECT0_LEN; + + itsf->sect0_offset = _CHMC_ITSF_V3_LEN; + itsf->sect0_len = _CHMC_SECT0_LEN; + + sect0->file_len = _CHMC_ITSF_V3_LEN + + _CHMC_SECT0_LEN + + _CHMC_ITSP_V1_LEN; + + sect0->unknown_0000 = 510; + + memcpy(itsp->signature, "ITSP", 4); + itsp->version = 1; + itsp->header_len = _CHMC_ITSP_V1_LEN; + itsp->unknown_000c = 10; + itsp->block_len = _CHMC_CHUNK_LEN; + itsp->blockidx_intvl = CHM_IDX_INTVL; + itsp->index_depth = 2; + + itsp->unknown_0028 = -1; + itsp->lang_id = CHMC_MS_LCID_EN_US; + memcpy(itsp->system_uuid, CHMC_SYSTEM_UUID, 16); + itsp->header_len2 = _CHMC_ITSP_V1_LEN; + memset(itsp->unknown_0048, -1, 12); + + system->version = 3; + system->_size = _CHMC_SYSTEM_HDR_LEN + sizeof(struct chmcIndexHeader); + + sysinfo->lcid = CHMC_MS_LCID_EN_US; + + memcpy(idxhdr->signature, "T#SM", 4); + idxhdr->unknown_4 = 28582569; // FIXME got from some chm + idxhdr->unknown_8 = 1; + // idxhdr->full_search = 1; + // idxhdr->klinks = 1; + // idxhdr->alinks = 0; + // idxhdr->timestamp = ???; + + // idxhdr->num_of_topic = 2; // sorry?? 
+    idxhdr->off_img_list = -1;
+    // idxhdr->img_type_folder;
+    idxhdr->background = -1;
+    idxhdr->foreground = -1;
+    idxhdr->off_font = -1;
+    idxhdr->win_style = -1;
+    idxhdr->ex_win_style = -1;
+    idxhdr->unknown_34 = -1;
+    idxhdr->off_frame_name = -1;
+    idxhdr->off_win_name = -1;
+    // idxhdr->num_of_info;
+    idxhdr->unknown_44 = 1;
+    // idxhdr->num_of_merge_files;
+    // idxhdr->unknown_4c;
+
+    INIT_LIST_HEAD(&chm->sections_list);
+    INIT_LIST_HEAD(&chm->pmgl_list);
+    INIT_LIST_HEAD(&chm->entries_list);
+    INIT_LIST_HEAD(&chm->pmgi_list);
+
+    /* zero-filled strings chunk; offset 0 stays the empty string */
+    chm->strings = calloc(1, 4096);
+    if (!chm->strings) {
+        chmcerr_set(CHMC_ENOMEM, "strings chunk allocation failed");
+        return CHMC_ENOMEM;
+    }
+    chm->strings_len = 4096;
+    chm->strings_offset = 1;
+
+    if (chmc_section_add(chm, "Uncompressed") != CHMC_NOERR)
+        chmcerr_return_msg("adding section: Uncompressed");
+
+    if (chmc_section_add(chm, "MSCompressed") != CHMC_NOERR)
+        chmcerr_return_msg("adding section: MSCompressed");
+
+    chmc_sections_done(chm);
+
+    return CHMC_NOERR;
+}
+
+/*
+ * Create a section called `name` and append it to chm->sections_list.
+ * Returns CHMC_NOERR, or chmcerr_code() when the section cannot be created.
+ */
+int chmc_section_add(struct chmcFile *chm, const char *name)
+{
+    struct chmcSection *section;
+
+    assert(chm);
+    assert(name);
+
+    section = chmc_section_create(chm, name);
+    if (!section)
+        return chmcerr_code();
+
+    list_add_tail(&section->list, &chm->sections_list);
+    chm->sections_num++;
+
+    return CHMC_NOERR;
+}
+
+/*
+ * Allocate a section called `name` and back it with a mkstemp() temporary
+ * file created under chm->config->tmpdir ("/tmp/" when no directory, or an
+ * empty one, is configured).  The "MSCompressed" section additionally gets
+ * its reset table header, control data and mark list initialised.
+ * Returns the new section, or NULL after recording the error via chmcerr_*.
+ */
+struct chmcSection *chmc_section_create(struct chmcFile *chm,
+                                        const char *name)
+{
+    struct chmcSection *section;
+
+    assert(name);
+
+    section = calloc(1, sizeof(struct chmcSection));
+    if (section) {
+        const char *tmpdir;
+        int len;
+
+        len = strlen(name);
+        memcpy(section->name, name, len + 1);
+        section->offset = 0;
+        section->len = 0;
+
+        tmpdir = NULL;
+        if (chm->config != NULL)
+            tmpdir = chm->config->tmpdir;
+        /* an empty tmpdir would make filename[len - 1] read index -1 */
+        if (tmpdir == NULL || *tmpdir == '\0')
+            tmpdir = "/tmp/";
+
+        len = strlen(tmpdir);
+        if (len >= PATH_MAX - 12) {
+            /* no system call failed here, so errno carries no meaning */
+            chmcerr_set(ENAMETOOLONG, strerror(ENAMETOOLONG));
+            chmcerr_msg("tmpdir too long: '%s'", tmpdir);
+            goto fail;
+        }
+
+        strcat(section->filename, tmpdir);
+        if (section->filename[len - 1] != '/')
+            strcat(section->filename, "/");
+
+        if (strcmp("MSCompressed", name) == 0)
+            strcat(section->filename, "chmcCXXXXXX");
+        else
+            strcat(section->filename, "chmcUXXXXXX");
+
+        section->fd = mkstemp(section->filename);
+        fprintf(stderr, "temp file: %s\n", section->filename);
+        if (section->fd < 0) {
+            chmcerr_set(errno, strerror(errno));
+            chmcerr_msg("creat() file '%s'", section->filename);
+            goto fail;
+        }
+        else if (strcmp(section->name, "MSCompressed") == 0) {
+            chmc_reset_table_init(&section->reset_table_header);
+            chmc_control_data_init(&section->control_data);
+            INIT_LIST_HEAD(&section->mark_list);
+            section->mark_count = 0;
+        }
+    } else {
+        chmcerr_set(errno, strerror(errno));
+        chmcerr_msg("section '%s' allocation failed", name);
+    }
+
+    return section;
+
+ fail:
+    free(section);
+    return NULL;
+}
+
+/* Fill an LZXC reset table header with its initial (empty table) values. */
+void chmc_reset_table_init(struct chmcLzxcResetTable *reset_table)
+{
+    reset_table->version = 2;
+    reset_table->block_count = 0;
+    reset_table->entry_size = 8;
+    reset_table->table_offset = _CHMC_LZXC_RESETTABLE_V1_LEN;
+    reset_table->uncompressed_len = 0;
+    reset_table->compressed_len = 0;
+    reset_table->block_len = 0x8000;
+}
+
+/* Fill an LZXC control data block with its initial values. */
+void chmc_control_data_init(struct chmcLzxcControlData *control_data)
+{
+    control_data->size = 6;
+    memcpy(control_data->signature, "LZXC", 4);
+    control_data->version = 2;
+    control_data->resetInterval = 2;
+    control_data->windowSize = 2;
+    control_data->windowsPerReset = 1;
+    control_data->unknown_18 = 0;
+}
+
+/*
+ * Build the chm->sections lookup array from sections_list and emit the
+ * name list; `len` accumulates the name list size in bytes (4 header bytes
+ * plus, per section, 4 bytes and two bytes per name character).
+ */
+void chmc_sections_done(struct chmcFile *chm)
+{
+    int len;
+    int i;
+
+    assert(chm);
+
+    chm->sections = malloc(sizeof(struct chmcSection *) * chm->sections_num);
+    if (chm->sections) {
+        struct chmcSection *section;
+        struct list_head *pos;
+
+        i = 0;
+        len = 4;
+        list_for_each(pos, &chm->sections_list) {
+            section = list_entry(pos, struct chmcSection, list);
+            len += 4 + strlen(section->name) * 2;
+            chm->sections[i++] = section;
+        }
+        chmc_namelist_create(chm, len);
+    } else
+        BUG_ON("FIXME: %s: %d\n", __FILE__, __LINE__);
+}
+
+int chmc_namelist_create(struct
chmcFile *chm, int len) +{ + UInt16 *namelist; + + namelist = malloc(len); + if (namelist) { + struct chmcSection *section; + int i, j, k, name_len; + + k = 0; + namelist[k++] = len >> 1; + namelist[k++] = chm->sections_num; + for( i=0; i < chm->sections_num; i++ ) { + section = chm->sections[i]; + + name_len = strlen(section->name); + namelist[k++] = name_len; + for( j=0; j < name_len; j++ ) + namelist[k++] = section->name[j]; + namelist[k++] = 0; + } + chmc_add_meta(chm, "::DataSpace/NameList", 0, (UChar *)namelist, len); + } + else + return CHMC_ENOMEM; + + return CHMC_NOERR; +} + +struct chmcTreeNode *chmc_add_empty(struct chmcFile *chm, const char *file) +{ + assert(chm); + return chmc_add_entry(chm, file, 0, 0, NULL, 0, 0); +} + +struct chmcTreeNode *chmc_add_meta(struct chmcFile *chm, const char *metaname, + int sect_id, + UChar *buf, UInt64 len) +{ + struct chmcSection *section; + struct chmcTreeNode *node; + + assert(chm); + + if (sect_id >= chm->sections_num) + return NULL; + + section = chm->sections[sect_id]; + + node = chmc_add_entry(chm, metaname, 0, sect_id, buf, section->offset, len); + + if ((node) && (len > 0)) + section->offset += len; + + return node; +} + +struct chmcTreeNode *chmc_add_entry(struct chmcFile *chm, const char *name, + UInt16 prefixlen, int sect_id, UChar *buf, + UInt64 offset, UInt64 len) +{ + struct chmcTreeNode *node; + + assert(chm); + + if (sect_id >= (chm->sections_num)) { + fprintf(stderr,"sect_id %d >= chm->sections_num %d\n", + sect_id, chm->sections_num); + return NULL; + } + + node = malloc(sizeof(struct chmcTreeNode)); + if (node) { + node->flags = 0; + node->name = strdup( name ); + node->prefixlen = prefixlen; + node->sect_id = sect_id; + node->buf = buf; + node->offset = offset; + node->len = len; + list_add_tail(&node->list, &chm->entries_list); + chm->entries_num++; + } + else + BUG_ON("FIXME: %s: %d\n", __FILE__, __LINE__); + + return node; +} + +void chmc_term(struct chmcFile *chm) +{ + assert(chm); + 
assert(chm->fd > -1);
+
+    free(chm->strings);
+
+    chmc_entries_free(chm);
+    chmc_pmgl_free(chm);
+    chmc_pmgi_free(chm);
+    if (chm->sections)
+        free(chm->sections);
+    chmc_sections_free(chm);
+
+    /* stdout is only borrowed when the output name was "-" */
+    if (chm->fd != fileno(stdout))
+        close(chm->fd);
+}
+
+/* Unlink and destroy every section on chm->sections_list. */
+void chmc_sections_free(struct chmcFile *chm)
+{
+    struct chmcSection *section;
+    struct list_head *pos, *q;
+
+    assert(chm);
+
+    list_for_each_safe(pos, q, &chm->sections_list) {
+        section = list_entry(pos, struct chmcSection, list);
+        list_del(pos);
+        chmc_section_destroy(section);
+    }
+}
+
+/*
+ * Release one section: free the reset table marks (kept only by the
+ * "MSCompressed" section), close and unlink its temporary file, then free
+ * the section itself.
+ */
+void chmc_section_destroy(struct chmcSection *section)
+{
+    assert(section);
+    assert(section->fd > -1);
+
+    if (strcmp(section->name, "MSCompressed") == 0) {
+        struct list_head *pos, *q;
+        struct chmcResetTableMark *mark;
+
+        list_for_each_safe(pos, q, &section->mark_list) {
+            mark = list_entry(pos, struct chmcResetTableMark, list);
+            list_del(pos);
+            free(mark);
+        }
+    }
+
+    close(section->fd);
+    unlink(section->filename);
+    free(section);
+}
+
+/* Unlink and destroy every PMGI chunk node on chm->pmgi_list. */
+void chmc_pmgi_free(struct chmcFile *chm)
+{
+    struct chmcPmgiChunkNode *node;
+    struct list_head *pos, *q;
+
+    assert(chm);
+
+    list_for_each_safe(pos, q, &chm->pmgi_list) {
+        node = list_entry(pos, struct chmcPmgiChunkNode, list);
+        list_del(pos);
+        chmc_pmgi_destroy(node);
+    }
+}
+
+/* Unlink and destroy every PMGL chunk node on chm->pmgl_list. */
+void chmc_pmgl_free(struct chmcFile *chm)
+{
+    struct chmcPmglChunkNode *node;
+    struct list_head *pos, *q;
+
+    assert(chm);
+
+    list_for_each_safe(pos, q, &chm->pmgl_list) {
+        node = list_entry(pos, struct chmcPmglChunkNode, list);
+        list_del(pos);
+        chmc_pmgl_destroy(node);
+    }
+}
+
+/* Destroy every entry on chm->entries_list and free the sort array. */
+void chmc_entries_free( struct chmcFile *chm )
+{
+    struct chmcTreeNode *node;
+    struct list_head *pos, *q;
+
+    assert(chm);
+
+    list_for_each_safe(pos, q, &chm->entries_list) {
+        node = list_entry(pos, struct chmcTreeNode, list);
+        list_del(pos);
+        chmc_entry_destroy(node);
+    }
+
+    free(chm->sort_entries);
+}
+
+UInt32 chmc_strings_add( struct chmcFile *chm, const char *s)
+{
+    UInt32 len, off;
+
+    /* FIXME null are errors */
+ if (!s || *s == '\0') + return 0; + + len = strlen(s); + + off = chm->strings_offset; + + if (off + len + 1 < chm->strings_len) { + + memcpy(&chm->strings[off], s, len + 1); + chm->strings_offset += len + 1; + + } else { + /* realloc strings */ + /* if the string truncate copy til end of chunk + then re-copy from 0 of new */ + BUG_ON("FIXME: %s: %d: handle more chunk for strings\n", + __FILE__, __LINE__); + } + + return off; +} + +void chmc_entry_destroy( struct chmcTreeNode *node ) +{ + assert(node); + assert(node->name); + + free(node->name); + if (node->buf && !(node->flags & CHMC_TNFL_STATIC)) + free(node->buf); + free(node); +} + +struct chmcTreeNode *chmc_add_file(struct chmcFile *chm, const char *filename, + UInt16 prefixlen, int sect_id, UChar *buf, + UInt64 len) +{ + struct chmcSection *section; + struct chmcTreeNode *node; + + assert(chm); + + if (sect_id >= chm->sections_num) + return NULL; + + section = chm->sections[sect_id]; + + node = chmc_add_entry(chm, filename, prefixlen, sect_id, NULL, + section->offset, len); + + if ((node) && (len > 0)) + section->offset += len; + + return node; +} + +struct chmcTreeNode *chmc_add_dir(struct chmcFile *chm, const char *dir) +{ + assert(chm); + + return chmc_add_entry(chm, dir, 0, 0, NULL, 0, 0); +} + +static inline void *chmc_syscat_mem(void *d, void *s, unsigned long len) +{ + memcpy(d, s, len); + + return d + len; +} + +static void *chmc_syscat_entry(Int16 code, void *d, void *s, Int16 len) +{ + d = chmc_syscat_mem(d, &code, 2); + d = chmc_syscat_mem(d, &len, 2); + + return chmc_syscat_mem(d, s, len); +} + +/* #define DEFAULT_TOPIC "index.htm" */ +/* #define TITLE "hello world" */ +/* #define LCASEFILE "test" */ + +int chmc_system_done(struct chmcFile *chm) +{ + struct chmcSystem *system; + struct chmcSystemInfo *sysinfo; + struct chmcIndexHeader *idxhdr; + void *sysp, *p; + + assert(chm); + + system = &chm->system; + sysinfo = &system->info; + idxhdr = &chm->idxhdr; + + // TODO should be set from 
application + // system->_size += (_CHMC_SYS_ENTRY_HDR_LEN + sizeof(UInt32)) /* timestamp */ + // + (_CHMC_SYS_ENTRY_HDR_LEN + sizeof(PACKAGE_STRING)) /* compiler */ + // + (_CHMC_SYS_ENTRY_HDR_LEN + sizeof(UInt32)) /* eof */ + // + (_CHMC_SYS_ENTRY_HDR_LEN + sizeof(DEFAULT_TOPIC)) + // + (_CHMC_SYS_ENTRY_HDR_LEN + sizeof(TITLE)) + // + 32; + + sysp = malloc(16384); + if (sysp) { + UInt32 val; + UInt16 code, len; + const char *entry_val; + + p = chmc_syscat_mem(sysp, &system->version, sizeof(system->version)); + + val = 0; + p = chmc_syscat_entry(SIEC_TIMESTAMP, p, &val, sizeof(val)); + p = chmc_syscat_entry(SIEC_COMPVER, p, + /*"HHA Version 4.74.8702"*/ + PACKAGE_STRING, + sizeof(PACKAGE_STRING) + /*strlen("HHA Version 4.74.8702")+1*/); + p = chmc_syscat_entry(SIEC_SYSINFO, p, + sysinfo, sizeof(struct chmcSystemInfo)); + + if (chm->config != NULL && chm->config->deftopic != NULL) + entry_val = chm->config->deftopic; + else + entry_val = "index.htm"; + p = chmc_syscat_entry(SIEC_DEFTOPIC, p, (void *)entry_val, + strlen(entry_val)+1); + + if (chm->config != NULL && chm->config->title != NULL) + entry_val = chm->config->title; + else + entry_val = "untitled"; + p = chmc_syscat_entry(SIEC_TITLE, p, (void *)entry_val, + strlen(entry_val)+1); + // p = chmc_syscat_entry(SIEC_DEFFONT, p, &val, sizeof(val)); + p = chmc_syscat_entry(SIEC_LCASEFILE, p, "siec_lcasefile", + strlen("siec_lcasefile")+1); + p = chmc_syscat_entry(SIEC_DEFWINDOW, p, + "MsdnHelp", strlen("MsdnHelp")+1); + + val = 0; + p = chmc_syscat_entry(SIEC_NUMOFINFOT, p, &val, sizeof(val)); + + p = chmc_syscat_entry(SIEC_IDXHDR, p, + idxhdr, sizeof(struct chmcIndexHeader)); + + + val = 0; + p = chmc_syscat_entry(SIEC_INFOCHKSUM, p, &val, sizeof(val)); + + system->_size = p - sysp; + chmc_add_meta(chm, "/#SYSTEM", 0, sysp, system->_size); + return CHMC_NOERR; + } + + chmcerr_set(CHMC_ENOMEM, "system done: malloc %d bytes", + system->_size); + + return CHMC_ENOMEM; +} + +int chmc_tree_done( struct chmcFile *chm ) 
+{ + struct chmcItsfHeader *itsf; + struct chmcSect0 *sect0; + struct chmcItspHeader *itsp; + struct chmcTreeNode *ctrl; + UInt32 str_index; + const char *val; + + assert(chm); + + itsf = &chm->itsf; + sect0 = &chm->sect0; + itsp = &chm->itsp; + + chmc_add_dir(chm, "/"); + + ctrl = chmc_add_meta(chm, "::DataSpace/Storage/MSCompressed/Transform/List", + 0, (UChar *)chmc_transform_list, + sizeof(chmc_transform_list)); + if (ctrl) + ctrl->flags |= CHMC_TNFL_STATIC; + + chmc_system_done(chm); + + if (chm->config != NULL && chm->config->deftopic != NULL) + val = chm->config->deftopic; + else + val = "index.htm"; + + str_index = chmc_strings_add(chm, val); + +#if 0 + // FIXME just a test + { + UChar *p; + int len; + struct chmcTopicEntry topicEntry; + // struct chmcUrlStrEntry urlStrEntry; + + p = malloc(4096); + if (p) { + memset(p, 0, 4096); + len = 0; + + topicEntry.tocidx_offset = 4096; + topicEntry.strings_offset = -1; + topicEntry.urltbl_offset = 0; + topicEntry.in_content = 6; + topicEntry.unknown = 0; + + memcpy(p, &topicEntry, sizeof(struct chmcTopicEntry)); + len += sizeof(struct chmcTopicEntry); + + chm->idxhdr.num_of_topic++; + + chmc_add_meta(chm, "/#TOPICS", 1, (UChar *)p, len); + } else + BUG_ON("FIXME: %s: %d\n", __FILE__, __LINE__); + } +#endif + + ctrl = chmc_add_meta(chm, "/#IDXHDR", 1, (void *)&chm->idxhdr, + sizeof(struct chmcIndexHeader)); + if (ctrl) + ctrl->flags |= CHMC_TNFL_STATIC; + + { + UInt32 *p; + p = malloc(8+196); + if (p) { + const char *val; + memset(p+2, 0, 196); + + p[0] = 1; + p[1] = 196; + + p[2+0] = 196; + // p[2+2] = 1; + // p[2+3] = 0x00000532; + // p[2+4] = 0x00062520; + + // p[2+8] = 86; + // p[2+9] = 51; + // p[2+10] = 872; + // p[2+11] = 558; + + // p[2+19] = 220; + + // p[2+27] = 0x00000041; + // p[2+28] = 14462; + + if (chm->config != NULL && chm->config->title != NULL) + val = chm->config->title; + else + val = "untitled"; + p[2+5] = chmc_strings_add(chm, val); + + if (chm->config != NULL && chm->config->hhc != NULL) + val 
= chm->config->hhc; + else + val = "toc.hhc"; + p[2+24] = chmc_strings_add(chm, val); + + if (chm->config != NULL && chm->config->hhk != NULL) + val = chm->config->hhc; + else + val = "toc.hhk"; + p[2+25] = chmc_strings_add(chm, val); + p[2+26] = str_index; + + chmc_add_meta(chm, "/#WINDOWS", 1, (UChar *)p, 8+196); + } else + BUG_ON("FIXME: %s: %d\n", __FILE__, __LINE__); + } + + ctrl = chmc_add_meta(chm, "/#STRINGS", 1, (void *)chm->strings, + chm->strings_len); + if (ctrl) + ctrl->flags |= CHMC_TNFL_STATIC; + +#if 0 + // FIXME just a test + { + UChar *p; + int len; + struct chmcUrlStrEntry urlStrEntry; + + urlStrEntry.url_offset = 0; + urlStrEntry.framename_offset = 0; + + p = malloc(4096); + if (p) { + memset(p, 0, 4096); + *p = 0x42; + len = 1; + + memcpy(p + len, &urlStrEntry, sizeof(struct chmcUrlStrEntry)); + len += sizeof(struct chmcUrlStrEntry); + len += sprintf(p + len, "index.htm" ) + 1; + + memcpy(p + len, &urlStrEntry, sizeof(struct chmcUrlStrEntry)); + len += sizeof(struct chmcUrlStrEntry); + len += sprintf(p + len, "test.htm" ) + 1; + + chmc_add_meta(chm, "/#URLSTR", 1, (UChar *)p, len); + } else + BUG_ON("FIXME: %s: %d\n", __FILE__, __LINE__); + } +#endif + + // chmc_add_entry(chm, "/#URLTBL", 0, 1, NULL, 0, 0); + // chmc_add_entry(chm, "/#TOPICS", 0, 1, NULL, 0, 0); + + // NOTE NOTE NOTE add any meta compressed before crunch ;-) + + chmc_crunch_lzx(chm, 1); + + chmc_control_data_done(chm); + chmc_reset_table_done(chm); + + chmc_add_empty(chm, "/#ITBITS"); + + // NOTE in this implementation compressed Content should be the last file + // added to section 0 + + chmc_add_meta(chm, "::DataSpace/Storage/MSCompressed/Content", 0, NULL, + chm->sections[1]->offset); + + chmc_entries_qsort(chm); + chmc_uncompressed_done(chm); + chmc_pmgl_done(chm); + + chmc_pmgi_done(chm); + + itsf->dir_len = _CHMC_ITSP_V1_LEN + + (_CHMC_CHUNK_LEN * itsp->num_blocks); + + itsf->data_offset = _CHMC_ITSF_V3_LEN + + _CHMC_SECT0_LEN + + _CHMC_ITSP_V1_LEN + + (_CHMC_CHUNK_LEN * 
itsp->num_blocks); + + sect0->file_len += _CHMC_CHUNK_LEN * itsp->num_blocks; + + chmc_write(chm); + + { + struct chmcSection *section; + struct list_head *pos; + UChar buf[4096]; + + list_for_each(pos, &chm->sections_list) { + section = list_entry(pos, struct chmcSection, list); + chmc_appendfile(chm, section->filename, buf, 4096); + } + } + + return CHMC_NOERR; +} + +int chmc_crunch_lzx(struct chmcFile *chm, int sect_id) +{ + struct chmcLzxInfo lzx_info; + + lzx_data *lzxd; + int subd_ok = 1; + int do_reset = 1; + int block_size; + lzx_results lzxr; + int wsize_code = 16; + + assert(chm); + + if ((wsize_code < 15) || (wsize_code > 21)) { + fprintf(stderr, "window size must be between 15 and 21 inclusive\n"); + return CHMC_EINVAL; + } + + lzx_info.chm = chm; + lzx_info.section = chm->sections[sect_id]; + lzx_info.done = 0; + lzx_info.todo = lzx_info.section->offset; + lzx_info.pos = chm->entries_list.next; + lzx_info.error = 0; + lzx_info.eof = 0; + + lzx_info.fd = -1; + lzx_info.fd_offset = 0; + + chmc_compressed_add_mark(lzx_info.chm, 0); + lzx_info.section->reset_table_header.block_count++; + + /* undocumented fact, according to Caie -- + block size cannot exceed window size. (why not?) */ + /* The block size must not be larger than the window size. + While the compressor will create apparently-valid LZX files + if this restriction is violated, some decompressors + will not handle them. */ + + block_size = 1 << wsize_code; + + // lzx_info.section->control_data.windowSize = wsize_code; + // lzx_info.section->control_data.windowsPerReset = block_size; + + lzx_init(&lzxd, wsize_code, + _lzx_get_bytes, &lzx_info, _lzx_at_eof, + _lzx_put_bytes, &lzx_info, + _lzx_mark_frame, &lzx_info); + + while(! 
_lzx_at_eof(&lzx_info)) {
+        if (do_reset)
+            lzx_reset(lzxd);
+        lzx_compress_block(lzxd, block_size, subd_ok);
+    }
+    lzx_finish(lzxd, &lzxr);
+
+    return CHMC_NOERR;
+}
+
+/*
+ * Compressor callback: non-zero once an error was recorded, all `todo`
+ * bytes were consumed, or the end of the entries list was reached.
+ */
+static int _lzx_at_eof(void *arg)
+{
+    struct chmcLzxInfo *lzx_info = (struct chmcLzxInfo *)arg;
+
+    return lzx_info->error || lzx_info->done >= lzx_info->todo || lzx_info->eof;
+}
+
+/*
+ * Compressor callback: write `n` compressed bytes to the section's
+ * temporary file and account for them in the file and section lengths.
+ * Returns the write() result; on failure the error is recorded in
+ * lzx_info->error so that _lzx_at_eof() stops the compression loop.
+ */
+static int _lzx_put_bytes(void *arg, int n, void *buf)
+{
+    struct chmcLzxInfo *lzx_info = (struct chmcLzxInfo *)arg;
+    struct chmcSect0 *sect0 = &lzx_info->chm->sect0;
+    ssize_t wx;
+
+    wx = write(lzx_info->section->fd, buf, n);
+    if (wx < 0) {
+        /* never add -1 to the length counters (file_len is unsigned) */
+        chmc_error("%s: %d: error %d: %s\n",
+                   __FILE__, __LINE__, errno, strerror(errno));
+        lzx_info->error = 3;
+        return wx;
+    }
+    sect0->file_len += wx;
+    lzx_info->section->len += wx;
+
+    return wx;
+}
+
+/*
+ * Compressor callback: record a reset table mark at compressed offset
+ * `comp` and store the latest uncompressed/compressed totals in the
+ * reset table header of section 1.
+ */
+static void _lzx_mark_frame(void *arg, uint32_t uncomp, uint32_t comp)
+{
+    struct chmcLzxInfo *lzx_info = (struct chmcLzxInfo *)arg;
+    struct chmcSection *section = lzx_info->chm->sections[1];
+
+    UInt64 compressed;
+
+    /* casts make the arguments match the %d / %lu conversions */
+    chmc_dump( "Aligned data at %d(in compressed stream, %d) (%lu/%lu)\n",
+               (int)uncomp, (int)comp,
+               (unsigned long)lzx_info->done, (unsigned long)lzx_info->todo );
+
+    compressed = comp;
+
+    section->reset_table_header.block_count++;
+
+    chmc_compressed_add_mark( lzx_info->chm, compressed );
+
+    section->reset_table_header.uncompressed_len = uncomp;
+    section->reset_table_header.compressed_len = comp;
+}
+
+static int _lzx_get_bytes(void *arg, int n, void *buf)
+{
+    struct chmcLzxInfo *lzx_info = (struct chmcLzxInfo *)arg;
+    struct chmcFile *chm = lzx_info->chm;
+    struct chmcTreeNode *node;
+
+    int todo;
+    int done;
+    int toread;
+    int rx;
+
+    todo = n;
+    done = 0;
+
+    // compression state machine
+    // lzx compressor ask for block input bytes
+    // need to keep current entry file and offset through blocks
+    // until last entry
+    while (todo) {
+        // end of entries reached?
+ if (lzx_info->pos == &chm->entries_list) { + lzx_info->eof = 1; + break; + } + + node = list_entry( lzx_info->pos, struct chmcTreeNode, list ); + + // skip empty files and directories + if (node->len == 0 + || strcmp("MSCompressed", chm->sections[node->sect_id]->name)) { + lzx_info->pos = lzx_info->pos->next; + continue; + } + else + if (node->buf) { + // have len and buffer, it's mallocated not file + } + else + if (lzx_info->fd == -1) { + // open file if it isn't + lzx_info->fd = open(node->name, O_RDONLY); + if (lzx_info->fd < 0) { + chmc_error("%s: %d: error %d: '%s' %s\n", + __FILE__, __LINE__, + errno, node->name, strerror(errno)); + lzx_info->error = 1; + break; + } + } + + // read till the end of the file or till the lzx buffer is filled + toread = node->len - lzx_info->fd_offset; + if (toread > todo) + toread = todo; + + if (toread <= 0) + continue; + + // read input + if (node->buf) { + memcpy(buf + (n - todo), &node->buf[lzx_info->fd_offset], toread); + rx = toread; + } + else + { + rx = read(lzx_info->fd, buf + (n - todo), toread); + if (rx <= 0) { + chmc_error("read error\n"); + lzx_info->error = 2; + break; + } + } + + todo -= rx; + lzx_info->fd_offset += rx; + done += rx; + lzx_info->done += rx; + + // end of current file reached, goto next entry + if (lzx_info->fd_offset == node->len) { + if (lzx_info->fd > -1) + close(lzx_info->fd); + lzx_info->fd = -1; + lzx_info->fd_offset = 0; + lzx_info->pos = lzx_info->pos->next; + } + } + + return done; +} + +int chmc_compressed_add_mark(struct chmcFile *chm, UInt64 at) +{ + struct chmcSection *section; + struct chmcResetTableMark *mark; + + assert(chm); + + section = chm->sections[1]; + + mark = malloc(_CHMC_RSTTBL_MARK); + if (mark) { + mark->at = at; + chmc_dump("[%d] at: %jd\n", section->mark_count, at); + list_add_tail(&mark->list, §ion->mark_list); + section->mark_count++; + return CHMC_NOERR; + } + + return CHMC_ENOMEM; +} + +int chmc_control_data_done(struct chmcFile *chm) +{ + struct chmcTreeNode 
*ctrl;
+
+    ctrl = chmc_add_meta(chm, "::DataSpace/Storage/MSCompressed/ControlData",
+                         0, (UChar *)&chm->sections[1]->control_data,
+                         _CHMC_LZXC_V2_LEN);
+
+    if (ctrl) {
+        ctrl->flags |= CHMC_TNFL_STATIC;
+        return CHMC_NOERR;
+    }
+
+    return CHMC_ENOMEM;
+}
+
+/*
+ * Build the LZXC reset table of section 1 (header followed by one UInt64
+ * per recorded mark) and register it, together with a SpanInfo entry that
+ * holds the uncompressed length, as meta entries of the archive.
+ *
+ * Returns CHMC_NOERR, or CHMC_ENOMEM when an allocation or a meta entry
+ * cannot be created.
+ */
+int chmc_reset_table_done(struct chmcFile *chm)
+{
+    struct chmcSection *section;
+    struct chmcLzxcResetTable *reset_table;
+    struct list_head *pos;
+    struct chmcResetTableMark *mark;
+
+    UInt64 *at;
+    UInt64 *uncompressed_len;
+    int i, len;
+
+    section = chm->sections[1];
+
+    len = _CHMC_LZXC_RESETTABLE_V1_LEN + (section->mark_count * sizeof(UInt64));
+
+    reset_table = malloc(len);
+    if (!reset_table)
+        return CHMC_ENOMEM;
+
+    memcpy(reset_table, &section->reset_table_header,
+           _CHMC_LZXC_RESETTABLE_V1_LEN);
+    /* the marks start right behind the header; step in bytes through a
+       UChar pointer (arithmetic on void * is a GNU extension) */
+    at = (UInt64 *)((UChar *)reset_table + _CHMC_LZXC_RESETTABLE_V1_LEN);
+
+    i = 0;
+    list_for_each(pos, &section->mark_list) {
+        mark = list_entry(pos, struct chmcResetTableMark, list);
+        at[i++] = mark->at;
+    }
+
+    chmc_add_dir(chm, "::DataSpace/Storage/MSCompressed/Transform/"
+                 "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/");
+    /* NOTE(review): the buffer is presumed to be owned by the new node only
+       when chmc_add_meta() succeeds - confirm against chmc_add_meta() */
+    if (!chmc_add_meta(chm, "::DataSpace/Storage/MSCompressed/Transform/"
+                       "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}"
+                       "/InstanceData/ResetTable",
+                       0, (UChar *)reset_table, len)) {
+        free(reset_table);
+        return CHMC_ENOMEM;
+    }
+
+    /* TODO FIXME do better */
+    uncompressed_len = malloc(8);
+    if (!uncompressed_len)
+        return CHMC_ENOMEM;
+
+    *uncompressed_len = reset_table->uncompressed_len;
+    if (!chmc_add_meta(chm, "::DataSpace/Storage/MSCompressed/SpanInfo",
+                       0, (UChar *)uncompressed_len, 8)) {
+        free(uncompressed_len);
+        return CHMC_ENOMEM;
+    }
+
+    return CHMC_NOERR;
+}
+
+/* Fill chm->sort_entries with one pointer per entry of chm->entries_list
+   and sort the array with _entry_cmp(). */
+void chmc_entries_qsort(struct chmcFile *chm)
+{
+    struct chmcTreeNode *node;
+    struct list_head *pos;
+    int i;
+
+    assert(chm);
+
+    chm->sort_entries = malloc(sizeof(struct chmcTreeNode *)
+                               * chm->entries_num);
+    /* the function cannot report failure: stop with a message instead of
+       writing through a NULL array (malloc(0) may legitimately be NULL) */
+    if (!chm->sort_entries && chm->entries_num > 0)
+        BUG_ON("out of memory sorting %d entries\n", chm->entries_num);
+
+    i = 0;
+    list_for_each(pos, &chm->entries_list) {
+        node = list_entry(pos, struct chmcTreeNode, list);
+        chm->sort_entries[i++] = node;
+    }
+
+    /* plain function-pointer type instead of the glibc-only __compar_fn_t */
+    qsort(chm->sort_entries, chm->entries_num, sizeof(struct chmcTreeNode *),
+          (int (*)(const void *, const void *)) _entry_cmp);
+}
+
+/* qsort() comparator: order entries by name, ignoring each entry's prefix */
+static int _entry_cmp(struct chmcTreeNode **pa, struct chmcTreeNode **pb)
+{
+    struct chmcTreeNode *a = *pa, *b = *pb;
+
+    return strcmp( &a->name[a->prefixlen], &b->name[b->prefixlen] );
+}
+
+/*
+ * Write the in-memory buffer of every entry that is not in the
+ * "MSCompressed" section to its section file and add the written size to
+ * sect0->file_len.  Returns CHMC_NOERR, or the errno of a failed write().
+ */
+int chmc_uncompressed_done(struct chmcFile *chm)
+{
+    struct chmcSect0 *sect0 = &chm->sect0;
+    struct chmcTreeNode *node;
+    struct list_head *pos;
+    ssize_t wx;
+
+    list_for_each(pos, &chm->entries_list) {
+        node = list_entry( pos, struct chmcTreeNode, list );
+
+        if (strcmp( "MSCompressed", chm->sections[node->sect_id]->name ) == 0)
+            continue;
+
+        if ((node->buf) && (node->len > 0)) {
+            wx = write(chm->sections[node->sect_id]->fd, node->buf, node->len);
+            if (wx < 0) {
+                /* never add -1 to the file length */
+                int err = errno;
+                chmcerr_set_return(err, "write '%s': ", node->name);
+            }
+            sect0->file_len += wx;
+        }
+    }
+
+    return CHMC_NOERR;
+}
+
+/* Add every entry, in sorted order, to the PMGL (listing) chunks. */
+void chmc_pmgl_done(struct chmcFile *chm)
+{
+    struct chmcTreeNode *entry;
+    int i;
+
+    assert(chm);
+
+    for(i=0; i < chm->entries_num; i++) {
+        entry = chm->sort_entries[i];
+        chmc_pmgl_add_entry(chm, entry);
+    }
+}
+
+/*
+ * Encode one directory entry (name length, name, section id, offset,
+ * length) into the last PMGL chunk, starting a new chunk when the current
+ * one has no room left.
+ *
+ * Returns CHMC_NOERR, CHMC_ENOMEM when the section lookup or a chunk
+ * allocation fails, or CHMC_EINVAL when the entry cannot fit into a chunk.
+ */
+int chmc_pmgl_add_entry(struct chmcFile *chm, struct chmcTreeNode *entry)
+{
+    struct chmcPmglChunkNode *pmgl;
+    struct chmcPmglChunk *chunk;
+    struct chmcSection *section;
+    struct chmcItspHeader *itsp = &chm->itsp;
+
+    UChar *p;
+    UInt16 *idx;
+    int name_len;
+    int outlen;
+    int should_idx, idx_intlv;
+    int room;   /* formerly "free", which hid free() inside this function */
+
+    assert(chm);
+    assert(entry);
+
+    // check section bound
+    section = chmc_section_lookup(chm, entry->sect_id);
+    if (!section)
+        chmcerr_set_return(CHMC_ENOMEM, "section %d lookup failed: ",
+                           entry->sect_id);
+
+    // check chunk space for new entry
+    name_len = strlen(&entry->name[entry->prefixlen]);
+
+    outlen = chmc_encint_len(name_len);
+    outlen += name_len;
+    outlen += chmc_encint_len(entry->sect_id);
+    outlen += chmc_encint_len(entry->offset);
+    outlen += chmc_encint_len(entry->len);
+
+    // an entry larger than an empty chunk would make the loop below
+    // allocate new chunks forever (sizeof does not evaluate `chunk`)
+    if ((size_t)outlen > sizeof(chunk->data))
+        chmcerr_set_return(CHMC_EINVAL,
+                           "entry '%s' does not fit into a pmgl chunk: ",
+                           entry->name);
+
+    // look for current pmgl chunk, create if doesn't exist
+    if (!chm->pmgl_last) {
+        pmgl = chmc_pmgl_create();
+        if (pmgl)
+            chmc_pmgl_add(chm, pmgl);
+        else
+            chmcerr_set_return(CHMC_ENOMEM, "pmgl chunk: ");
+    }
+    else
+        pmgl = chm->pmgl_last;
+
+    do {
+
+        chunk = &chm->pmgl_last->chunk;
+
+        idx_intlv = 1 + ( 1 << itsp->blockidx_intvl );
+        should_idx = ( ( chunk->entries_count > 0 )
+                       && ! ( ( chunk->entries_count + 1 ) % idx_intlv )
+                       ? 2 : 0 );
+
+        room = sizeof(chunk->data) - pmgl->data_len - pmgl->index_len
+            - should_idx;
+
+        // current(last) chunk doesn't have enough room? force new one
+        if (outlen + should_idx > room) {
+            //chm->pmgl_last = NULL;
+            pmgl = chmc_pmgl_create();
+            if ( pmgl )
+                chmc_pmgl_add(chm, pmgl);
+            else
+                chmcerr_set_return(CHMC_ENOMEM, "pmgl chunk: ");
+
+            continue;
+        }
+
+        p = &chunk->data[pmgl->data_len];
+
+        if (should_idx) {
+            // NOTE(review): with index_len == 0 this slot is
+            // &chunk->data[CHMC_PMGL_DATA_LEN], i.e. one past the data
+            // array - confirm the intended quick-reference layout
+            idx = (UInt16 *)(&chunk->data[CHMC_PMGL_DATA_LEN] - pmgl->index_len);
+            *idx = p - chunk->data;
+        }
+
+        p += chmc_encint(name_len, p);
+        memcpy(p, &entry->name[entry->prefixlen], name_len);
+        p += name_len;
+        p += chmc_encint(entry->sect_id, p);
+        p += chmc_encint(entry->offset, p);
+        p += chmc_encint(entry->len, p);
+
+        pmgl->data_len += outlen;
+        pmgl->index_len += should_idx;
+
+        chunk->entries_count++;
+        chunk->header.free_space -= outlen;
+        break;
+
+    } while (1);
+
+    return CHMC_NOERR;
+}
+
+/* Return the id-th section (0-based) of chm->sections_list, or NULL. */
+struct chmcSection *chmc_section_lookup(struct chmcFile *chm, int id)
+{
+    struct chmcSection *current;
+    struct list_head *pos;
+    int i;
+
+    assert(chm);
+
+    i = 0;
+    list_for_each(pos, &chm->sections_list) {
+        current = list_entry(pos, struct chmcSection, list);
+        if (i == id)
+            return current;
+        i++;
+    }
+
+    return NULL;
+}
+
+/* Allocate and initialise an empty PMGL chunk node; NULL when out of memory. */
+struct chmcPmglChunkNode *chmc_pmgl_create(void)
+{
+    struct chmcPmglChunkNode *node;
+
+    node = malloc(sizeof(struct chmcPmglChunkNode));
+    if (node)
+        chmc_pmgl_init(node);
+
+    return node;
+}
+
+/* Reset a PMGL chunk node to the empty state. */
+void chmc_pmgl_init(struct chmcPmglChunkNode *node)
+{
+    struct chmcPmglChunk *chunk;
+
+    assert(node);
+
+    node->data_len = 0;
+    node->index_len = 0;
+
+    chunk = &node->chunk;
+
+    memcpy(chunk->header.signature, "PMGL", 4);
+
+    // FIXME check it is the right len
+    chunk->header.free_space = CHMC_PMGL_DATA_LEN + 2;
+    chunk->header.unknown_0008 = 0;
+    chunk->header.block_prev = -1;
+    chunk->header.block_next = -1;
+
+    memset(chunk->data, 0, CHMC_PMGL_DATA_LEN);
+    // the node comes from malloc(): the counter is read by
+    // chmc_pmgl_add_entry() before it is ever written
+    chunk->entries_count = 0;
+}
+
+/* Reset a PMGI chunk node to the empty state. */
+void chmc_pmgi_init(struct chmcPmgiChunkNode *node)
+{
+    struct chmcPmgiChunk *chunk;
+
+    assert(node);
+
+    node->data_len = 0;
+    node->index_len = 0;
+
+    chunk = &node->chunk;
+
+    memcpy(chunk->header.signature, "PMGI", 4);
+
+    // FIXME check it is the right len
+    chunk->header.free_space = CHMC_PMGI_DATA_LEN + 2;
+    // chunk->header.unknown_0008 = 0;
+    // chunk->header.block_prev = -1;
+    // chunk->header.block_next = -1;
+
+    memset(chunk->data, 0, CHMC_PMGI_DATA_LEN);
+    // the node comes from malloc(): the counter is read by
+    // chmc_pmgi_add_entry() before it is ever written
+    chunk->entries_count = 0;
+}
+
+
+
+/* Allocate and initialise an empty PMGI chunk node; NULL when out of memory. */
+struct chmcPmgiChunkNode *chmc_pmgi_create(void)
+{
+    struct chmcPmgiChunkNode *node;
+
+    node = malloc(sizeof(struct chmcPmgiChunkNode));
+    if (node)
+        chmc_pmgi_init(node);
+
+    return node;
+}
+
+/* Release a PMGL chunk node. */
+void chmc_pmgl_destroy(struct chmcPmglChunkNode *node)
+{
+    assert(node);
+    free(node);
+}
+
+/* Release a PMGI chunk node. */
+void chmc_pmgi_destroy(struct chmcPmgiChunkNode *node)
+{
+    assert(node);
+    free(node);
+}
+
+/* Append pmgl to the chunk list, link it with the previous chunk through
+   block_prev/block_next and make it the current (last) chunk. */
+void chmc_pmgl_add(struct chmcFile *chm, struct chmcPmglChunkNode *pmgl)
+{
+    struct chmcItspHeader *itsp = &chm->itsp;
+    struct chmcPmglHeader *hdr;
+
+    assert(chm);
+    assert(pmgl);
+
+    list_add_tail(&pmgl->list, &chm->pmgl_list);
+
+    itsp->index_last = itsp->num_blocks;
+
+    hdr = &pmgl->chunk.header;
+    hdr->block_prev = itsp->num_blocks - 1;
+
+    if (chm->pmgl_last) {
+        hdr = &chm->pmgl_last->chunk.header;
+        hdr->block_next = itsp->num_blocks;
+    }
+
+    itsp->num_blocks++;
+
+    chm->pmgl_last = pmgl;
+}
+
+int chmc_pmgi_done(struct chmcFile *chm)
+{
+    struct chmcItspHeader *itsp = &chm->itsp;
+    struct chmcPmglChunkNode *pmgl;
+    struct list_head *pos;
+
+    int i, j;
+    char name[256]; //FIXME use malloc
+    UInt32 name_len;
+
+    assert(chm);
+
+    // only one pml, omitted pmgi
+    if (itsp->num_blocks == 1) {
+        itsp->index_depth = 1;
+        itsp->index_root = -1;
+        itsp->index_last = 0;
+        return CHMC_NOERR;
+    }
+
+    itsp->index_root = itsp->num_blocks;
+
+ i = 0; + list_for_each(pos, &chm->pmgl_list) { + pmgl = list_entry(pos, struct chmcPmglChunkNode, list); + j = chmc_decint(&pmgl->chunk.data[0], &name_len); + if (name_len <= 255) { + memcpy(name, &pmgl->chunk.data[j], name_len); + name[name_len] = '\0'; + chmc_pmgi_add_entry(chm, name, i); + } + else + BUG_ON("name_len >= 255(%lu) %.*s\n", name_len, 255, + &pmgl->chunk.data[j]); + i++; + } + + return CHMC_NOERR; +} + +int chmc_pmgi_add_entry(struct chmcFile *chm, const char *name, int pmgl_id) +{ + struct chmcPmgiChunkNode *pmgi; + struct chmcPmgiChunk *chunk; + struct chmcItspHeader *itsp = &chm->itsp; + + UChar *p; + UInt16 *idx; + int name_len; + int outlen; + int should_idx, idx_intlv; + int free; + + assert(chm); + + // check chunk space for new entry + name_len = strlen(name); + + outlen = chmc_encint_len(name_len); + outlen += name_len; + outlen += chmc_encint_len(pmgl_id); + + // look for current pmgi chunk, create if doesn't exist + if (!chm->pmgi_last) { + pmgi = chmc_pmgi_create(); + if (pmgi) + chmc_pmgi_add(chm, pmgi); + else + chmcerr_set_return(CHMC_ENOMEM, "pmgi chunk: "); + } + else + pmgi = chm->pmgi_last; + + do { + + chunk = &chm->pmgi_last->chunk; + + idx_intlv = 1 + ( 1 << itsp->blockidx_intvl ); + should_idx = ( ( chunk->entries_count > 0 ) + && ! ( ( chunk->entries_count + 1 ) % idx_intlv ) + ? 2 : 0 ); + + free = sizeof(chunk->data) - pmgi->data_len - + pmgi->index_len - should_idx; + + // current(last) chunk doesn't have enough room? 
force new one + if (outlen + should_idx > free) { + pmgi = chmc_pmgi_create(); + if (pmgi) + chmc_pmgi_add(chm, pmgi); + else + chmcerr_set_return(CHMC_ENOMEM, "pmgi chunk: "); + + continue; + } + + p = (void *)&chunk->data[pmgi->data_len]; + + if (should_idx) { + idx = (void *)&chunk->data[CHMC_PMGI_DATA_LEN] - pmgi->index_len; + *idx = (void *)p - (void *)&chunk->data; + } + + p += chmc_encint(name_len, p); + memcpy(p, name, name_len); + p += name_len; + p += chmc_encint(pmgl_id, p); + + pmgi->data_len += outlen; + pmgi->index_len += should_idx; + + chunk->entries_count++; + chunk->header.free_space -= outlen; + break; + + } while (1); + + return CHMC_NOERR; +} + +void chmc_pmgi_add(struct chmcFile *chm, struct chmcPmgiChunkNode *pmgi) +{ + struct chmcItspHeader *itsp = &chm->itsp; + + assert(chm); + assert(pmgi); + + list_add_tail(&pmgi->list, &chm->pmgi_list); + itsp->num_blocks++; + + chm->pmgi_last = pmgi; +} + +int chmc_write(struct chmcFile *chm) +{ + struct chmcItsfHeader *itsf = &chm->itsf; + struct chmcSect0 *sect0 = &chm->sect0; + struct chmcItspHeader *itsp = &chm->itsp; + + struct chmcPmglChunkNode *pmgl; + struct chmcPmgiChunkNode *pmgi; + struct list_head *pos; + + assert(chm); + + write(chm->fd, itsf, _CHMC_ITSF_V3_LEN); + write(chm->fd, sect0, _CHMC_SECT0_LEN); + write(chm->fd, itsp, _CHMC_ITSP_V1_LEN); + + list_for_each(pos, &chm->pmgl_list) { + pmgl = list_entry(pos, struct chmcPmglChunkNode, list); + write(chm->fd, &pmgl->chunk, _CHMC_CHUNK_LEN); + } + + if (itsp->num_blocks > 1) { + list_for_each( pos, &chm->pmgi_list ) { + pmgi = list_entry(pos, struct chmcPmgiChunkNode, list); + write(chm->fd, &pmgi->chunk, _CHMC_CHUNK_LEN); + } + } + + return CHMC_NOERR; +} + +int chmc_appendfile(struct chmcFile *chm, const char *filename, void *buf, + size_t size ) +{ + struct stat statbuf; + int in; + off_t todo, toread; + ssize_t rx; + + if (stat(filename, &statbuf) < 0) + return errno; + + in = open(filename, O_RDONLY); + if (in >= 0) { + todo = 
statbuf.st_size; + + while (todo) { + toread = size; + if (toread > todo) + toread = todo; + + rx = read(in, buf, toread); + if (rx > 0) { + write(chm->fd, buf, rx); + todo -= rx; + } + } + + close(in); + } + else + BUG_ON("open %s\n", filename); + + return CHMC_NOERR; +} diff --git a/reactos/tools/hhpcomp/chmc/chmc.h b/reactos/tools/hhpcomp/chmc/chmc.h new file mode 100644 index 00000000000..23a0f47dc06 --- /dev/null +++ b/reactos/tools/hhpcomp/chmc/chmc.h @@ -0,0 +1,261 @@ +/* + + Copyright (C) 2010 Alex Andreotti + + This file is part of chmc. + + chmc is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + chmc is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with chmc. If not, see . 
+
+*/
+#ifndef CHMC_CHMC_H
+#define CHMC_CHMC_H
+
+/* NOTE(review): the header names of the next two #include directives were
+   lost (text between angle brackets stripped).  PATH_MAX and fprintf() are
+   used below, which suggests <limits.h> and <stdio.h>/<stdlib.h> - confirm
+   against the upstream chmc sources. */
+#include
+#include
+
+#include "chm.h"
+#include "list.h"
+
+/* 16-byte identifiers, stored as raw byte strings */
+#define CHMC_DIR_UUID \
+    "\x10\xfd\x01\x7c\xaa\x7b\xd0\x11\x9e\x0c\x00\xa0\xc9\x22\xe6\xec"
+#define CHMC_STREAM_UUID \
+    "\x11\xfd\x01\x7c\xaa\x7b\xd0\x11\x9e\x0c\x00\xa0\xc9\x22\xe6\xec"
+#define CHMC_SYSTEM_UUID \
+    "\x6a\x92\x02\x5d\x2e\x21\xd0\x11\x9d\xf9\x00\xa0\xc9\x22\xe6\xec"
+
+/* Index header.  With 4-byte Int32 the fields add up to 4096 bytes
+   (4 + 19 * 4 + 1004 * 4), the length quoted for #IDXHDR below. */
+struct chmcIndexHeader {
+    char signature[4];
+    Int32 unknown_4;
+    Int32 unknown_8;
+    Int32 num_of_topic;
+    Int32 unknown_10;
+    Int32 off_img_list;
+    Int32 unknown_18;
+    Int32 img_type_folder;
+    Int32 background;
+    Int32 foreground;
+    Int32 off_font;
+    Int32 win_style;
+    Int32 ex_win_style;
+    Int32 unknown_34;
+    Int32 off_frame_name;
+    Int32 off_win_name;
+    Int32 num_of_info;
+    Int32 unknown_44;
+    Int32 num_of_merge_files;
+    Int32 unknown_4c;
+    Int32 merge_files_offs[1004];
+};
+
+/* Sys Info Entry codes */
+#define SIEC_DEFTOPIC 2
+#define SIEC_TITLE 3
+#define SIEC_LCASEFILE 6
+#define SIEC_DEFWINDOW 5
+
+/* present in files with Binary Index turned on. (eg: af 08 63 ac)
+   The entry in the #URLTBL file that points to the sitemap index had
+   the same first DWORD */
+#define SIEC_HAVE_BINDX 7
+#define SIEC_NUMOFINFOT 12
+
+/* The #IDXHDR file contains exactly the same bytes (len 4096) */
+#define SIEC_IDXHDR 13
+
+#define SIEC_INFOCHKSUM 15
+#define SIEC_DEFFONT 16
+
+#define SIEC_TIMESTAMP 10
+#define SIEC_COMPVER 9
+#define SIEC_SYSINFO 4
+
+/* NOTE use only as pointer */
+#define _CHMC_SYS_ENTRY_HDR_LEN (sizeof(UInt16)*2)
+struct chmcSystemEntry {
+    UInt16 code; /* FIXME check unsigned */
+    UInt16 len; /* FIXME check unsigned */
+    UChar data[65535];
+};
+
+/* NOTE use only as pointer */
+#define _CHMC_SYS_ENTRY_NODE_HDR_LEN \
+    (sizeof(struct chmcSystemEntryNode *)+_CHMC_SYS_ENTRY_HDR_LEN)
+
+/* singly linked list node wrapping one system entry */
+struct chmcSystemEntryNode {
+    struct chmcSystemEntryNode *next;
+    struct chmcSystemEntry entry;
+};
+
+/* HHA Version 4.72.7294 and earlier */
+#define _CHMC_SYS_INFO_V4_72_7294_LEN (28)
+/* HHA Version 4.72.8086 and later */
+#define _CHMC_SYS_INFO_V4_72_8086_LEN (36)
+struct chmcSystemInfo {
+    UInt32 lcid;
+    UInt32 dbcs;
+    UInt32 full_search;
+    UInt32 klinks;
+    UInt32 alinks;
+    UInt64 timestamp;
+    UInt32 unknown_1c; // >= 8086 only
+    UInt32 unknown_20; // >= 8086 only
+};
+
+
+/* /usr/include/freetype2/freetype/ttnameid.h maybe useful */
+#define CHMC_MS_LCID_EN_US (0x0409)
+
+#define _CHMC_SYSTEM_HDR_LEN (sizeof(Int32)+sizeof(struct chmcSystemInfo))
+struct chmcSystem {
+    Int32 version;
+    struct chmcSystemInfo info;
+
+/* private: */
+    struct chmcSystemEntryNode *_entries;
+    UInt32 _size; /* keep track for alloc before save */
+};
+
+#define _CHMC_CHUNK_LEN (4096)
+/* payload bytes of a PMGL chunk: chunk length minus the header length and
+   minus 2 (sizeof the UInt16 entries_count that follows the data) */
+#define CHMC_PMGL_DATA_LEN (_CHMC_CHUNK_LEN - _CHMC_PMGL_LEN - 2)
+
+struct chmcPmglChunk {
+    struct chmcPmglHeader header;
+    UChar data[CHMC_PMGL_DATA_LEN];
+    UInt16 entries_count;
+};
+
+/* list node: bookkeeping (bytes used by entries / by the index) + chunk */
+struct chmcPmglChunkNode {
+    struct list_head list;
+    int data_len;
+    int index_len;
+    struct chmcPmglChunk chunk;
+};
+
+/* payload bytes of a PMGI chunk, computed like CHMC_PMGL_DATA_LEN */
+#define CHMC_PMGI_DATA_LEN (_CHMC_CHUNK_LEN - _CHMC_PMGI_LEN - 2)
+
+struct chmcPmgiChunk {
+    struct chmcPmgiHeader header;
+    UChar data[CHMC_PMGI_DATA_LEN];
+    UInt16 entries_count;
+};
+
+/* list node: bookkeeping (bytes used by entries / by the index) + chunk */
+struct chmcPmgiChunkNode {
+    struct list_head list;
+    int data_len;
+    int index_len;
+    struct chmcPmgiChunk chunk;
+};
+
+#define CHMC_TNFL_STATIC (1 << 0) /* don't free() */
+
+/* one entry of the archive tree, linked into chmcFile.entries_list */
+struct chmcTreeNode {
+    struct list_head list;
+    UInt32 flags;
+    UInt32 sect_id;
+    char *name;
+    UInt16 prefixlen;
+    UChar *buf;
+    UInt64 offset;
+    UInt64 len;
+};
+
+struct chmcStringChunk {
+    struct list_head list;
+    UInt16 used;
+    UChar data[4096];
+};
+
+struct chmcConfig {
+    const char *title;
+    const char *tmpdir;
+    const char *hhc;
+    const char *hhk;
+    const char *deftopic;
+    UInt16 language;
+};
+
+/* complete state of an archive under construction */
+struct chmcFile {
+    int fd;
+    struct chmcItsfHeader itsf;
+    struct chmcSect0 sect0;
+    struct chmcItspHeader itsp;
+    int sections_num;
+    struct list_head sections_list;
+    struct chmcSection **sections;
+    struct list_head pmgl_list;
+    struct chmcPmglChunkNode *pmgl_last;
+    struct list_head entries_list;
+    int entries_num;
+    struct chmcTreeNode **sort_entries;
+    struct list_head pmgi_list;
+    struct chmcPmgiChunkNode *pmgi_last;
+    struct chmcSystem system;
+    struct chmcIndexHeader idxhdr;
+    UChar *strings;
+    UInt32 strings_offset;
+    UInt32 strings_len;
+    struct chmcConfig *config;
+};
+
+#define CHMC_SECTNAME_MAXLEN (64)
+
+/* one content section, backed by its own file (filename/fd) */
+struct chmcSection {
+    struct list_head list;
+    char name[CHMC_SECTNAME_MAXLEN];
+    UInt64 offset;
+    UInt64 len;
+    char filename[PATH_MAX];
+    int fd;
+    struct chmcLzxcResetTable reset_table_header;
+    struct chmcLzxcControlData control_data;
+    struct list_head mark_list;
+    int mark_count;
+};
+
+#define _CHMC_RSTTBL_MARK (sizeof(struct chmcResetTableMark))
+
+/* one reset-table mark, linked into chmcSection.mark_list */
+struct chmcResetTableMark {
+    UInt64 at;
+    struct list_head list;
+};
+
+struct chmcUrlStrEntry {
+    UInt32 url_offset;
+    UInt32 framename_offset;
+};
+
+struct chmcUtlTblEntry {
+    UInt32 unknown;
+    UInt32 topic_index;
+    UInt32 urlstr_offset;
+};
+
+struct chmcTopicEntry {
+
UInt32 tocidx_offset; + UInt32 strings_offset; + UInt32 urltbl_offset; + short in_content; + short unknown; +}; + + +int chmc_init(struct chmcFile *chm, const char *filename, + struct chmcConfig *config); +void chmc_sections_done(struct chmcFile *chm); +void chmc_term(struct chmcFile *chm); +int chmc_tree_done(struct chmcFile *chm); + +#define chmc_dump(fmt, args...) fprintf(stderr, fmt , ##args) + +#endif /* CHMC_CHMC_H */ diff --git a/reactos/tools/hhpcomp/chmc/encint.h b/reactos/tools/hhpcomp/chmc/encint.h new file mode 100644 index 00000000000..2c2f50f8d15 --- /dev/null +++ b/reactos/tools/hhpcomp/chmc/encint.h @@ -0,0 +1,96 @@ +/* + + Copyright (C) 2010 Alex Andreotti + + This file is part of chmc. + + chmc is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + chmc is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with chmc. If not, see . + +*/ +#ifndef CHMC_ENCINT_H +#define CHMC_ENCINT_H + +// 0x7f 127 +// 0x3fff 16383 +// 0x1fffff 2097151 +// 0xfffffff 268435455 +static inline int chmc_encint_len ( const UInt32 val ) { + int len; + + // FIXME should support 64 bit? + if ( val > 0xfffffffUL ) + len = 0; // overflow + else if ( val > 0x1fffffUL ) + len = 4; + else if ( val > 0x3fffUL ) + len = 3; + else if ( val > 0x7fUL ) + len = 2; + else + len = 1; + + return len; +} + +static inline int chmc_encint ( const UInt32 val, UChar *out ) { + int len; + UInt32 a; + UChar *p, *l; + + // FIXME should support 64 bit? + if ( ! 
out || val > 0xfffffffUL ) + return 0; // FIXME can't handle, overflow + + if ( val > 0x1fffffUL ) + len = 4; + else if ( val > 0x3fffUL ) + len = 3; + else if ( val > 0x7fUL ) + len = 2; + else + len = 1; + + a = val; + l = p = out + (len - 1); + + while ( p >= out ) { + *p = (a & 0x7fUL); + if ( p < l ) + *p |= 0x80UL; + p--; + a >>= 7; + } + + return len; +} + +static inline int chmc_decint ( const UChar *in, UInt32 *value ) { + int len; + + len = 0; + *value = 0; + + while ( (in[len] & 0x80) && (len < 3) ) { + *value <<= 7; + *value |= in[len] & 0x7f; + len++; + } + *value <<= 7; + *value |= in[len] & 0x7f; + len++; + + return len; +} + +#endif /* CHMC_ENCINT_H */ diff --git a/reactos/tools/hhpcomp/chmc/err.c b/reactos/tools/hhpcomp/chmc/err.c new file mode 100644 index 00000000000..485b81fb466 --- /dev/null +++ b/reactos/tools/hhpcomp/chmc/err.c @@ -0,0 +1,66 @@ +/* + + Copyright (C) 2010 Alex Andreotti + + This file is part of chmc. + + chmc is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + chmc is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with chmc. If not, see . + +*/ +#include "err.h" + +#include +#include + +struct chmcErr +{ + int code; + char msg[CHMC_ERRMAXLEN+1]; +}; + +static struct chmcErr chmc_err = { + .code = CHMC_NOERR, + .msg[0] = '\0', +}; + +void chmcerr_clean(void) { + chmc_err.code = CHMC_NOERR; + chmc_err.msg[0] = '\0'; +} + +int chmcerr_code(void) { + return chmc_err.code; +} + +const char *chmcerr_message( void ) { + return chmc_err.msg; +} + +void chmcerr_set(int code, const char *fmt, ...) 
+{ + int len; + va_list ap; + + chmc_err.code = code; + + va_start(ap, fmt); + + len = vsnprintf(chmc_err.msg, CHMC_ERRMAXLEN, fmt, ap); + if (len == CHMC_ERRMAXLEN) + chmc_err.msg[CHMC_ERRMAXLEN] = '\0'; + + assert(len <= CHMC_ERRMAXLEN); + + va_end(ap); +} diff --git a/reactos/tools/hhpcomp/chmc/err.h b/reactos/tools/hhpcomp/chmc/err.h new file mode 100644 index 00000000000..b1c780a45a0 --- /dev/null +++ b/reactos/tools/hhpcomp/chmc/err.h @@ -0,0 +1,73 @@ +/* + + Copyright (C) 2010 Alex Andreotti + + This file is part of chmc. + + chmc is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + chmc is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with chmc. If not, see . + +*/ +#ifndef CHMC_ERR_H +#define CHMC_ERR_H + +#include +#define chmcerr_printf(fmt,args...) fprintf (stderr, fmt , ##args) + +#include +#define BUG_ON(fmt, args...) \ + do { \ + fprintf (stderr, "%s:%d: ", __FILE__, __LINE__); \ + fprintf (stderr, fmt , ##args); \ + abort (); \ + } while (0) + +#define CHMC_ERRMAXLEN (1023) + +#include + +#define CHMC_NOERR (0) +#define CHMC_ENOMEM (ENOMEM) +#define CHMC_EINVAL (EINVAL) + +void chmcerr_set(int code, const char *fmt, ...); +void chmcerr_clean(void); +int chmcerr_code(void); +const char *chmcerr_message(void); + +#define chmc_error(fmt, args...) fprintf (stdout, fmt , ##args) + +#define chmcerr_return_msg(fmt,args...) 
\ + do { \ + chmcerr_printf ( "%s: %d: ", __FILE__, __LINE__ ); \ + chmcerr_printf ( "error %d: ", chmcerr_code () ); \ + chmcerr_printf ( fmt , ##args ); \ + chmcerr_printf ( ": %s\n", chmcerr_message () ); \ + return chmcerr_code (); \ + } while (0) + +#define chmcerr_msg(fmt,args...) \ + do { \ + chmcerr_printf ("%s: %d: ", __FILE__, __LINE__); \ + chmcerr_printf ("error %d: ", chmcerr_code ()); \ + chmcerr_printf (fmt , ##args ); \ + chmcerr_printf (": %s\n", chmcerr_message ()); \ + } while (0) + +#define chmcerr_set_return(code,fmt,args...) \ + do { \ + chmcerr_set ( (code), (fmt), ##args ); \ + return (code); \ + } while (0) + +#endif /* CHMC_ERR_H */ diff --git a/reactos/tools/hhpcomp/chmc/list.h b/reactos/tools/hhpcomp/chmc/list.h new file mode 100644 index 00000000000..1f0dff82010 --- /dev/null +++ b/reactos/tools/hhpcomp/chmc/list.h @@ -0,0 +1,244 @@ +#ifndef __LIST_H +#define __LIST_H + +/* This file is from Linux Kernel (include/linux/list.h) + * and modified by simply removing hardware prefetching of list items. + * Here by copyright, credits attributed to wherever they belong. + * Kulesh Shanmugasundaram (kulesh [squiggly] isis.poly.edu) + */ + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +struct list_head { + struct list_head *next, *prev; +}; + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +#define INIT_LIST_HEAD(ptr) do { \ + (ptr)->next = (ptr); (ptr)->prev = (ptr); \ +} while (0) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! 
+ */
+static inline void __list_add(struct list_head *New,
+                              struct list_head *prev,
+                              struct list_head *next)
+{
+    next->prev = New;
+    New->next = next;
+    New->prev = prev;
+    prev->next = New;
+}
+
+/**
+ * list_add - add a new entry
+ * @New: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *New, struct list_head *head)
+{
+    __list_add(New, head, head->next);
+}
+
+/**
+ * list_add_tail - add a new entry
+ * @New: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *New, struct list_head *head)
+{
+    __list_add(New, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head *prev, struct list_head *next)
+{
+    next->prev = prev;
+    prev->next = next;
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty on entry does not return true after this, the entry is in an undefined state.
+ */
+static inline void list_del(struct list_head *entry)
+{
+    __list_del(entry->prev, entry->next);
+    /* both links are cleared to null pointers */
+    entry->next = (struct list_head *) 0;
+    entry->prev = (struct list_head *) 0;
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+    __list_del(entry->prev, entry->next);
+    INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+    __list_del(list->prev, list->next);
+    list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+                                  struct list_head *head)
+{
+    __list_del(list->prev, list->next);
+    list_add_tail(list, head);
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(struct list_head *head)
+{
+    return head->next == head;
+}
+
+/* Link the entries of @list in right after @head; @list itself is left
+   untouched and must not be empty (see list_splice). */
+static inline void __list_splice(struct list_head *list,
+                                 struct list_head *head)
+{
+    struct list_head *first = list->next;
+    struct list_head *last = list->prev;
+    struct list_head *at = head->next;
+
+    first->prev = head;
+    head->next = first;
+
+    last->next = at;
+    at->prev = last;
+}
+
+/**
+ * list_splice - join two lists
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(struct list_head *list, struct list_head *head)
+{
+    if (!list_empty(list))
+        __list_splice(list, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+                                    struct list_head *head)
+{
+    if (!list_empty(list)) {
+        __list_splice(list, head);
+        INIT_LIST_HEAD(list);
+    }
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_struct within the struct.
+ *
+ * The member offset is computed from a null pointer and subtracted from
+ * @ptr as a byte count.
+ */
+#define list_entry(ptr, type, member) \
+    ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+    for (pos = (head)->next; pos != (head); \
+         pos = pos->next)
+/**
+ * list_for_each_prev - iterate over a list backwards
+ * @pos: the &struct list_head to use as a loop counter.
+ * @head: the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+    for (pos = (head)->prev; pos != (head); \
+         pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop counter.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+    for (pos = (head)->next, n = pos->next; pos != (head); \
+         pos = n, n = pos->next)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop counter.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * Relies on typeof, a compiler extension.
+ */
+#define list_for_each_entry(pos, head, member) \
+    for (pos = list_entry((head)->next, typeof(*pos), member); \
+         &pos->member != (head); \
+         pos = list_entry(pos->member.next, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop counter.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+    for (pos = list_entry((head)->next, typeof(*pos), member), \
+         n = list_entry(pos->member.next, typeof(*pos), member); \
+         &pos->member != (head); \
+         pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+
+#endif
diff --git a/reactos/tools/hhpcomp/hhp_reader.cpp b/reactos/tools/hhpcomp/hhp_reader.cpp
new file mode 100644
index 00000000000..55344ad4737
--- /dev/null
+++ b/reactos/tools/hhpcomp/hhp_reader.cpp
@@ -0,0 +1,262 @@
+
+// This file is part of hhpcomp, a free HTML Help Project (*.hhp) compiler.
+// Copyright (C) 2015 Benedikt Freisen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +#include +#include +#include + +#include + +#include "hhp_reader.h" +#include "utils.h" + +using namespace std; + +string hhp_section::get_name() +{ + return name; +} + +void hhp_section::set_name(string name) +{ + this->name = name; +} + +hhp_pair::hhp_pair(string key, bool has_default_value, string default_value) +{ + this->key = key; + this->has_default_value = has_default_value; + this->default_value = default_value; + value_has_been_set = false; +} + +void hhp_pair::set_value(string value) +{ + this->value = value; + value_has_been_set = true; +} + +string hhp_pair::get_value() +{ + if (value_has_been_set) + return value; + else + { + if (has_default_value) + return default_value; + else + throw domain_error("pair '" + key + "' does not have a default value"); + } +} + +string hhp_pair::get_key() +{ + return key; +} + +void hhp_key_value_section::process_line(string line) +{ + int pos_equals_sign = line.find_first_of('='); + if (pos_equals_sign == string::npos) + throw runtime_error("key-value pair does not contain an equals sign"); + string key = to_upper(line.substr(0, pos_equals_sign)); + string value = line.substr(pos_equals_sign + 1); + if (key.length() == 0) + throw runtime_error("key has length zero"); + + entries.find(key)->second->set_value(value); +} + +void hhp_key_value_section::add_entry(hhp_pair* entry) +{ + string upper_case_key = to_upper(entry->get_key()); + if (entries.count(upper_case_key) != 0) + throw logic_error("trying to redundantly add key '" + upper_case_key + "'"); + entries.insert(pair(upper_case_key, entry)); +} + +hhp_options_section::hhp_options_section() +{ + set_name("OPTIONS"); + + add_entry(binary_TOC = new hhp_pair("Binary TOC", true, "No")); + add_entry(binary_index = new hhp_pair("Binary Index", 
true, "Yes")); + add_entry(compiled_file = new hhp_pair("Compiled File", false)); + add_entry(contents_file = new hhp_pair("Contents File", true, "")); + add_entry(index_file = new hhp_pair("Index File", true, "")); + add_entry(autoindex = new hhp_pair("AutoIndex", true, "No")); + add_entry(defaultwindow = new hhp_pair("DefaultWindow", true, ""));//? + add_entry(default_topic = new hhp_pair("Default Topic", true, "Index.htm"));//? + add_entry(defaultfont = new hhp_pair("DefaultFont", true, "")); + add_entry(language = new hhp_pair("Language", true, "0x409 English (US)"));//? + add_entry(title = new hhp_pair("Title", true, ""));//? + add_entry(createchifile = new hhp_pair("CreateCHIFile", true, "No")); + add_entry(compatibility = new hhp_pair("Compatibility", true, "1.1")); + add_entry(errorlogfile = new hhp_pair("ErrorLogFile", true, "Compiler.log"));//? + add_entry(full_text_search = new hhp_pair("Full-text search", true, "Yes"));//? + add_entry(display_compile_progress = new hhp_pair("Display compile progress", true, "Yes"));//? + add_entry(display_compile_note = new hhp_pair("Display compile note", true, "Yes"));//? 
+ add_entry(flat = new hhp_pair("Flat", true, "No")); + add_entry(full_text_search_stop_list_file = new hhp_pair("Full text search stop list file", true, "")); +} + +hhp_options_section::~hhp_options_section() +{ + delete binary_TOC; + delete binary_index; + delete compiled_file; + delete contents_file; + delete index_file; + delete autoindex; + delete defaultwindow; + delete default_topic; + delete defaultfont; + delete language; + delete title; + delete createchifile; + delete compatibility; + delete errorlogfile; + delete full_text_search; + delete display_compile_progress; + delete display_compile_note; + delete flat; + delete full_text_search_stop_list_file; +} + +hhp_files_section::hhp_files_section() +{ + set_name("FILES"); +} + +void hhp_files_section::process_line(string line) +{ + filenames.push_back(line); +} + +hhp_reader::hhp_reader(string filename) +{ + this->filename = filename; + + options = new hhp_options_section(); + add_section(options); + files = new hhp_files_section(); + add_section(files); + + read(); + compute_unique_file_pathes_set(); +} + +hhp_reader::~hhp_reader() +{ + delete options; + delete files; +} + +void hhp_reader::add_section(hhp_section* section) +{ + string upper_case_name = to_upper(section->get_name()); + if (sections.count(upper_case_name) != 0) + throw logic_error("trying to redundantly add section '" + upper_case_name + "'"); + sections.insert(pair(upper_case_name, section)); +} + +void hhp_reader::read() +{ + ifstream hhp_file; + hhp_file.open(filename.c_str()); + + string line; + int line_number = 0; + hhp_section* section = NULL; + while (hhp_file.good()) + { + getline(hhp_file, line); + line_number++; + if (line[line.length() - 1] == '\015') // delete CR character if present + line = line.substr(0, line.length() - 1); + if (line[0] == '[' && line[line.length() - 1] == ']') + { + string name = to_upper(line.substr(1, line.length() - 2)); + if (sections.count(name)) + { + section = sections.find(name)->second; + clog << 
section->get_name() << endl; + } + else + { + clog << "unknown section: " << name << endl; + } + } + else if (line[0] != ';' && !line.empty()) + { + if (section) + section->process_line(line); + } + } + + hhp_file.close(); +} + +void hhp_reader::compute_unique_file_pathes_set() +{ + for (list::iterator it = files->filenames.begin(); it != files->filenames.end(); ++it) + { + unique_file_pathes.insert(replace_backslashes(realpath(it->c_str()))); + } +} + +string hhp_reader::get_title_string() +{ + return options->title->get_value(); +} + +string hhp_reader::get_contents_file_string() +{ + return options->contents_file->get_value(); +} + +string hhp_reader::get_index_file_string() +{ + return options->index_file->get_value(); +} + +string hhp_reader::get_default_topic_string() +{ + return options->default_topic->get_value(); +} + +unsigned int hhp_reader::get_language_code() +{ + return strtoul(options->language->get_value().c_str(), NULL, 0); +} + +string hhp_reader::get_compiled_file_string() +{ + return options->compiled_file->get_value(); +} + +set::iterator hhp_reader::get_file_pathes_iterator_begin() +{ + return unique_file_pathes.begin(); +} + +set::iterator hhp_reader::get_file_pathes_iterator_end() +{ + return unique_file_pathes.end(); +} diff --git a/reactos/tools/hhpcomp/hhp_reader.h b/reactos/tools/hhpcomp/hhp_reader.h new file mode 100644 index 00000000000..9e307b69f1f --- /dev/null +++ b/reactos/tools/hhpcomp/hhp_reader.h @@ -0,0 +1,136 @@ + +// This file is part of hhpcomp, a free HTML Help Project (*.hhp) compiler. +// Copyright (C) 2015 Benedikt Freisen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +#include +#include +#include +#include + +using namespace std; // using 'using' here for convenience + +class hhp_reader; // forward declaration + +class hhp_section +{ +private: + string name; + +public: + virtual void process_line(string line) = 0; + string get_name(); + void set_name(string name); +}; + +class hhp_pair +{ +private: + string key; + bool value_has_been_set; + string value; + bool has_default_value; + string default_value; + +public: + hhp_pair(string key, bool has_default_value = false, string default_value = ""); + void set_value(string value); + string get_value(); + string get_key(); +}; + +class hhp_key_value_section : public hhp_section +{ +protected: + map entries; + + void add_entry(hhp_pair* entry); + +public: + virtual void process_line(string line); +}; + +class hhp_options_section : public hhp_key_value_section +{ + friend hhp_reader; + +private: + hhp_pair* binary_TOC; + hhp_pair* binary_index; + hhp_pair* compiled_file; + hhp_pair* contents_file; + hhp_pair* index_file; + hhp_pair* autoindex; + hhp_pair* defaultwindow; + hhp_pair* default_topic; + hhp_pair* defaultfont; + hhp_pair* language; + hhp_pair* title; + hhp_pair* createchifile; + hhp_pair* compatibility; + hhp_pair* errorlogfile; + hhp_pair* full_text_search; + hhp_pair* display_compile_progress; + hhp_pair* display_compile_note; + hhp_pair* flat; + hhp_pair* full_text_search_stop_list_file; + +public: + hhp_options_section(); + ~hhp_options_section(); +}; + +class hhp_files_section : public 
hhp_section +{ + friend hhp_reader; + +private: + list filenames; + +public: + hhp_files_section(); + virtual void process_line(string line); +}; + +class hhp_reader +{ +private: + string filename; + map sections; + hhp_options_section* options; + hhp_files_section* files; + set unique_file_pathes; + + void add_section(hhp_section* section); + void read(); + void compute_unique_file_pathes_set(); + +public: + hhp_reader(string filename); + ~hhp_reader(); + + string get_title_string(); + string get_contents_file_string(); + string get_index_file_string(); + string get_default_topic_string(); + unsigned int get_language_code(); + string get_compiled_file_string(); + + set::iterator get_file_pathes_iterator_begin(); + set::iterator get_file_pathes_iterator_end(); +}; + diff --git a/reactos/tools/hhpcomp/hhpcomp.cpp b/reactos/tools/hhpcomp/hhpcomp.cpp new file mode 100644 index 00000000000..531c8fee726 --- /dev/null +++ b/reactos/tools/hhpcomp/hhpcomp.cpp @@ -0,0 +1,90 @@ + +// This file is part of hhpcomp, a free HTML Help Project (*.hhp) compiler. +// Copyright (C) 2015 Benedikt Freisen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +#include +#include +#include +#include + +#include +#include + +#include "hhp_reader.h" +#include "utils.h" + +extern "C" { +#include "chmc/chmc.h" +#include "chmc/err.h" +} + +extern "C" struct chmcTreeNode *chmc_add_file(struct chmcFile *chm, const char *filename, + UInt16 prefixlen, int sect_id, UChar *buf, + UInt64 len); + + +using namespace std; + +int main(int argc, char** argv) +{ + if (argc != 2) + { + cerr << "Usage: hhpcomp " << endl; + exit(0); + } + + string absolute_name = replace_backslashes(realpath(argv[1])); + int prefixlen = absolute_name.find_last_of('/'); + clog << prefixlen << endl; + chdir(absolute_name.substr(0, prefixlen).c_str()); // change to the project file's directory + hhp_reader project_file(absolute_name); + + struct chmcFile chm; + struct chmcConfig chm_config; + + chm_config.title = project_file.get_title_string().c_str(); + chm_config.hhc = project_file.get_contents_file_string().c_str(); + chm_config.hhk = project_file.get_index_file_string().c_str(); + chm_config.deftopic = project_file.get_default_topic_string().c_str(); + chm_config.language = project_file.get_language_code(); + + int err; + err = chmc_init(&chm, replace_backslashes(project_file.get_compiled_file_string()).c_str(), &chm_config); + if (err) + { + cerr << "could not initialize chmc" << endl; + exit(EXIT_FAILURE); + } + + for (set::iterator it = project_file.get_file_pathes_iterator_begin(); + it != project_file.get_file_pathes_iterator_end(); ++it) + { + clog << "File: " << *it << endl; + struct stat buf; + stat(it->c_str(), &buf); + if ((chmc_add_file(&chm, it->c_str(), prefixlen, 1, NULL, buf.st_size)) ? 
chmcerr_code() : CHMC_NOERR) + { + cerr << "could not add file: " << *it << endl; + exit(EXIT_FAILURE); + } + } + + chmc_tree_done(&chm); + chmc_term(&chm); + +} diff --git a/reactos/tools/hhpcomp/lzx_compress/ChangeLog b/reactos/tools/hhpcomp/lzx_compress/ChangeLog new file mode 100644 index 00000000000..1341ef47465 --- /dev/null +++ b/reactos/tools/hhpcomp/lzx_compress/ChangeLog @@ -0,0 +1,3 @@ +2002-06-17 Matthew T. Russotto + Switched to non-sliding version of Lempel-Ziv for + major performance boost diff --git a/reactos/tools/hhpcomp/lzx_compress/lz_nonslide.c b/reactos/tools/hhpcomp/lzx_compress/lz_nonslide.c new file mode 100644 index 00000000000..702fd2c5c81 --- /dev/null +++ b/reactos/tools/hhpcomp/lzx_compress/lz_nonslide.c @@ -0,0 +1,388 @@ +/* + File lz_nonslide.c, part of lzxcomp library + Copyright (C) 2002 Matthew T. Russotto + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; version 2.1 only + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ + +/* + * Document here + */ +#include +#include +#include +#include +#ifdef DEBUG_PERF +#include +#include +#endif +#include "lz_nonslide.h" + +#define MAX_MATCH 253 +#define MIN_MATCH 2 + +void lz_init(lz_info *lzi, int wsize, int max_dist, + int max_match, int min_match, + int frame_size, + get_chars_t get_chars, + output_match_t output_match, + output_literal_t output_literal, void *user_data) +{ + /* the reason for the separate max_dist value is LZX can't reach the + first three characters in its nominal window. But using a smaller + window results in inefficiency when dealing with reset intervals + which are the length of the nominal window */ + + lzi->wsize = wsize; + if (max_match > wsize) + lzi->max_match = wsize; + else + lzi->max_match = max_match; + + lzi->min_match = min_match; + if (lzi->min_match < 3) lzi->min_match = 3; + + lzi->max_dist = max_dist; + lzi->block_buf_size = wsize + lzi->max_dist; + lzi->block_buf = malloc(lzi->block_buf_size); + lzi->block_bufe = lzi->block_buf + lzi->block_buf_size; + assert(lzi->block_buf != NULL); + + lzi->cur_loc = 0; + lzi->block_loc = 0; + lzi->chars_in_buf = 0; + lzi->eofcount = 0; + lzi->get_chars = get_chars; + lzi->output_match = output_match; + lzi->output_literal = output_literal; + lzi->user_data = user_data; + lzi->frame_size = frame_size; + lzi->lentab = calloc(sizeof(int), lzi->block_buf_size); + lzi->prevtab = calloc(sizeof(u_char *), lzi->block_buf_size); + lzi->analysis_valid = 0; +} + +void lz_release(lz_info *lzi) +{ + free(lzi->block_buf); + free(lzi->lentab); + free(lzi->prevtab); +} + +void lz_reset(lz_info *lzi) +{ + int residual = lzi->chars_in_buf - lzi->block_loc; + memmove(lzi->block_buf, lzi->block_buf + lzi->block_loc, residual); + lzi->chars_in_buf = residual; + 
lzi->block_loc = 0; + lzi->analysis_valid = 0; +} + +#ifdef LZNONSLIDE_MAIN +typedef struct lz_user_data +{ + FILE *infile; + FILE *outfile; + int R0, R1, R2; +} lz_user_data; + +int tmp_get_chars(lz_info *lzi, int n, u_char *buf) +{ + lz_user_data *lzud = (lz_user_data *)lzi->user_data; + return fread(buf, 1, n, lzud->infile); +} + +int tmp_output_match(lz_info *lzi, int match_pos, int match_len) +{ + lz_user_data *lzud = (lz_user_data *)lzi->user_data; + int mod_match_loc; + + mod_match_loc = match_pos; + + fprintf(lzud->outfile, "(%d, %d)(%d)\n", match_pos, match_len, mod_match_loc); + return 0; +} + +void tmp_output_literal(lz_info *lzi, u_char ch) +{ + lz_user_data *lzud = (lz_user_data *)lzi->user_data; + fprintf(lzud->outfile, "'%c'", ch); +} + +int main(int argc, char *argv[]) +{ + int wsize = atoi(argv[1]); + lz_info lzi; + lz_user_data lzu = {stdin, stdout, 1, 1, 1}; + + lz_init(&lzi, wsize, wsize, MAX_MATCH, MIN_MATCH, 8192, tmp_get_chars, tmp_output_match, tmp_output_literal,&lzu); + lz_compress(&lzi); + return 0; +} +#endif + +__inline__ int lz_left_to_process(lz_info *lzi) +{ + return lzi->chars_in_buf - lzi->block_loc; +} + +static void +fill_blockbuf(lz_info *lzi, int maxchars) +{ + int toread; + u_char *readhere; + int nread; + + if (lzi->eofcount) return; + maxchars -= lz_left_to_process(lzi); + toread = lzi->block_buf_size - lzi->chars_in_buf; + if (toread > maxchars) toread = maxchars; + readhere = lzi->block_buf + lzi->chars_in_buf; + nread = lzi->get_chars(lzi, toread, readhere); + lzi->chars_in_buf += nread; + if (nread != toread) + lzi->eofcount++; +} + +static void lz_analyze_block(lz_info *lzi) +{ + int *lentab, *lenp; + u_char **prevtab, **prevp; + u_char *bbp, *bbe; + u_char *chartab[256]; + u_char *cursor; + int prevlen; + int ch; + int maxlen; + long wasinc; + int max_dist = lzi->max_dist; +#ifdef DEBUG_ANALYZE_BLOCK + static short n = 0; +#endif +#ifdef DEBUG_PERF + struct rusage innerloop; + struct timeval innertime, tmptime; + 
struct rusage outerloop; + struct timeval outertime; + struct rusage initialloop; + struct timeval initialtime; + struct rusage totalloop; + struct timeval totaltime; +#endif + +#ifdef DEBUG_ANALYZE_BLOCK + fprintf(stderr, "Analyzing block %d, cur_loc = %06x\n", n, lzi->cur_loc); +#endif + memset(chartab, 0, sizeof(chartab)); + prevtab = prevp = lzi->prevtab; + lentab = lenp = lzi->lentab; + memset(prevtab, 0, sizeof(*prevtab) * lzi->chars_in_buf); + memset(lentab, 0, sizeof(*prevtab) * lzi->chars_in_buf); +#ifdef DEBUG_PERF + memset(&innertime, 0, sizeof(innertime)); + memset(&outertime, 0, sizeof(outertime)); + getrusage(RUSAGE_SELF, &initialloop); + totalloop = initialloop; +#endif + bbp = lzi->block_buf; + bbe = bbp + lzi->chars_in_buf; + while (bbp < bbe) { + if (chartab[ch = *bbp]) { + *prevp = chartab[ch]; + *lenp = 1; + } + chartab[ch] = bbp; + bbp++; + prevp++; + lenp++; + } +#ifdef DEBUG_PERF + initialtime = initialloop.ru_utime; + getrusage(RUSAGE_SELF, &initialloop); + timersub(&initialloop.ru_utime, &initialtime, &initialtime); +#endif + wasinc = 1; + for (maxlen = 1; wasinc && (maxlen < lzi->max_match); maxlen++) { +#ifdef DEBUG_PERF + getrusage(RUSAGE_SELF, &outerloop); +#endif + bbp = bbe - maxlen - 1; + lenp = lentab + lzi->chars_in_buf - maxlen - 1; + prevp = prevtab + lzi->chars_in_buf - maxlen - 1; + wasinc = 0; + while (bbp > lzi->block_buf) { + if (*lenp == maxlen) { +#ifdef DEBUG_PERF + getrusage(RUSAGE_SELF, &innerloop); +#endif + ch = bbp[maxlen]; + cursor = *prevp; + while(cursor && ((bbp - cursor) <= max_dist)) { + prevlen = *(cursor - lzi->block_buf + lentab); + if (cursor[maxlen] == ch) { + *prevp = cursor; + (*lenp)++; + wasinc++; + break; + } + if (prevlen != maxlen) break; + cursor = *(cursor - lzi->block_buf + prevtab); + } +#ifdef DEBUG_PERF + tmptime = innerloop.ru_utime; + getrusage(RUSAGE_SELF, &innerloop); + timersub(&innerloop.ru_utime, &tmptime, &tmptime); + timeradd(&tmptime, &innertime, &innertime); +#endif + } + bbp--; + 
prevp--; + lenp--; + } +#ifdef DEBUG_PERF + tmptime = outerloop.ru_utime; + getrusage(RUSAGE_SELF, &outerloop); + timersub(&outerloop.ru_utime, &tmptime, &tmptime); + timeradd(&tmptime, &outertime, &outertime); +#endif + // fprintf(stderr, "maxlen = %d, wasinc = %ld\n", maxlen, wasinc); + } +#ifdef DEBUG_PERF + totaltime = totalloop.ru_utime; + getrusage(RUSAGE_SELF, &totalloop); + timersub(&totalloop.ru_utime, &totaltime, &totaltime); + fprintf(stderr, "Time spend in initial loop = %f\n", initialtime.tv_sec + initialtime.tv_usec/(double)1E6); + fprintf(stderr, "Time spend in outer loop = %f\n", outertime.tv_sec + outertime.tv_usec/(double)1E6); + fprintf(stderr, "Time spend in inner loop = %f\n", innertime.tv_sec + innertime.tv_usec/(double)1E6); + fprintf(stderr, "Time spend in all loops = %f\n", totaltime.tv_sec + totaltime.tv_usec/(double)1E6); +#endif + lzi->analysis_valid = 1; +#ifdef DEBUG_ANALYZE_BLOCK + fprintf(stderr, "Done analyzing block %d, cur_loc = %06x\n", n++, lzi->cur_loc); +#endif +} + +void lz_stop_compressing(lz_info *lzi) +{ + lzi->stop = 1; + /* fprintf(stderr, "Stopping...\n");*/ +} + +int lz_compress(lz_info *lzi, int nchars) +{ + + u_char *bbp, *bbe; + int *lentab, *lenp; + u_char **prevtab, **prevp; + int len; + int holdback; + short trimmed; + + lzi->stop = 0; + while ((lz_left_to_process(lzi) || !lzi->eofcount) && !lzi->stop && nchars > 0) { +#if 1 + if (!lzi->analysis_valid || + (!lzi->eofcount && + ((lzi->chars_in_buf- lzi->block_loc) < nchars))) { + int residual = lzi->chars_in_buf - lzi->block_loc; + int bytes_to_move = lzi->max_dist + residual; + if (bytes_to_move > lzi->chars_in_buf) + bytes_to_move = lzi->chars_in_buf; +#ifdef DEBUG_ANALYZE_BLOCK + fprintf(stderr, "Moving %06x, chars_in_buf %06x, residual = %06x, nchars= %06x block_loc = %06x\n", bytes_to_move, lzi->chars_in_buf, residual, nchars, lzi->block_loc); +#endif + memmove(lzi->block_buf, lzi->block_buf + lzi->chars_in_buf - bytes_to_move, + bytes_to_move); + + 
lzi->block_loc = bytes_to_move - residual; + lzi->chars_in_buf = bytes_to_move; +#ifdef DEBUG_ANALYZE_BLOCK + fprintf(stderr, "New chars_in_buf %06x, new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount); +#endif + fill_blockbuf(lzi, nchars); +#ifdef DEBUG_ANALYZE_BLOCK + fprintf(stderr, "Really new chars_in_buf %06x, new block_loc = %06x, eof = %1d\n", lzi->chars_in_buf, lzi->block_loc, lzi->eofcount); +#endif + lz_analyze_block(lzi); + } +#else + if (!lzi->analysis_valid || + (lzi->block_loc - lzi->chars_in_buf) == 0) { + lzi->block_loc = 0; + lzi->chars_in_buf = 0; + fill_blockbuf(lzi, nchars); + lz_analyze_block(lzi); + } +#endif + prevtab = prevp = lzi->prevtab + lzi->block_loc; + lentab = lenp = lzi->lentab + lzi->block_loc; + bbp = lzi->block_buf + lzi->block_loc; + holdback = lzi->max_match; + if (lzi->eofcount) holdback = 0; + if (lzi->chars_in_buf < (nchars + lzi->block_loc)) + bbe = lzi->block_buf + lzi->chars_in_buf - holdback; + else + bbe = bbp + nchars; + while ((bbp < bbe) && (!lzi->stop)) { + trimmed = 0; + len = *lenp; + if (lzi->frame_size && (len > (lzi->frame_size - lzi->cur_loc % lzi->frame_size))) { +#ifdef DEBUG_TRIMMING + fprintf(stderr, "Trim for framing: %06x %d %d\n", lzi->cur_loc,len, (lzi->frame_size - lzi->cur_loc % lzi->frame_size)); +#endif + trimmed = 1; + len = (lzi->frame_size - lzi->cur_loc % lzi->frame_size); + } + if (len > nchars) { +#ifdef DEBUG_TRIMMING + fprintf(stderr, "Trim for blocking: %06x %d %d\n", lzi->cur_loc,len, nchars); +#endif + trimmed = 1; + len = nchars; + } + if (len >= lzi->min_match) { +#ifdef LAZY + if ((bbp < bbe -1) && !trimmed && + ((lenp[1] > (len + 1)) /* || ((lenp[1] == len) && (prevp[1] > prevp[0])) */)) { + len = 1; + /* this is the lazy eval case */ + } + else +#endif + if (lzi->output_match(lzi, (*prevp - lzi->block_buf) - lzi->block_loc, + len) < 0) { + // fprintf(stderr, "Match rejected: %06x %d\n", lzi->cur_loc, len); + len = 1; /* match rejected */ + } + } + 
else + len = 1; + + if (len < lzi->min_match) { + assert(len == 1); + lzi->output_literal(lzi, *bbp); + } + // fprintf(stderr, "len = %3d, *lenp = %3d, cur_loc = %06x, block_loc = %06x\n", len, *lenp, lzi->cur_loc, lzi->block_loc); + bbp += len; + prevp += len; + lenp += len; + lzi->cur_loc += len; + lzi->block_loc += len; + assert(nchars >= len); + nchars -= len; + + } + } + return 0; +} diff --git a/reactos/tools/hhpcomp/lzx_compress/lz_nonslide.h b/reactos/tools/hhpcomp/lzx_compress/lz_nonslide.h new file mode 100644 index 00000000000..a721fede604 --- /dev/null +++ b/reactos/tools/hhpcomp/lzx_compress/lz_nonslide.h @@ -0,0 +1,60 @@ +/* + File lz_nonslide.h, part of lzxcomp library + Copyright (C) 2002 Matthew T. Russotto + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; version 2.1 only + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. 
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+*/
+typedef struct lz_info lz_info;
+/* input callback: store up to n bytes in buf and return how many were stored;
+   a count different from n is recorded as end of input */
+typedef int (*get_chars_t)(lz_info *lzi, int n, u_char *buf);
+/* match callback: a negative return value rejects the match, and a literal is
+   emitted instead */
+typedef int (*output_match_t)(lz_info *lzi, int match_pos, int match_len);
+/* literal callback: receives one unmatched byte */
+typedef void (*output_literal_t)(lz_info *lzi, u_char ch);
+
+struct lz_info
+{
+  int wsize; /* window size in bytes */
+  int max_match; /* size of longest match in bytes */
+  int min_match; /* shortest match reported; lz_init raises it to at least 3 */
+  u_char *block_buf; /* working buffer of block_buf_size bytes */
+  u_char *block_bufe; /* block_buf + block_buf_size */
+  int block_buf_size; /* wsize + max_dist */
+  int chars_in_buf; /* number of bytes currently held in block_buf */
+  int cur_loc;      /* location within stream */
+  int block_loc; /* offset in block_buf of the next byte to process */
+  int frame_size; /* matches are trimmed at multiples of this; 0 disables */
+  int max_dist; /* largest match distance considered */
+  u_char **prevtab; /* per buffer position: start of the recorded match */
+  int *lentab; /* per buffer position: length of the recorded match */
+  short eofcount; /* non-zero once get_chars returned a short count */
+  short stop; /* set by lz_stop_compressing, cleared by lz_compress */
+  short analysis_valid; /* non-zero while prevtab/lentab match the buffer */
+
+  get_chars_t get_chars;
+  output_match_t output_match;
+  output_literal_t output_literal;
+  void *user_data; /* stored by lz_init for use by the callbacks */
+};
+
+/* allocates the buffers of lzi; release them with lz_release */
+void lz_init(lz_info *lzi, int wsize, int max_dist,
+	     int max_match, int min_match,
+	     int frame_size,
+	     get_chars_t get_chars,
+	     output_match_t output_match,
+	     output_literal_t output_literal, void *user_data);
+
+void lz_release(lz_info *lzi);
+
+/* drops processed data and invalidates the match analysis */
+void lz_reset(lz_info *lzi);
+void lz_stop_compressing(lz_info *lzi);
+int lz_left_to_process(lz_info *lzi); /* returns # chars read in but unprocessed */
+/* processes up to nchars input bytes; always returns 0 */
+int lz_compress(lz_info *lzi, int nchars);
diff --git a/reactos/tools/hhpcomp/lzx_compress/lzx_compress.h b/reactos/tools/hhpcomp/lzx_compress/lzx_compress.h
new file mode 100644
index 00000000000..9390fbfeedc
--- /dev/null
+++ b/reactos/tools/hhpcomp/lzx_compress/lzx_compress.h
@@ -0,0 +1,42 @@
+/*
+    File lzx_compress.h, part of lzxcomp library
+    Copyright (C) 2002 Matthew T.
Russotto + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; version 2.1 only + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +typedef struct lzx_data lzx_data; +typedef int (*lzx_get_bytes_t)(void *arg, int n, void *buf); +typedef int (*lzx_put_bytes_t)(void *arg, int n, void *buf); +typedef void (*lzx_mark_frame_t)(void *arg, uint32_t uncomp, uint32_t comp); +typedef int (*lzx_at_eof_t)(void *arg); + +typedef struct lzx_results +{ + /* add more here? Error codes, # blocks, # frames, etc? 
*/ + long len_compressed_output; + long len_uncompressed_input; +} lzx_results; + +int lzx_init(struct lzx_data **lzxdp, int wsize_code, + lzx_get_bytes_t get_bytes, void *get_bytes_arg, + lzx_at_eof_t at_eof, + lzx_put_bytes_t put_bytes, void *put_bytes_arg, + lzx_mark_frame_t mark_frame, void *mark_frame_arg); + +void lzx_reset(lzx_data *lzxd); + +int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide); + +int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr); + diff --git a/reactos/tools/hhpcomp/lzx_compress/lzx_config.h b/reactos/tools/hhpcomp/lzx_compress/lzx_config.h new file mode 100644 index 00000000000..f5a17ecc932 --- /dev/null +++ b/reactos/tools/hhpcomp/lzx_compress/lzx_config.h @@ -0,0 +1,4 @@ + +#if BYTE_ORDER == BIG_ENDIAN +#define LZX_BIG_ENDIAN +#endif diff --git a/reactos/tools/hhpcomp/lzx_compress/lzx_constants.h b/reactos/tools/hhpcomp/lzx_compress/lzx_constants.h new file mode 100644 index 00000000000..02b8ebac8b4 --- /dev/null +++ b/reactos/tools/hhpcomp/lzx_compress/lzx_constants.h @@ -0,0 +1,40 @@ +/* + File lzx_constants.h, part of lzxcomp library + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; version 2.1 only + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + -------------------------------------- + The above lines apply to the lzxcomp library as a whole. 
This file, + lzx_constants.h, however, is probably uncopyrightable, and in any + case I explicitly place it in the public domain. + + Matthew T. Russotto +*/ + +/* these named constants are from the Microsoft LZX documentation */ +#define MIN_MATCH 2 +#define MAX_MATCH 257 +#define NUM_CHARS 256 +#define NUM_PRIMARY_LENGTHS 7 +#define NUM_SECONDARY_LENGTHS 249 + +/* the names of these constants are specific to this library */ +#define LZX_MAX_CODE_LENGTH 16 +#define LZX_FRAME_SIZE 32768 +#define LZX_PRETREE_SIZE 20 +#define LZX_ALIGNED_BITS 3 +#define LZX_ALIGNED_SIZE 8 + +#define LZX_VERBATIM_BLOCK 1 +#define LZX_ALIGNED_OFFSET_BLOCK 2 diff --git a/reactos/tools/hhpcomp/lzx_compress/lzx_layer.c b/reactos/tools/hhpcomp/lzx_compress/lzx_layer.c new file mode 100644 index 00000000000..3d6ec0e60b9 --- /dev/null +++ b/reactos/tools/hhpcomp/lzx_compress/lzx_layer.c @@ -0,0 +1,1251 @@ +/* + File lzx_layer.c, part of lzxcomp library + Copyright (C) 2002 Matthew T. Russotto + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; version 2.1 only + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +#include +#include +#include +#include /* for memset on Linux */ +#include +#include +#include "lzx_config.h" +#ifdef NONSLIDE +#include "lz_nonslide.h" +#else +#include "hash_slide.h" +#include "lz_slide.h" +#endif +#include "lzx_compress.h" +#include "lzx_constants.h" + +/* Debugging defines useful during development. 
/* Debugging switches useful during development.  Each one adds diagnostic
   output at a different point in the system */

/*#define DEBUG_MATCHES        *//* When matches come in from the LZ engine */
/*#define DEBUG_MATCHES_2      *//* When matches are being output */
/*#define DEBUG_HUFFMAN        *//* When huffman trees are built */
/*#define DEBUG_ENTROPY        *//* In entropy calculation */
/*#define DEBUG_LZ             *//* Uncompressed input reconstructed from
                                    LZ engine */
/*#define DEBUG_BITBUF         *//* Raw output to upper layer */
/*#define DEBUG_EXTRA_BITS     *//* Savings due to extra bits huffman tree */
/*#define DEBUG_POSITION_SLOT_LOOKUP */
/*#define DEBUG_TREE_COMPRESSION *//* During RLE compression of trees */

/* number of position slots for each window size code; lzx_init indexes this
   table with wsize_code - 15 */
/* as corrected by Caie */
short num_position_slots[] = {30, 32, 34, 36, 38, 42, 50};
/* position_base[] and extra_bits[] are filled in by lzx_init_static() */
unsigned long position_base[51];
unsigned char extra_bits[52];   /* u_char spelled portably; same type */
double rloge2;

/* Internal (two-child) node of the Huffman tree under construction. */
typedef struct ih_elem {
  int freq;
  short sym;            /* always -1 for an internal node */
  short pathlength;
  struct ih_elem *parent;
  struct ih_elem *left;
  struct ih_elem *right;
} ih_elem;

/* Leaf node.  The first four members mirror ih_elem on purpose: the tree
   builder casts h_elem pointers to ih_elem pointers and touches only those
   shared leading members (freq, sym, pathlength, parent). */
typedef struct h_elem {
  int freq;
  short sym;            /* symbol number, >= 0 */
  short pathlength;
  struct ih_elem *parent;
  unsigned short code;
} h_elem;

/* One finished code: its length in bits (0 = symbol unused) and its value. */
typedef struct huff_entry {
  short codelength;
  unsigned short code;
} huff_entry;

/* qsort comparator: used symbols first, by ascending frequency (ties broken
   by ascending symbol); unused (zero frequency) symbols go last. */
static int cmp_leaves(const void *in_a, const void *in_b)
{
  const h_elem *a = (const h_elem *)in_a;
  const h_elem *b = (const h_elem *)in_b;

  if (!a->freq && b->freq)
    return 1;
  if (a->freq && !b->freq)
    return -1;

  if (a->freq == b->freq)
    return a->sym - b->sym;

  return a->freq - b->freq;
}

/* qsort comparator: descending path length, ties broken by descending
   symbol (see the note on canonical codes in build_huffman_tree; the
   opposite tie-break would belong to the other canonical form). */
static int
cmp_pathlengths(const void *in_a, const void *in_b)
{
  const h_elem *a = (const h_elem *)in_a;
  const h_elem *b = (const h_elem *)in_b;

  if (a->pathlength == b->pathlength)
    return b->sym - a->sym;
  return b->pathlength - a->pathlength;
}

/*
 * Standard Huffman building algorithm, producing canonical codes.
 *
 *   nelem            number of symbols in the alphabet
 *   max_code_length  longest code allowed, in bits
 *   freq             nelem symbol frequencies (read only)
 *   tree             receives nelem entries; symbols that never occur get
 *                    codelength 0 and code 0
 *
 * If the tree comes out deeper than max_code_length, every frequency other
 * than 1 is halved and the tree is rebuilt until it fits.
 *
 * When exactly one symbol occurs, a second symbol (the lowest-numbered
 * unused one) is given a 1-bit code as well, because two symbols are
 * required; with a one-symbol alphabet the lone symbol simply gets the
 * 1-bit code 0.
 *
 * Allocation failure is fatal: a message is printed and abort() is called,
 * since the function has no way to report an error to its callers.
 */
static void
build_huffman_tree(int nelem, int max_code_length, int *freq, huff_entry *tree)
{
  h_elem *leaves;
  ih_elem *inodes;
  ih_elem *next_inode;
  ih_elem *cur_inode;
  h_elem *cur_leaf;
  int leaves_left;
  int nleaves;
  int pathlength;
  unsigned short cur_code;
  short codes_too_long = 0;
  ih_elem *f1, *f2;
  int i;

  if (nelem <= 0)
    return;                     /* empty alphabet: nothing to fill in */

  leaves = (h_elem *)malloc((size_t)nelem * sizeof(h_elem));
  if (leaves == NULL) {
    fprintf(stderr, "build_huffman_tree: out of memory\n");
    abort();
  }

  for (i = 0; i < nelem; i++) {
    leaves[i].freq = freq[i];
    leaves[i].sym = (short)i;
    leaves[i].pathlength = 0;
    leaves[i].parent = NULL;
    leaves[i].code = 0;
  }
  qsort(leaves, (size_t)nelem, sizeof(h_elem), cmp_leaves);

  /* used symbols now come first; count them */
  for (leaves_left = 0; leaves_left < nelem; leaves_left++) {
#ifdef DEBUG_HUFFMAN
    fprintf(stderr, "%3d: %3d '%c'\n", leaves_left, leaves[leaves_left].freq,
            leaves[leaves_left].sym);
#endif
    if (!leaves[leaves_left].freq) break;
  }
  nleaves = leaves_left;

  if (nleaves >= 2) {
    inodes = (ih_elem *)malloc((size_t)(nelem - 1) * sizeof(ih_elem));
    if (inodes == NULL) {
      fprintf(stderr, "build_huffman_tree: out of memory\n");
      free(leaves);
      abort();
    }

    do {
      if (codes_too_long) {
        /* previous attempt was too deep: flatten the distribution.  The
           loop also leaves leaves_left == nleaves again for the rebuild. */
        for (leaves_left = 0; leaves_left < nelem; leaves_left++) {
          if (!leaves[leaves_left].freq) break;
          if (leaves[leaves_left].freq != 1) {
            leaves[leaves_left].freq >>= 1;
            codes_too_long = 0;
          }
        }
        /* nothing left to halve would mean the limit cannot be met */
        assert (!codes_too_long);
      }

      cur_leaf = leaves;
      next_inode = cur_inode = inodes;

      /* two-queue merge: sorted leaves on one side, the internal nodes
         created so far (in creation order) on the other */
      do {
        f1 = f2 = NULL;
        if (leaves_left &&
            ((cur_inode == next_inode) ||
             (cur_leaf->freq <= cur_inode->freq))) {
          f1 = (ih_elem *)cur_leaf++;
          leaves_left--;
        }
        else if (cur_inode != next_inode) {
          f1 = cur_inode++;
        }

        if (leaves_left &&
            ((cur_inode == next_inode) ||
             (cur_leaf->freq <= cur_inode->freq))) {
          f2 = (ih_elem *)cur_leaf++;
          leaves_left--;
        }
        else if (cur_inode != next_inode) {
          f2 = cur_inode++;
        }

#ifdef DEBUG_HUFFMAN
        fprintf(stderr, "%p %p\n", (void *)f1, (void *)f2);
#endif
        if (f1 && f2) {
          next_inode->freq = f1->freq + f2->freq;
          next_inode->sym = -1;
          next_inode->left = f1;
          next_inode->right = f2;
          next_inode->parent = NULL;
          f1->parent = next_inode;
          f2->parent = next_inode;
          /* while building, pathlength holds the height of the subtree */
          if (f1->pathlength > f2->pathlength)
            next_inode->pathlength = f1->pathlength + 1;
          else
            next_inode->pathlength = f2->pathlength + 1;
          if (next_inode->pathlength > max_code_length) {
            codes_too_long = 1;
            break;
          }
          next_inode++;
        }
      }
      while (f1 && f2);
    }
    while (codes_too_long);

#ifdef DEBUG_HUFFMAN
    cur_inode = inodes;
    while (cur_inode < next_inode) {
      fprintf(stderr, "%d l: %3d%c r: %3d%c freq: %8d\n",
              (int)(cur_inode - inodes),
              (int)((cur_inode->left->sym!=-1)?(((struct h_elem *)cur_inode->left)-leaves):(cur_inode->left-inodes)),
              (cur_inode->left->sym!=-1)?'l':'i',
              (int)((cur_inode->right->sym!=-1)?(((struct h_elem *)cur_inode->right)-leaves):(cur_inode->right-inodes)),
              (cur_inode->right->sym!=-1)?'l':'i',
              (cur_inode->freq)
              );
      cur_inode++;
    }
#endif

    /* now traverse tree depth-first, turning pathlength into the depth of
       each node; -1 marks a node that has been reached but not finished */
    cur_inode = next_inode - 1;
    pathlength = 0;
    cur_inode->pathlength = -1;
    do {
      /* precondition: at unmarked node*/
      if (cur_inode->sym == -1) {
        /* left node of unmarked node is unmarked */
        cur_inode = cur_inode->left;
        cur_inode->pathlength = -1;
        pathlength++;
      }
      else {
        /* mark node */
        cur_inode->pathlength = (short)pathlength;

        /* time to come up.  Keep coming up until an unmarked node is reached */
        /* or the tree is exhausted */
        do {
          cur_inode = cur_inode->parent;
          pathlength--;
        }
        while (cur_inode && (cur_inode->pathlength != -1));
        if (cur_inode) {
          /* found unmarked node; mark it and go right */
          cur_inode->pathlength = (short)pathlength;
          cur_inode = cur_inode->right;
          cur_inode->pathlength = -1;
          pathlength++;
          /* would be complex if cur_inode could be null here.  It can't */
        }
      }
    }
    while (cur_inode);

#ifdef DEBUG_HUFFMAN
    cur_inode = inodes;
    while (cur_inode < next_inode) {
      fprintf(stderr, "%d l: %3d%c r: %3d%c freq: %8d pathlength %4d\n",
              (int)(cur_inode - inodes),
              (int)((cur_inode->left->sym!=-1)?(((struct h_elem *)cur_inode->left)-leaves):(cur_inode->left-inodes)),
              (cur_inode->left->sym!=-1)?'l':'i',
              (int)((cur_inode->right->sym!=-1)?(((struct h_elem *)cur_inode->right)-leaves):(cur_inode->right-inodes)),
              (cur_inode->right->sym!=-1)?'l':'i',
              (cur_inode->freq),
              (cur_inode->pathlength)
              );
      cur_inode++;
    }
#endif
    free(inodes);

    /* order leaves by descending code length, then descending symbol;
       unused symbols (pathlength 0) end up behind the nleaves used ones */
    qsort(leaves, (size_t)nelem, sizeof(h_elem), cmp_pathlengths);

    /**
       Microsoft's second condition on its canonical huffman codes is:

       For each level, starting at the deepest level of the tree and then
       moving upwards, leaf nodes must start as far left as possible. An
       alternative way of stating this constraint is that if any tree node
       has children then all tree nodes to the left of it with the same path
       length must also have children.

       These 'alternatives' are not equivalent.  The latter alternative gives
       the common canonical code where the longest code is all zeros.  The former
       gives an opposite code where the longest code is all ones.  Microsoft uses the
       former alternative.
    **/

    pathlength = leaves[nleaves-1].pathlength;
    assert(leaves[0].pathlength <= 16); /* this method cannot deal with bigger codes, though
                                           the other canonical method can in some cases
                                           (because it starts with zeros ) */
    /* walk from the shortest code to the longest, counting upwards and
       appending a zero bit each time the length grows */
    cur_code = 0;
    for (i = nleaves - 1; i >= 0; i--) {
      while (leaves[i].pathlength > pathlength) {
        cur_code <<= 1;
        pathlength++;
      }
      leaves[i].code = cur_code;
      cur_code++;
    }

#ifdef DEBUG_HUFFMAN
    for (i = 0; i < nleaves; i++) {
      char code[18];
      int j;

      cur_code = leaves[i].code;
      code[leaves[i].pathlength] = 0;
      for (j = leaves[i].pathlength-1; j >= 0; j--) {
        if (cur_code & 1) code[j] = '1';
        else code[j] = '0';
        cur_code >>= 1;
      }
      fprintf(stderr, "%3d: %3d %3d %-16.16s '%c'\n", i, leaves[i].freq, leaves[i].pathlength, code,
              leaves[i].sym);
    }
#endif
  }
  else if (nleaves == 1) {
    /* 0 symbols is OK (not according to doc, but according to Caie) */
    /* but if only one symbol is present, two symbols are required */
    leaves[0].pathlength = 1;
    if (nelem >= 2) {
      /* leaves[1] is the lowest-numbered unused symbol after the sort */
      nleaves = 2;
      leaves[1].pathlength = 1;
      if (leaves[1].sym > leaves[0].sym) {
        leaves[1].code = 1;
        leaves[0].code = 0;
      }
      else {
        leaves[0].code = 1;
        leaves[1].code = 0;
      }
    }
    else {
      /* one-symbol alphabet: there is no partner entry to borrow */
      leaves[0].code = 0;
    }
  }

  memset(tree, 0, (size_t)nelem * sizeof(huff_entry));
  for (i = 0; i < nleaves; i++) {
    tree[leaves[i].sym].codelength = leaves[i].pathlength;
    tree[leaves[i].sym].code = leaves[i].code;
  }

  free(leaves);
}

/* (applies to the table initialisation that follows) from Stuart Caie's
   code -- I'm hoping this code is too small to encumber this file. */
If not, you could rip it out and hard-code the tables */ + +static void lzx_init_static(void) +{ + int i, j; + + if (extra_bits[49]) return; + + rloge2 = 1.0/log(2); + for (i=0, j=0; i <= 50; i += 2) { + extra_bits[i] = extra_bits[i+1] = j; /* 0,0,0,0,1,1,2,2,3,3... */ + if ((i != 0) && (j < 17)) j++; /* 0,0,1,2,3,4...15,16,17,17,17,17... */ + } + + for (i=0, j=0; i <= 50; i++) { + position_base[i] = j; /* 0,1,2,3,4,6,8,12,16,24,32,... */ + j += 1 << extra_bits[i]; /* 1,1,1,1,2,2,4,4,8,8,16,16,32,32,... */ + } +} + +struct lzx_data +{ + void *in_arg; + void *out_arg; + void *mark_frame_arg; + lzx_get_bytes_t get_bytes; + lzx_at_eof_t at_eof; + lzx_put_bytes_t put_bytes; + lzx_mark_frame_t mark_frame; + struct lz_info *lzi; + /* a 'frame' is an 0x8000 byte thing. Called that because otherwise + I'd confuse myself overloading 'block' */ + int left_in_frame; + int left_in_block; + int R0, R1, R2; + int num_position_slots; + /* this is the LZX block size */ + int block_size; + int *main_freq_table; + int length_freq_table[NUM_SECONDARY_LENGTHS]; + int aligned_freq_table[LZX_ALIGNED_SIZE]; + uint32_t *block_codes; + uint32_t *block_codesp; + huff_entry *main_tree; + huff_entry length_tree[NUM_SECONDARY_LENGTHS]; + huff_entry aligned_tree[LZX_ALIGNED_SIZE]; + int main_tree_size; + uint16_t bit_buf; + int bits_in_buf; + double main_entropy; + double last_ratio; + uint8_t *prev_main_treelengths; + uint8_t prev_length_treelengths[NUM_SECONDARY_LENGTHS]; + uint32_t len_uncompressed_input; + uint32_t len_compressed_output; + short need_1bit_header; + short subdivide; /* 0 = don't subdivide, 1 = allowed, -1 = requested */ +}; + +static int +lzx_get_chars(lz_info *lzi, int n, u_char *buf) +{ + /* force lz compression to stop after every block */ + int chars_read; + int chars_pad; + + lzx_data *lzud = (lzx_data *)lzi->user_data; +#ifdef OLDFRAMING + if (lzud->subdivide < 0) return 0; + if (n > lzud->left_in_frame) + n = lzud->left_in_frame; + if (n > lzud->left_in_block) + n = 
lzud->left_in_block; +#endif + chars_read = lzud->get_bytes(lzud->in_arg, n, buf); +#ifdef OLDFRAMING + lzud->left_in_frame -= chars_read; + lzud->left_in_block -= chars_read; +#else + lzud->left_in_frame -= chars_read % LZX_FRAME_SIZE; + if (lzud->left_in_frame < 0) + lzud->left_in_frame += LZX_FRAME_SIZE; +#endif + if ((chars_read < n) && (lzud->left_in_frame)) { + chars_pad = n - chars_read; + if (chars_pad > lzud->left_in_frame) chars_pad = lzud->left_in_frame; + /* never emit a full frame of padding. This prevents silliness when + lzx_compress is called when at EOF but EOF not yet detected */ + if (chars_pad == LZX_FRAME_SIZE) chars_pad = 0; +#ifdef OLDFRAMING + if (chars_pad > lzud->left_in_block) chars_pad = lzud->left_in_block; +#endif + memset(buf + chars_read, 0, chars_pad); + lzud->left_in_frame -= chars_pad; +#ifdef OLDFRAMING + lzud->left_in_block -= chars_pad; +#endif + chars_read += chars_pad; + } + return chars_read; +} + +#ifdef NONSLIDE +static int find_match_at(lz_info *lzi, int loc, int match_len, int *match_locp) +{ + u_char *matchb; + u_char *nmatchb; + u_char *c1, *c2; + int j; + + if (-*match_locp == loc) return -1; + if (loc < match_len) return -1; + + matchb = lzi->block_buf + lzi->block_loc + *match_locp; + nmatchb = lzi->block_buf + lzi->block_loc - loc; + c1 = matchb; + c2 = nmatchb; + for (j = 0; j < match_len; j++) { + if (*c1++ != *c2++) break; + } + if (j == match_len) { +#ifdef DEBUG_MATCHES + fprintf(stderr, "match found %d, old = %d new = %d len = %d\n", lzi->cur_loc, -*match_locp, loc, match_len); +#endif + *match_locp = -loc; + return 0; + } + return -1; +} +#else +static int find_match_at(lz_info *lzi, int loc, int match_len, int *match_locp) +{ + u_char *matchb; + u_char *nmatchb; + u_char *c1, *c2; + int j; + + if (-*match_locp == loc) return -1; + if (loc < match_len) return -1; + + matchb = lzi->slide_bufp + *match_locp; + if (matchb < lzi->slide_buf) matchb += lzi->slide_buf_size; + nmatchb = lzi->slide_bufp - loc; + if 
(nmatchb < lzi->slide_buf) nmatchb += lzi->slide_buf_size; + c1 = matchb; + c2 = nmatchb; + for (j = 0; j < match_len; j++) { + if (*c1++ != *c2++) break; + if (c1 == lzi->slide_bufe) c1 = lzi->slide_buf; + if (c2 == lzi->slide_bufe) c2 = lzi->slide_buf; + } + if (j == match_len) { +#ifdef DEBUG_MATCHES + fprintf(stderr, "match found %d, old = %d new = %d len = %d\n", lzi->cur_loc, -*match_locp, loc, match_len); +#endif + *match_locp = -loc; + return 0; + } + return -1; +} +#endif +static void check_entropy(lzx_data *lzud, int main_index) +{ + /* entropy = - sum_alphabet P(x) * log2 P(x) */ + /* entropy = - sum_alphabet f(x)/N * log2 (f(x)/N) */ + /* entropy = - 1/N sum_alphabet f(x) * (log2 f(x) - log2 N) */ + /* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - sum_alphabet f(x) log2 N */ + /* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - log2 N sum_alphabet f(x) */ + /* entropy = - 1/N (sum_alphabet f(x) * log2 f(x)) - N * log2 N */ + + /* entropy = - 1/N ((sum_alphabet f(x) * log2 f(x) ) - N * log2 N) */ + /* entropy = - 1/N ((sum_alphabet f(x) * ln f(x) * 1/ln 2) - N * ln N * 1/ln 2) */ + /* entropy = 1/(N ln 2) (N * ln N - (sum_alphabet f(x) * ln f(x))) */ + /* entropy = 1/(N ln 2) (N * ln N + (sum_alphabet -f(x) * ln f(x))) */ + + /* entropy = 1/(N ln 2) ( sum_alphabet ln N * f(x) + (sum_alphabet -f(x) * ln f(x))) */ + /* entropy = 1/(N ln 2) ( sum_alphabet ln N * f(x) + (-f(x) * ln f(x))) */ + /* entropy = -1/(N ln 2) ( sum_alphabet -ln N * f(x) + (f(x) * ln f(x))) */ + /* entropy = -1/(N ln 2) ( sum_alphabet f(x)(- ln N + ln f(x))) */ + /* entropy = -1/(N ln 2) ( sum_alphabet f(x)(ln f(x)/N)) */ + /* entropy = -1/N ( sum_alphabet (1/(ln 2))f(x)(ln f(x)/N)) */ + /* entropy = -1/N ( sum_alphabet f(x)(log2 f(x)/N)) */ + /* entropy = - ( sum_alphabet f(x)/N(log2 f(x)/N)) */ + /* entropy = - ( sum_alphabet P(x)(log2 P(x))) */ + + + double freq; + double n_ln_n; + double rn_ln2; + double cur_ratio; + int n; + + /* delete old entropy accumulation */ + if 
(lzud->main_freq_table[main_index] != 1) { + freq = (double)lzud->main_freq_table[main_index]-1; + lzud->main_entropy += freq * log(freq); + } + /* add new entropy accumulation */ + freq = (double)lzud->main_freq_table[main_index]; + lzud->main_entropy -= freq * log(freq); + n = lzud->block_codesp - lzud->block_codes; + + if (((n & 0xFFF) == 0) && (lzud->left_in_block >= 0x1000)) { + n_ln_n = (double)n * log((double)n); + rn_ln2 = rloge2 / (double)n; + cur_ratio = (n * rn_ln2 *(n_ln_n + lzud->main_entropy) + 24 + 3 * 80 + NUM_CHARS + (lzud->main_tree_size-NUM_CHARS)*3 + NUM_SECONDARY_LENGTHS ) / (double)n; +#ifdef DEBUG_ENTROPY + fprintf(stderr, "n = %d\n", n); + fprintf(stderr, "main entropy = %f\n", rn_ln2 *(n_ln_n + lzud->main_entropy) ); + fprintf(stderr, "compression ratio (raw) = %f\n", 100.0 * rn_ln2 *(n_ln_n + lzud->main_entropy) /9.0 ); + fprintf(stderr, "compression ratio (ovh) = %f\n", 100.0 * cur_ratio/9.0); +#endif + if (cur_ratio > lzud->last_ratio) { +#ifdef DEBUG_ENTROPY + fprintf(stderr, "resetting huffman tables at %d\n", n); +#endif + lzud->subdivide = -1; + lz_stop_compressing(lzud->lzi); + } + lzud->last_ratio = cur_ratio; + } +} + +static int +lzx_output_match(lz_info *lzi, int match_pos, int match_len) +{ + lzx_data *lzud = (lzx_data *)lzi->user_data; + uint32_t formatted_offset; + uint32_t position_footer; + uint8_t length_footer; + uint8_t length_header; + uint16_t len_pos_header; + int position_slot; + short btdt; + +#ifdef DEBUG_LZ + { + int i; + int pos; + for (i = 0; i < match_len; i++) { + +#ifdef NONSLIDE + pos = match_pos + lzi->block_loc + i; + fprintf(stderr, "%c", lzi->block_buf[pos]); +#else + pos = match_pos + lzi->front_offset + i; + if (pos > lzi->slide_buf_size) + pos -= lzi->slide_buf_size; + fprintf(stderr, "%c", lzi->slide_buf[pos]); +#endif + } + } +#endif + position_footer = 0; + btdt = 0; + testforr: + if (match_pos == -lzud->R0) { + match_pos = 0; + formatted_offset = 0; + position_slot = 0; + } + else if (match_pos == 
-lzud->R1) { + lzud->R1 = lzud->R0; + lzud->R0 = -match_pos; + match_pos = 1; + formatted_offset = 1; + position_slot = 1; + } + else if (match_pos == -lzud->R2) { + lzud->R2 = lzud->R0; + lzud->R0 = -match_pos; + match_pos = 2; + formatted_offset = 2; + position_slot = 2; + } + else { + if (!btdt) { + btdt = 1; + if (find_match_at(lzi, lzud->R0, match_len, &match_pos) == 0) + goto testforr; + if (find_match_at(lzi, lzud->R1, match_len, &match_pos) == 0) + goto testforr; + if (find_match_at(lzi, lzud->R2, match_len, &match_pos) == 0) + goto testforr; + } + + formatted_offset = -match_pos + 2; + + if ((match_len < 3) || + ((formatted_offset >= 64) && (match_len < 4)) || + ((formatted_offset >= 2048) && (match_len < 5)) || + ((formatted_offset >= 65536) && (match_len < 6))) { + /* reject matches where extra_bits will likely be bigger than just outputting + literals. The numbers are basically derived through guessing + and trial and error */ + return -1; /* reject the match */ + } + + lzud->R2 = lzud->R1; + lzud->R1 = lzud->R0; + lzud->R0 = -match_pos; + + /* calculate position base using binary search of table; if log2 can be + done in hardware, approximation might work; + trunc(log2(formatted_offset*formatted_offset)) gets either the proper + position slot or the next one, except for slots 0, 1, and 39-49 + + Slots 0-1 are handled by the R0-R1 procedures + + Slots 36-49 (formatted_offset >= 262144) can be found by + (formatted_offset/131072) + 34 == + (formatted_offset >> 17) + 34; + */ + if (formatted_offset >= 262144) { + position_slot = (formatted_offset >> 17) + 34; + } + else { + int left, right, mid; + + left = 3; + right = lzud->num_position_slots - 1; + position_slot = -1; + while (left <= right) { + mid = (left + right)/2; + if ((position_base[mid] <= formatted_offset) && + position_base[mid+1] > formatted_offset) { + position_slot = mid; + break; + } +#if 0 + fprintf(stderr, "BEFORE: %06x %06x %06x %06x\n", + position_base[left], position_base[mid], + 
formatted_offset, position_base[right]); +#endif + if (formatted_offset > position_base[mid]) + /* too low */ + left = mid + 1; + else /* too high */ + right = mid; +#if 0 + fprintf(stderr, "AFTER : %06x %06x %06x %06x\n", + position_base[left], position_base[mid], + formatted_offset, position_base[right]); +#endif + } +#ifdef DEBUG_POSITION_SLOT_LOOKUP + if (position_slot < 0) { + fprintf(stderr, "lmr npr: %d %d %d %d\n", left, mid, right, lzud->num_position_slots); + fprintf(stderr, "AFTER : %07d %07d %07d %07d\n", + position_base[left], position_base[mid], + formatted_offset, position_base[right]); + fprintf(stderr, "(%d, %d, %d, %d, %d)\n", match_pos, match_len, formatted_offset, position_slot, position_footer); + } +#endif + assert(position_slot >= 0); + /* FIXME precalc extra_mask table */ + } + position_footer = ((1UL << extra_bits[position_slot]) - 1) & formatted_offset; + } +#ifdef DEBUG_MATCHES +#ifdef NONSLIDE + fprintf(stderr, "(%08x, %d, %d, %d, %d, %d)\n", lzud->lzi->cur_loc , match_pos, match_len, formatted_offset, position_slot, position_footer); +#else + fprintf(stderr, "(%08x, %d, %d, %d, %d, %d)\n", lzud->lzi->cur_loc - lzud->lzi->chars_in_match , match_pos, match_len, formatted_offset, position_slot, position_footer); +#endif +#endif + /* match length = 8 bits */ + /* position_slot = 6 bits */ + /* position_footer = 17 bits */ + /* total = 31 bits */ + /* plus one to say whether it's a literal or not */ + *lzud->block_codesp++ = 0x80000000 | /* bit 31 in intelligent bit ordering */ + (position_slot << 25) | /* bits 30-25 */ + (position_footer << 8) | /* bits 8-24 */ + (match_len - MIN_MATCH); /* bits 0-7 */ + + if (match_len < (NUM_PRIMARY_LENGTHS + MIN_MATCH)) { + length_header = match_len - MIN_MATCH; + /* length_footer = 255; */ /* not necessary */ + } + else { + length_header = NUM_PRIMARY_LENGTHS; + length_footer = match_len - (NUM_PRIMARY_LENGTHS + MIN_MATCH); + lzud->length_freq_table[length_footer]++; + } + len_pos_header = 
(position_slot << 3) | length_header; + lzud->main_freq_table[len_pos_header + NUM_CHARS]++; + if (extra_bits[position_slot] >= 3) { + lzud->aligned_freq_table[position_footer & 7]++; + } +#ifndef OLDFRAMING + lzud->left_in_block -= match_len; +#endif + if (lzud->subdivide) + check_entropy(lzud, len_pos_header + NUM_CHARS); + return 0; /* accept the match */ +} + +static void +lzx_output_literal(lz_info *lzi, u_char ch) +{ + lzx_data *lzud = (lzx_data *)lzi->user_data; + +#ifndef OLDFRAMING + lzud->left_in_block--; +#endif + *lzud->block_codesp++ = ch; +#ifdef DEBUG_LZ + fprintf(stderr, "%c", ch); +#endif + lzud->main_freq_table[ch]++; + if (lzud->subdivide) + check_entropy(lzud, ch); +} + +static void lzx_write_bits(lzx_data *lzxd, int nbits, uint32_t bits) +{ + int cur_bits; + int shift_bits; + int rshift_bits; + uint16_t mask_bits; + +#ifdef DEBUG_BITBUF + fprintf(stderr, "WB: %2d %08x\n", nbits, bits); +#endif + cur_bits = lzxd->bits_in_buf; + while ((cur_bits + nbits) >= 16) { + shift_bits = 16 - cur_bits; + rshift_bits = nbits - shift_bits; + if (shift_bits == 16) { + lzxd->bit_buf = (bits>>rshift_bits) & 0xFFFF; + } + else { + mask_bits = (1U << shift_bits) - 1; + lzxd->bit_buf <<= shift_bits; + lzxd->bit_buf |= (bits>>rshift_bits) & mask_bits; + } +#ifdef DEBUG_BITBUF + fprintf(stderr, "WBB: %04x\n", lzxd->bit_buf); +#endif +#ifdef LZX_BIG_ENDIAN + lzxd->bit_buf = ((lzxd->bit_buf & 0xFF)<<8) | (lzxd->bit_buf >> 8); +#endif + lzxd->put_bytes(lzxd->out_arg, sizeof(lzxd->bit_buf), &lzxd->bit_buf); + lzxd->len_compressed_output += sizeof(lzxd->bit_buf); + lzxd->bit_buf = 0; + nbits -= shift_bits; + cur_bits = 0; + } + /* (cur_bits + nbits) < 16. If nbits = 0, we're done. 
+ otherwise move bits in */ + shift_bits = nbits; + mask_bits = (1U << shift_bits) - 1; + lzxd->bit_buf <<= shift_bits; + lzxd->bit_buf |= bits & mask_bits; + cur_bits += nbits; + +#ifdef DEBUG_BITBUF + fprintf(stderr, "OBB: %2d %04x\n", cur_bits, lzxd->bit_buf); +#endif + lzxd->bits_in_buf = cur_bits; +} + +static void lzx_align_output(lzx_data *lzxd) +{ + if (lzxd->bits_in_buf) { + lzx_write_bits(lzxd, 16 - lzxd->bits_in_buf, 0); + } + if (lzxd->mark_frame) + lzxd->mark_frame(lzxd->mark_frame_arg, lzxd->len_uncompressed_input, lzxd->len_compressed_output); +} + +static void +lzx_write_compressed_literals(lzx_data *lzxd, int block_type) +{ + uint32_t *cursor = lzxd->block_codes; + uint32_t *endp = lzxd->block_codesp; + uint16_t position_slot; + uint32_t position_footer; + uint32_t match_len_m2; /* match length minus 2, which is MIN_MATCH */ + uint32_t verbatim_bits; + uint32_t block_code; + uint16_t length_header; + uint16_t length_footer; + uint16_t len_pos_header; + huff_entry *huffe; + int frame_count = (lzxd->len_uncompressed_input % LZX_FRAME_SIZE); + + lzxd->len_uncompressed_input -= frame_count; /* will be added back in later */ + while (cursor < endp) { + block_code = *cursor++; + if (block_code & 0x80000000) { + /* + * 0x80000000 | bit 31 in intelligent bit ordering + * (position_slot << 25) | bits 30-25 + * (position_footer << 8) | bits 8-24 + * (match_len - MIN_MATCH); bits 0-7 + * + */ + + match_len_m2 = block_code & 0xFF; /* 8 bits */ + position_footer = (block_code >> 8)& 0x1FFFF; /* 17 bits */ + position_slot = (block_code >> 25) & 0x3F; /* 6 bits */ + +#ifdef DEBUG_MATCHES_2 + fprintf(stderr, "%08x, %3d %2d %d\n", lzxd->len_uncompressed_input + frame_count, match_len_m2, position_slot, position_footer); +#endif + if (match_len_m2 < NUM_PRIMARY_LENGTHS) { + length_header = match_len_m2; + length_footer = 255; /* personal encoding for NULL */ + } + else { + length_header = NUM_PRIMARY_LENGTHS; + length_footer = match_len_m2 - NUM_PRIMARY_LENGTHS; + } 
+ len_pos_header = (position_slot << 3) | length_header; + huffe = &lzxd->main_tree[len_pos_header+NUM_CHARS]; + lzx_write_bits(lzxd, huffe->codelength, huffe->code); + if (length_footer != 255) { + huffe = &lzxd->length_tree[length_footer]; + lzx_write_bits(lzxd, huffe->codelength, huffe->code); + } + if ((block_type == LZX_ALIGNED_OFFSET_BLOCK) && (extra_bits[position_slot] >= 3)) { + /* aligned offset block and code */ + verbatim_bits = position_footer >> 3; + lzx_write_bits(lzxd, extra_bits[position_slot] - 3, verbatim_bits); + huffe = &lzxd->aligned_tree[position_footer&7]; + lzx_write_bits(lzxd, huffe->codelength, huffe->code); + } + else { + verbatim_bits = position_footer; + lzx_write_bits(lzxd, extra_bits[position_slot], verbatim_bits); + } + frame_count += match_len_m2 + 2; + } + else { + /* literal */ + assert(block_code < NUM_CHARS); + huffe = &lzxd->main_tree[block_code]; + lzx_write_bits(lzxd, huffe->codelength, huffe->code); + frame_count++; + } + if (frame_count == LZX_FRAME_SIZE) { + lzxd->len_uncompressed_input += frame_count; + lzx_align_output(lzxd); + frame_count = 0; + } +#ifdef DEBUG_MATCHES_2 + if (frame_count > LZX_FRAME_SIZE) { + fprintf(stderr, "uncomp_len = %x, frame_count = %x, block_code = %08x, match_len_m2 = %d", lzxd->len_uncompressed_input, frame_count, block_code, match_len_m2); + } +#endif + assert (frame_count < LZX_FRAME_SIZE); + } + lzxd->len_uncompressed_input += frame_count; +} + +static int +lzx_write_compressed_tree(struct lzx_data *lzxd, + struct huff_entry *tree, uint8_t *prevlengths, + int treesize) +{ + u_char *codes; + u_char *runs; + int freqs[LZX_PRETREE_SIZE]; + int cur_run; + int last_len; + huff_entry pretree[20]; + u_char *codep; + u_char *codee; + u_char *runp; + int excess; + int i; + int cur_code; + + codep = codes = malloc(treesize*sizeof(char)); + runp = runs = malloc(treesize*sizeof(char)); + memset(freqs, 0, sizeof(freqs)); + cur_run = 1; + last_len = tree[0].codelength; + for (i = 1; i <= treesize; i++) 
{ + if ((i == treesize) || (tree[i].codelength != last_len)) { + if (last_len == 0) { + while (cur_run >= 20) { + excess = cur_run - 20; + if (excess > 31) excess = 31; + *codep++ = 18; + *runp++ = excess; + cur_run -= excess + 20; + freqs[18]++; + } + while (cur_run >= 4) { + excess = cur_run - 4; + if (excess > 15) excess = 15; + *codep++ = 17; + *runp++ = excess; + cur_run -= excess + 4; + freqs[17]++; + } + while (cur_run > 0) { + *codep = prevlengths[i - cur_run]; + freqs[*codep++]++; + *runp++ = 0; /* not necessary */ + cur_run--; + } + } + else { + while (cur_run >= 4) { + if (cur_run == 4) excess = 0; + else excess = 1; + *codep++ = 19; + *runp++ = excess; + freqs[19]++; + /* right, MS lies again. Code is NOT + prev_len + len (mod 17), it's prev_len - len (mod 17)*/ + *codep = prevlengths[i-cur_run] - last_len; + if (*codep > 16) *codep += 17; + freqs[*codep++]++; + *runp++ = 0; /* not necessary */ + cur_run -= excess+4; + } + while (cur_run > 0) { + *codep = prevlengths[i-cur_run] - last_len; + if (*codep > 16) *codep += 17; + *runp++ = 0; /* not necessary */ + cur_run--; + freqs[*codep++]++; + } + } + if (i != treesize) + last_len = tree[i].codelength; + cur_run = 0; + } + cur_run++; + } + codee = codep; +#ifdef DEBUG_TREE_COMPRESSION + *codep++ = 255; + *runp++ = 255; + fprintf(stderr, "num: len code run\n"); + for (i = 0; i < treesize; i++) { + fprintf(stderr, "%3d: %2d %2d %2d\n", i, tree[i].codelength, codes[i], runs[i]); + } +#endif + /* now create the huffman table and write out the pretree */ + build_huffman_tree(LZX_PRETREE_SIZE, 16, freqs, pretree); + for (i = 0; i < LZX_PRETREE_SIZE; i++) { + lzx_write_bits(lzxd, 4, pretree[i].codelength); + } + codep = codes; + runp = runs; + cur_run = 0; + while (codep < codee) { + cur_code = *codep++; + lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code); + if (cur_code == 17) { + cur_run += *runp + 4; + lzx_write_bits(lzxd, 4, *runp); + } + else if (cur_code == 18) { + cur_run += *runp 
+ 20;
+      lzx_write_bits(lzxd, 5, *runp);
+    }
+    else if (cur_code == 19) {
+      cur_run += *runp + 4;
+      lzx_write_bits(lzxd, 1, *runp);
+      cur_code = *codep++;
+      lzx_write_bits(lzxd, pretree[cur_code].codelength, pretree[cur_code].code);
+      runp++;
+    }
+    else {
+      cur_run++;
+    }
+    runp++;
+  }
+  free(codes);
+  free(runs);
+  return 0;
+}
+
+/*
+ * Reset the encoder state that is otherwise carried from one block to the
+ * next: the one-bit header is flagged as pending again, R0/R1/R2 are set
+ * back to 1 (presumably the LZX repeated-offset registers -- confirm), the
+ * remembered tree lengths used for delta coding are zeroed, and the LZ layer
+ * is reset through lz_reset().
+ */
+void
+lzx_reset(lzx_data *lzxd)
+{
+  lzxd->need_1bit_header = 1;
+  lzxd->R0 = lzxd->R1 = lzxd->R2 = 1;
+  memset(lzxd->prev_main_treelengths, 0, lzxd->main_tree_size * sizeof(uint8_t));
+  memset(lzxd->prev_length_treelengths, 0, NUM_SECONDARY_LENGTHS * sizeof(uint8_t));
+  lz_reset(lzxd->lzi);
+}
+
+/*
+ * LZ-analyse up to block_size units of input (presumably bytes -- confirm
+ * against lz_compress()) and write them out as one or more LZX blocks.
+ *
+ *   lzxd        encoder state created by lzx_init()
+ *   block_size  amount of input to consume in this call; it is also the
+ *               number of uint32_t entries in the block_codes buffer, which
+ *               is reallocated whenever the requested size changes
+ *   subdivide   any non-zero value is stored as 1 in lzxd->subdivide
+ *
+ * A block is written when the block is full, when the input is exhausted, or
+ * when lzxd->subdivide has become negative (it is not made negative in this
+ * function; presumably the LZ callbacks do that).
+ *
+ * Always returns 0.
+ */
+int lzx_compress_block(lzx_data *lzxd, int block_size, int subdivide)
+{
+  int i;
+  uint32_t written_sofar = 0;
+  int block_type;
+  long uncomp_bits;
+  long comp_bits;
+  long comp_bits_ovh;
+  long uncomp_length;
+
+  if ((lzxd->block_size != block_size) || (lzxd->block_codes == NULL)) {
+    if (lzxd->block_codes != NULL) free(lzxd->block_codes);
+    lzxd->block_size = block_size;
+    /* NOTE(review): the result of this allocation is not checked */
+    lzxd->block_codes = malloc(block_size * sizeof(uint32_t));
+  }
+  lzxd->subdivide = subdivide?1:0;
+
+  lzxd->left_in_block = block_size;
+  lzxd->left_in_frame = LZX_FRAME_SIZE;
+  lzxd->main_entropy = 0.0;
+  lzxd->last_ratio = 9999999.0;
+  lzxd->block_codesp = lzxd->block_codes;
+
+  memset(lzxd->length_freq_table, 0, NUM_SECONDARY_LENGTHS * sizeof(int));
+  memset(lzxd->main_freq_table, 0, lzxd->main_tree_size * sizeof(int));
+  memset(lzxd->aligned_freq_table, 0, LZX_ALIGNED_SIZE * sizeof(int));
+  do {
+    lz_compress(lzxd->lzi, lzxd->left_in_block);
+    if (lzxd->left_in_frame == 0)
+      lzxd->left_in_frame = LZX_FRAME_SIZE;
+
+    if ((lzxd->subdivide<0) || !lzxd->left_in_block ||
+        (!lz_left_to_process(lzxd->lzi) && lzxd->at_eof(lzxd->in_arg))) {
+      /* now one block is LZ-analyzed. */
+      /* time to write it out */
+      uncomp_length = lzxd->block_size - lzxd->left_in_block - written_sofar;
+      /* uncomp_length will sometimes be 0 when input length is
+         an exact multiple of frame size */
+      if (uncomp_length == 0)
+        continue; /* in a do/while this jumps to the loop condition below */
+      if (lzxd->subdivide < 0) {
+#ifdef DEBUG_ENTROPY
+        fprintf(stderr, "subdivided\n");
+#endif
+        lzxd->subdivide = 1;
+      }
+
+      if (lzxd->need_1bit_header) {
+        /* one bit Intel preprocessing header */
+        /* always 0 because this implementation doesn't do Intel preprocessing */
+        lzx_write_bits(lzxd, 1, 0);
+        lzxd->need_1bit_header = 0;
+      }
+
+      /* handle extra bits */
+      /* compare 3 raw bits per aligned-offset symbol with the Huffman-coded
+         size plus the 3-bit-per-entry cost of sending the aligned tree */
+      uncomp_bits = comp_bits = 0;
+      build_huffman_tree(LZX_ALIGNED_SIZE, 7, lzxd->aligned_freq_table, lzxd->aligned_tree);
+      for (i = 0; i < LZX_ALIGNED_SIZE; i++) {
+        uncomp_bits += lzxd->aligned_freq_table[i]* 3;
+        comp_bits += lzxd->aligned_freq_table[i]* lzxd->aligned_tree[i].codelength;
+      }
+      comp_bits_ovh = comp_bits + LZX_ALIGNED_SIZE * 3;
+      if (comp_bits_ovh < uncomp_bits)
+        block_type = LZX_ALIGNED_OFFSET_BLOCK;
+      else
+        block_type = LZX_VERBATIM_BLOCK;
+
+#ifdef DEBUG_EXTRA_BITS
+      /* the counters are long, so they need the 'l' length modifier */
+      fprintf(stderr, "Extra bits uncompressed: %5ld compressed: %5ld compressed w/overhead %5ld gain/loss %5ld\n", uncomp_bits, comp_bits, comp_bits_ovh, uncomp_bits - comp_bits_ovh);
+#endif
+
+      /* block type */
+      lzx_write_bits(lzxd, 3, block_type);
+      /* uncompressed length */
+      lzx_write_bits(lzxd, 24, uncomp_length);
+
+      written_sofar = lzxd->block_size - lzxd->left_in_block;
+
+      /* now write out the aligned offset trees if present */
+      if (block_type == LZX_ALIGNED_OFFSET_BLOCK) {
+        for (i = 0; i < LZX_ALIGNED_SIZE; i++) {
+          lzx_write_bits(lzxd, 3, lzxd->aligned_tree[i].codelength);
+        }
+      }
+      /* end extra bits */
+      build_huffman_tree(lzxd->main_tree_size, LZX_MAX_CODE_LENGTH,
+                         lzxd->main_freq_table, lzxd->main_tree);
+      build_huffman_tree(NUM_SECONDARY_LENGTHS, 16,
+                         lzxd->length_freq_table, lzxd->length_tree);
+
+
+
+      /* now write the pre-tree and tree for main 1 */
+      lzx_write_compressed_tree(lzxd, lzxd->main_tree, lzxd->prev_main_treelengths, NUM_CHARS);
+
+      /* now write the pre-tree and tree for main 2*/
+      lzx_write_compressed_tree(lzxd, lzxd->main_tree + NUM_CHARS,
+                                lzxd->prev_main_treelengths + NUM_CHARS,
+                                lzxd->main_tree_size - NUM_CHARS);
+
+      /* now write the pre tree and tree for length */
+      lzx_write_compressed_tree(lzxd, lzxd->length_tree, lzxd->prev_length_treelengths,
+                                NUM_SECONDARY_LENGTHS);
+
+      /* now write literals */
+      lzx_write_compressed_literals(lzxd, block_type);
+
+      /* copy treelengths somewhere safe to do delta compression */
+      for (i = 0; i < lzxd->main_tree_size; i++) {
+        lzxd->prev_main_treelengths[i] = lzxd->main_tree[i].codelength;
+      }
+      for (i = 0; i < NUM_SECONDARY_LENGTHS; i++) {
+        lzxd->prev_length_treelengths[i] = lzxd->length_tree[i].codelength;
+      }
+      /* start the next block with empty statistics and an empty code buffer */
+      lzxd->main_entropy = 0.0;
+      lzxd->last_ratio = 9999999.0;
+      lzxd->block_codesp = lzxd->block_codes;
+
+      memset(lzxd->length_freq_table, 0, NUM_SECONDARY_LENGTHS * sizeof(int));
+      memset(lzxd->main_freq_table, 0, lzxd->main_tree_size * sizeof(int));
+      memset(lzxd->aligned_freq_table, 0, LZX_ALIGNED_SIZE * sizeof(int));
+    }
+  }
+  while (lzxd->left_in_block && (lz_left_to_process(lzxd->lzi) || !lzxd->at_eof(lzxd->in_arg)));
+  return 0;
+}
+
+/*
+ * Allocate and initialise an encoder.
+ *
+ *   lzxdp       receives the new encoder; set to NULL when allocation fails,
+ *               left untouched when wsize_code is rejected
+ *   wsize_code  log2 of the window size, accepted range 15..21
+ *   get_bytes / get_bytes_arg, at_eof, put_bytes / put_bytes_arg,
+ *   mark_frame / mark_frame_arg
+ *               callbacks and their context pointers, stored unchanged in the
+ *               encoder (at_eof is later called with get_bytes_arg)
+ *
+ * Returns 0 on success, -1 for an out-of-range wsize_code and -2 when any
+ * allocation fails.
+ */
+int lzx_init(struct lzx_data **lzxdp, int wsize_code,
+             lzx_get_bytes_t get_bytes, void *get_bytes_arg,
+             lzx_at_eof_t at_eof,
+             lzx_put_bytes_t put_bytes, void *put_bytes_arg,
+             lzx_mark_frame_t mark_frame, void *mark_frame_arg)
+{
+  int wsize;
+  struct lzx_data *lzxd;
+
+  if ((wsize_code < 15) || (wsize_code > 21)) {
+    return -1;
+  }
+  lzx_init_static();
+
+  *lzxdp = lzxd = malloc(sizeof(*lzxd));
+  if (lzxd == 0)
+    return -2;
+
+  lzxd->in_arg = get_bytes_arg;
+  lzxd->out_arg = put_bytes_arg;
+  lzxd->mark_frame_arg = mark_frame_arg;
+  lzxd->get_bytes = get_bytes;
+  lzxd->put_bytes = put_bytes;
+  lzxd->at_eof = at_eof;
+  lzxd->mark_frame = mark_frame;
+
+  wsize = 1 << (wsize_code);
+
+  lzxd->bits_in_buf = 0;
+  lzxd->block_codes = NULL;
+  /* lzx_compress_block() compares against block_size before assigning it */
+  lzxd->block_size = 0;
+  lzxd->num_position_slots = num_position_slots[wsize_code-15];
+  lzxd->main_tree_size = (NUM_CHARS + 8 * lzxd->num_position_slots);
+
+  lzxd->main_freq_table = malloc(sizeof(int) * lzxd->main_tree_size);
+  lzxd->main_tree = malloc(sizeof(huff_entry)* lzxd->main_tree_size);
+  lzxd->prev_main_treelengths = malloc(sizeof(uint8_t)*lzxd->main_tree_size);
+
+  lzxd->lzi = malloc(sizeof (*lzxd->lzi));
+  if ((lzxd->main_freq_table == NULL) || (lzxd->main_tree == NULL) ||
+      (lzxd->prev_main_treelengths == NULL) || (lzxd->lzi == NULL)) {
+    /* free(NULL) is a no-op, so a partial allocation can be undone blindly */
+    free(lzxd->lzi);
+    free(lzxd->prev_main_treelengths);
+    free(lzxd->main_tree);
+    free(lzxd->main_freq_table);
+    free(lzxd);
+    *lzxdp = NULL;
+    return -2;
+  }
+  /* the -3 prevents matches at wsize, wsize-1, wsize-2, all of which are illegal */
+  lz_init(lzxd->lzi, wsize, wsize - 3, MAX_MATCH, MIN_MATCH, LZX_FRAME_SIZE,
+          lzx_get_chars, lzx_output_match, lzx_output_literal,lzxd);
+  lzxd->len_uncompressed_input = 0;
+  lzxd->len_compressed_output = 0;
+  lzx_reset(lzxd);
+  return 0;
+}
+
+/*
+ * Tear down an encoder created by lzx_init().
+ *
+ *   lzxd  encoder to destroy; must not be used afterwards
+ *   lzxr  optional (may be NULL); receives the compressed and uncompressed
+ *         byte totals recorded in the encoder
+ *
+ * Always returns 0.
+ */
+int lzx_finish(struct lzx_data *lzxd, struct lzx_results *lzxr)
+{
+  /* lzx_align_output(lzxd); Not needed as long as frame padding is in place */
+  if (lzxr) {
+    lzxr->len_compressed_output = lzxd->len_compressed_output;
+    lzxr->len_uncompressed_input = lzxd->len_uncompressed_input;
+  }
+  lz_release(lzxd->lzi);
+  free(lzxd->lzi);
+  /* allocated lazily by lzx_compress_block(); still NULL if it never ran */
+  free(lzxd->block_codes);
+  free(lzxd->prev_main_treelengths);
+  free(lzxd->main_tree);
+  free(lzxd->main_freq_table);
+  free(lzxd);
+  return 0;
+}
+
diff --git a/reactos/tools/hhpcomp/utils.cpp b/reactos/tools/hhpcomp/utils.cpp
new file mode 100644
index 00000000000..af74ba958e1
--- /dev/null
+++ b/reactos/tools/hhpcomp/utils.cpp
@@ -0,0 +1,53 @@
+
+// This file is part of hhpcomp, a free HTML Help Project (*.hhp) compiler.
+// Copyright (C) 2015 Benedikt Freisen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+// NOTE(review): the header names after #include were lost in this copy of
+// the patch; the list below is reconstructed from the identifiers this file
+// uses and should be confirmed against the upstream commit.
+#include <string>
+#include <algorithm>
+#include <stdexcept>
+
+#include <ctype.h>
+#include <stdlib.h>
+
+using namespace std;
+
+// Upper-cases one char. toupper() is undefined for negative arguments other
+// than EOF, so the value is routed through unsigned char first; this matters
+// for non-ASCII bytes on platforms where plain char is signed.
+static char to_upper_char(char c)
+{
+    return static_cast<char>(::toupper(static_cast<unsigned char>(c)));
+}
+
+// Returns a copy of s with every character converted by toupper().
+string to_upper(string s)
+{
+    transform(s.begin(), s.end(), s.begin(), to_upper_char);
+    return s;
+}
+
+// Resolves path with the C library realpath() and returns the result as a
+// string. The buffer realpath() allocates is freed before returning.
+// Throws runtime_error("realpath failed") when realpath() returns NULL.
+string realpath(const char* path)
+{
+    // two arguments select the C library function, not this wrapper
+    char* temp = ::realpath(path, NULL);
+    if (temp == NULL)
+        throw runtime_error("realpath failed");
+    string result(temp);
+    free(temp);
+    return result;
+}
+
+// Returns a copy of s in which every backslash is replaced by a forward slash.
+string replace_backslashes(string s)
+{
+    replace(s.begin(), s.end(), '\\', '/');
+    return s;
+}
diff --git a/reactos/tools/hhpcomp/utils.h b/reactos/tools/hhpcomp/utils.h
new file mode 100644
index 00000000000..930830c055a
--- /dev/null
+++ b/reactos/tools/hhpcomp/utils.h
@@ -0,0 +1,24 @@
+
+// This file is part of hhpcomp, a free HTML Help Project (*.hhp) compiler.
+// Copyright (C) 2015 Benedikt Freisen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+#ifndef HHPCOMP_UTILS_H
+#define HHPCOMP_UTILS_H
+
+// The declarations name std::string explicitly so that this header does not
+// depend on the including file having issued "using namespace std" first.
+#include <string>
+
+// Returns an upper-cased copy of s.
+std::string to_upper(std::string s);
+
+// Returns the resolved form of path as produced by the C library realpath();
+// throws std::runtime_error when the path cannot be resolved.
+std::string realpath(const char* path);
+
+// Returns a copy of s with every backslash replaced by a forward slash.
+std::string replace_backslashes(std::string s);
+
+#endif // HHPCOMP_UTILS_H