[WIN-ICONV]

* Import a library that provides an iconv implementation using the Win32 API.
[LIBXML2][LIBXSLT][MSXML3]
* Make use of this library to support Windows-1252 encoding.
* Fixes some failed msxml:domdoc tests.
CORE-6697

svn path=/trunk/; revision=62423
This commit is contained in:
Amine Khaldi 2014-03-05 10:42:27 +00:00
parent 61d834485b
commit 47ce544479
14 changed files with 2664 additions and 3 deletions

View file

@ -37,7 +37,7 @@ list(APPEND SOURCE
add_library(libxslt SHARED ${SOURCE})
set_module_type(libxslt win32dll)
target_link_libraries(libxslt libxml2)
target_link_libraries(libxslt libxml2 iconv-static)
add_importlibs(libxslt msvcrt ws2_32 kernel32)
if(MSVC)
add_importlibs(libxslt ntdll)

View file

@ -66,7 +66,7 @@ add_library(msxml3 SHARED
add_idl_headers(xmlparser_idlheader xmlparser.idl)
set_module_type(msxml3 win32dll)
target_link_libraries(msxml3 libxml2 uuid wine)
target_link_libraries(msxml3 libxml2 iconv-static uuid wine)
add_importlibs(msxml3 urlmon ws2_32 shlwapi oleaut32 ole32 user32 msvcrt kernel32 ntdll)
add_dependencies(msxml3 xmlparser_idlheader stdole2) # msxml3_v1.tlb needs stdole2.tlb
add_pch(msxml3 precomp.h SOURCE)

View file

@ -7,6 +7,7 @@ add_subdirectory(fullfat)
add_subdirectory(libmpg123)
add_subdirectory(libsamplerate)
add_subdirectory(libwine)
add_subdirectory(libwin-iconv)
add_subdirectory(libxml2)
if(MSVC)
add_subdirectory(stlport)

View file

@ -0,0 +1,8 @@
# Build script for the win-iconv library target (iconv-static).

# Define _CRT_SECURE_NO_WARNINGS for this directory when compiling with MSVC.
if(MSVC)
    add_definitions(-D_CRT_SECURE_NO_WARNINGS)
endif()

# The whole library is the single translation unit win_iconv.c.
add_library(iconv-static win_iconv.c)

# The target is called iconv-static, but the file it produces is named "iconv".
set_property(TARGET iconv-static PROPERTY OUTPUT_NAME "iconv")

# Order the build so that the psdk target is built before this one.
add_dependencies(iconv-static psdk)

View file

@ -0,0 +1,161 @@
2014-02-05 Yukihiro Nakadaira
* win_iconv.c: Added alias. ISO_8859-* ISO_8859_*
* win_iconv.c, win_iconv_test.c: Fixed for compiler warning.
2013-09-15 Yukihiro Nakadaira
* iconv.h: Fixed c++ style comment. (Issue 21) (Thanks to bgilbert)
2012-11-22 Yukihiro Nakadaira
* win_iconv.c: Fix warnings.
(Issue 19) (Thanks to yselkowitz)
2012-10-21 Yukihiro Nakadaira
* win_iconv.c, win_iconv_test.c: Add //ignore and -c flag.
2012-10-15 Yukihiro Nakadaira
* win_iconv.c, win_iconv_test.c: cosmetic change.
2012-09-19 Yukihiro Nakadaira
* iconv.h, win_iconv.c, win_iconv_test.c: Change iconv(3) prototype.
"const char **inbuf" -> "char **inbuf"
(Issue 8)
* win_iconv.c: Change to not use TEXT macro for GetProcAddress.
(Issue 17) (Thanks to EPienkowskia)
* win_iconv_test.c: Fix for -DUNICODE. Use GetModuleFileNameA.
2011-10-28 Yukihiro Nakadaira
* win_iconv.c: Add UCS-2.
(Issue 14) (Thanks to j.g.rennison)
2011-10-24 Yukihiro Nakadaira
* win_iconv.c: Add Big5-HKSCS alias.
(Issue 13) (Thanks to timothy.ty.lee)
2011-09-06 Yukihiro Nakadaira
* Makefile: Improvement of the creation of the DLL.
(Issue 10) (Thanks to vincent.torri)
2011-08-19 Yukihiro Nakadaira
* win_iconv.c: Fixed a bug that assumption that
sizeof(DWORD)==sizeof(void*) in find_imported_module_by_funcname.
(Issue 7) (Thanks to j.g.rennison)
2011-08-13 Yukihiro Nakadaira
* win_iconv.c, win_iconv_test.c: Fixed a bug that //translit
flag does not work when transliterating to the default
character.
(Issue 9) (Thanks to j.g.rennison)
2011-07-26 Yukihiro Nakadaira
* CMakeLists.txt: fix dll name with mingw.
(Issue 6) (Thanks to kalevlember)
2011-05-19 Yukihiro Nakadaira
* win_iconv.c: Add some more UCS aliases.
Merge from Tor Lillqvist version.
(Issue 4) (Thanks to mkbosmans)
2011-05-15 Yukihiro Nakadaira
* Makefile: use variable for tools in Makefile
(Issue 3) (Thanks to mkbosmans)
2011-01-13 Yukihiro Nakadaira
* win_iconv_test.c: Removed unused variable.
* win_iconv_test.c: Added USE_ICONV_H flag to compile with -liconv.
(Issue 2) (Thanks to amorilia.gamebox)
2010-04-14 Patrick von Reth
* added c++ support
2010-03-28 Patrick Spendrin
* CMakeLists.txt, win_iconv.c: add CMake buildsystem, fix bug from issue tracker
2009-07-25 Yukihiro Nakadaira
* win_iconv.c, readme.txt: doc fix
2009-07-06 Yukihiro Nakadaira
* win_iconv.c, Makefile, readme.txt: doc fix
2009-06-19 Yukihiro Nakadaira
* win_iconv.c: cosmetic change
* win_iconv.c: Change Unicode BOM behavior
1. Remove the BOM when "fromcode" is utf-16 or utf-32.
2. Add the BOM when "tocode" is utf-16 or utf-32.
2009-06-18 Yukihiro Nakadaira
* win_iconv.c: Fixed a bug that invalid input may cause an
endless loop
2009-06-18 Yukihiro Nakadaira
* win_iconv.c: Fixed a bug that libiconv_iconv_open() doesn't
work (Christophe Benoit)
2008-04-01 Yukihiro Nakadaira
* win_iconv.c: Added //TRANSLIT option.
http://bugzilla.gnome.org/show_bug.cgi?id=524314
2008-03-20 Yukihiro Nakadaira
* win_iconv.c: The dwFlags parameter to MultiByteToWideChars()
must be zero for some code pages (Tor Lillqvist)
2008-03-19 Yukihiro Nakadaira
* win_iconv.c: Added support for UCS-2 and GB18030 (Tor Lillqvist)
2007-12-03 Yukihiro Nakadaira
* iconv.h: #include <stddef.h> to use size_t
2007-11-28 Yukihiro Nakadaira
* win_iconv.c: bug fix for two things (Tor Lillqvist)
1) This is probably not important: Add a function
must_use_null_useddefaultchar() that checks for those
codepages for which the docs for WideCharToMultiByte() say
one has to use a NULL lpDefaultChar pointer. Don't know if
this is actually needed, but better to be safe than sorry.
2) This is essential: In kernel_wctomb(), the code should first
check if bufsize is zero, and return the E2BIG error in that
case.
2007-11-26 Yukihiro Nakadaira
* win_iconv.c: ISO-8859-1 should be CP28591, not CP1252 (Tor
Lillqvist)
2007-11-26 Yukihiro Nakadaira
* win_iconv.c: patch from Tor Lillqvist (with alteration)
2007-09-04 Yukihiro Nakadaira
* : Initial import

View file

@ -0,0 +1,23 @@
EXPORTS
iconv
iconv_open
iconv_close
iconvctl
libiconv=iconv
libiconv_open=iconv_open
libiconv_close=iconv_close
libiconvctl=iconvctl
;; libiconv-1.11.dll
;; TODO for binary compatibility
; _libiconv_version @1
; aliases2_lookup @2
; aliases_lookup @3
; iconv_canonicalize @4
; libiconv @5
; libiconv_close @6
; libiconv_open @7
; libiconv_relocate @8
; libiconv_set_relocation_prefix @9
; libiconvctl @10
; libiconvlist @11
; locale_charset @12

View file

@ -0,0 +1,14 @@
/*
 * Declarations of the three iconv entry points: iconv_open(), iconv_close()
 * and iconv().
 */
#ifndef _LIBICONV_H
#define _LIBICONV_H
#include <stddef.h> /* for size_t */
#ifdef __cplusplus
extern "C" {
#endif
/* Conversion descriptor; opaque to callers. */
typedef void* iconv_t;
/*
 * Open a descriptor for converting text from encoding `fromcode` to encoding
 * `tocode`. NOTE(review): callers in the accompanying test program treat
 * (iconv_t)(-1) as the failure value.
 */
iconv_t iconv_open(const char *tocode, const char *fromcode);
/* Dispose of a descriptor obtained from iconv_open(). */
int iconv_close(iconv_t cd);
/*
 * Convert bytes from *inbuf into *outbuf using descriptor `cd`; the buffer
 * pointers and the remaining-byte counters are passed by address.
 * NOTE(review): callers in the accompanying test program treat (size_t)(-1)
 * as the failure value and then inspect errno.
 */
size_t iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
#ifdef __cplusplus
}
#endif
#endif

View file

@ -0,0 +1,11 @@
LIBRARY MLANG.DLL
EXPORTS
ConvertINetMultiByteToUnicode@24
;; ConvertINetReset (not documented)
ConvertINetString@28
ConvertINetUnicodeToMultiByte@24
IsConvertINetStringAvailable@8
LcidToRfc1766A@12
LcidToRfc1766W@12
Rfc1766ToLcidA@8
Rfc1766ToLcidW@8

View file

@ -0,0 +1,54 @@
/*
 * Prototypes for a set of string-conversion and locale-name functions.
 * NOTE(review): the names match the exports listed for MLANG.DLL in the
 * accompanying module-definition file; the parameter descriptions below are
 * inferred from the parameter names only - confirm against the MLang
 * documentation before relying on them.
 */

/* Convert a string from one encoding (dwSrcEncoding) to another (dwDstEncoding). */
HRESULT WINAPI ConvertINetString(
LPDWORD lpdwMode,
DWORD dwSrcEncoding,
DWORD dwDstEncoding,
LPCSTR lpSrcStr,
LPINT lpnSrcSize,
LPBYTE lpDstStr,
LPINT lpnDstSize
);
/* Convert a multibyte string in dwSrcEncoding to a wide-character string. */
HRESULT WINAPI ConvertINetMultiByteToUnicode(
LPDWORD lpdwMode,
DWORD dwSrcEncoding,
LPCSTR lpSrcStr,
LPINT lpnMultiCharCount,
LPWSTR lpDstStr,
LPINT lpnWideCharCount
);
/* Convert a wide-character string to a multibyte string in dwEncoding. */
HRESULT WINAPI ConvertINetUnicodeToMultiByte(
LPDWORD lpdwMode,
DWORD dwEncoding,
LPCWSTR lpSrcStr,
LPINT lpnWideCharCount,
LPSTR lpDstStr,
LPINT lpnMultiCharCount
);
/* Query whether a conversion between the two encodings is available. */
HRESULT WINAPI IsConvertINetStringAvailable(
DWORD dwSrcEncoding,
DWORD dwDstEncoding
);
/* LCID -> RFC 1766 name, narrow-string variant. */
HRESULT WINAPI LcidToRfc1766A(
LCID Locale,
LPSTR pszRfc1766,
int nChar
);
/* LCID -> RFC 1766 name, wide-string variant. */
HRESULT WINAPI LcidToRfc1766W(
LCID Locale,
LPWSTR pszRfc1766,
int nChar
);
/* RFC 1766 name -> LCID, narrow-string variant. */
HRESULT WINAPI Rfc1766ToLcidA(
LCID *pLocale,
LPSTR pszRfc1766
);
/* RFC 1766 name -> LCID, wide-string variant. */
HRESULT WINAPI Rfc1766ToLcidW(
LCID *pLocale,
LPWSTR pszRfc1766
);

View file

@ -0,0 +1,20 @@
win_iconv is an iconv implementation that uses the Win32 API to convert.
win_iconv is placed in the public domain.
ENVIRONMENT VARIABLE:
WINICONV_LIBICONV_DLL
If $WINICONV_LIBICONV_DLL is set, win_iconv uses that DLL. If
loading the DLL or iconv_open() fails, it falls back to internal
conversion. If several DLLs are specified as a comma-separated list,
the first loadable DLL is used. The DLL should export
iconv_open(), iconv_close() and iconv(), or libiconv_open(),
libiconv_close() and libiconv().
(only available when USE_LIBICONV_DLL is defined at compile time)
The Win32 API does not support strict encoding conversion for some code pages.
In addition, the MLang functions drop or replace invalid bytes and do not
return a useful error status the way iconv does. This implementation
therefore cannot be used for encoding validation.
Yukihiro Nakadaira <yukihiro.nakadaira@gmail.com>

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,286 @@
#ifdef USE_ICONV_H
#include <iconv.h>
#include <windows.h>
#include <errno.h>
#include <string.h>
#include <stdlib.h>
#else
#include "win_iconv.c"
#endif
#include <stdio.h>
/*
 * Render the first `size` bytes of `str` as upper-case hexadecimal, two
 * digits per byte and no separators.
 *
 * Returns a pointer to a static buffer that is overwritten by the next call,
 * so the result must be consumed (or copied) before calling tohex() again.
 * If the hex text would not fit into the buffer, the output is truncated to
 * the whole bytes that do fit instead of writing past the end.
 */
const char *
tohex(const char *str, int size)
{
    static char buf[BUFSIZ];
    char *pbuf = buf;
    int i;

    buf[0] = 0;
    /* Every byte needs two digits; one more byte stays reserved for the NUL. */
    for (i = 0; i < size && (size_t)(pbuf - buf) + 3 <= sizeof(buf); ++i)
        pbuf += sprintf(pbuf, "%02X", str[i] & 0xFF);
    return buf;
}
/*
 * Map an errno value to a printable name.
 *
 * 0, EINVAL, EILSEQ and E2BIG are returned as string literals. Any other
 * value is formatted as a decimal number into a static buffer, which the next
 * such call overwrites.
 *
 * The numeric fallback deliberately carries no trailing newline: callers
 * embed the result in the middle of a report line.
 */
const char *
errstr(int errcode)
{
    static char buf[BUFSIZ];

    switch (errcode)
    {
    case 0: return "NOERROR";
    case EINVAL: return "EINVAL";
    case EILSEQ: return "EILSEQ";
    case E2BIG: return "E2BIG";
    }
    sprintf(buf, "%d", errcode);
    return buf;
}
#ifdef USE_LIBICONV_DLL
/* Result of the most recent setdll() probe: TRUE when a DLL could be opened. */
int use_dll;

/*
 * Set the WINICONV_LIBICONV_DLL environment variable to `dllpath` and probe
 * whether an ascii -> ascii conversion can be opened through a DLL.
 *
 * The probe's library handle is released again straight away; only the
 * outcome is kept, both in `use_dll` and as the return value (TRUE/FALSE).
 */
int
setdll(const char *dllpath)
{
    rec_iconv_t probe;
    char envstr[BUFSIZ];

    sprintf(envstr, "WINICONV_LIBICONV_DLL=%s", dllpath);
    putenv(envstr);

    if (!libiconv_iconv_open(&probe, "ascii", "ascii"))
    {
        use_dll = FALSE;
        return FALSE;
    }

    FreeLibrary(probe.hlibiconv);
    use_dll = TRUE;
    return TRUE;
}
#endif
/*
 * Only code pages that are installed on the system can be tested.
 *
 * Tries to open a conversion from `encname` to utf-8. Returns FALSE (after
 * printing a "skip" notice) when the encoding cannot be opened, TRUE
 * otherwise. When built against win_iconv.c directly, the code page the name
 * resolved to must equal `codepage`; a mismatch is reported and terminates
 * the program with exit status 1.
 */
int
check_enc(const char *encname, int codepage)
{
    iconv_t cd = iconv_open("utf-8", encname);

    if (cd == (iconv_t)(-1))
    {
        printf("%s(%d) IS NOT SUPPORTED: SKIP THE TEST\n", encname, codepage);
        return FALSE;
    }

#ifndef USE_ICONV_H
    {
        /* Peek into the descriptor to see which code page the alias selected. */
        int resolved = ((rec_iconv_t *)cd)->from.codepage;

        if (resolved != codepage)
        {
            printf("%s(%d) ALIAS IS MAPPED TO DIFFERENT CODEPAGE (%d)\n", encname, codepage, resolved);
            exit(1);
        }
    }
#endif

    iconv_close(cd);
    return TRUE;
}
/*
 * Run one conversion check and report the outcome on stdout.
 *
 * from, fromstr, fromsize  source encoding name, input bytes and their count
 * to, tostr, tosize        target encoding name, expected bytes and count
 * errcode                  errno value the conversion is expected to leave
 *                          behind (0 for success)
 * bufsize                  number of output bytes offered to iconv(); must
 *                          not exceed BUFSIZ
 * line                     caller's source line, echoed in failure messages
 *
 * Prints "OK" when the output matches and errno equals `errcode`; otherwise
 * prints the actual result and terminates the program with exit status 1.
 *
 * NOTE(review): the result is compared with strcmp(), so the comparison ends
 * at the first NUL byte of either buffer; expectations that contain 0x00
 * (e.g. UTF-16/UTF-32 output) are only checked up to that byte.
 */
void
test(const char *from, const char *fromstr, int fromsize, const char *to, const char *tostr, int tosize, int errcode, int bufsize, int line)
{
    /* One spare byte so the terminator fits even when all BUFSIZ bytes are used. */
    char outbuf[BUFSIZ + 1];
    const char *pin;
    char *pout;
    size_t inbytesleft;
    size_t outbytesleft;
    iconv_t cd;
    size_t r;
    int err;
#ifdef USE_LIBICONV_DLL
    /* Starts empty: it is printed below even when no DLL handle is available. */
    char dllpath[_MAX_PATH] = "";
#endif

    cd = iconv_open(to, from);
    if (cd == (iconv_t)(-1))
    {
        printf("%s -> %s: NG: INVALID ENCODING NAME: line=%d\n", from, to, line);
        exit(1);
    }

#ifdef USE_LIBICONV_DLL
    if (((rec_iconv_t *)cd)->hlibiconv != NULL)
        GetModuleFileNameA(((rec_iconv_t *)cd)->hlibiconv, dllpath, sizeof(dllpath));

    if (use_dll && ((rec_iconv_t *)cd)->hlibiconv == NULL)
    {
        printf("%s: %s -> %s: NG: FAILED TO USE DLL: line=%d\n", dllpath, from, to, line);
        exit(1);
    }
    else if (!use_dll && ((rec_iconv_t *)cd)->hlibiconv != NULL)
    {
        printf("%s: %s -> %s: NG: DLL IS LOADED UNEXPECTEDLY: line=%d\n", dllpath, from, to, line);
        exit(1);
    }
#endif

    errno = 0;

    pin = fromstr;
    pout = outbuf;
    inbytesleft = fromsize;
    outbytesleft = bufsize;
    r = iconv(cd, &pin, &inbytesleft, &pout, &outbytesleft);
    /* On success, call once more with a NULL input to flush pending output. */
    if (r != (size_t)(-1))
        r = iconv(cd, NULL, NULL, &pout, &outbytesleft);
    /* Capture errno right away; the printf() calls below may change it. */
    err = errno;
    *pout = 0;

    /* The descriptor is not needed any more; release it before reporting. */
    iconv_close(cd);

#ifdef USE_LIBICONV_DLL
    if (use_dll)
        printf("%s: ", dllpath);
#endif
    printf("%s(%s) -> ", from, tohex(fromstr, fromsize));
    printf("%s(%s%s%s): ", to, tohex(tostr, tosize),
            errcode == 0 ? "" : ":",
            errcode == 0 ? "" : errstr(errcode));
    if (strcmp(outbuf, tostr) == 0 && err == errcode)
        printf("OK\n");
    else
    {
        /* Dump exactly the bytes that were produced, whatever bufsize was. */
        printf("RESULT(%s:%s): ", tohex(outbuf, (int)(pout - outbuf)),
                errstr(err));
        printf("NG: line=%d\n", line);
        exit(1);
    }
}
/*
 * Byte count of a character array minus one, i.e. the length of a string
 * literal without its terminating NUL. Unlike strlen() this also counts
 * embedded NUL bytes; the argument must be an array, not a pointer.
 */
#define STATIC_STRLEN(arr) (sizeof(arr) - 1)
/*
 * Convenience wrappers around test(): each one fixes the expected errno
 * (0, EINVAL, EILSEQ or E2BIG), derives both lengths from the string
 * literals and passes the calling line. All of them offer BUFSIZ output
 * bytes except e2big(), which takes the output size as an extra argument.
 */
#define success(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), 0, BUFSIZ, __LINE__)
#define einval(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), EINVAL, BUFSIZ, __LINE__)
#define eilseq(from, fromstr, to, tostr) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), EILSEQ, BUFSIZ, __LINE__)
#define e2big(from, fromstr, to, tostr, bufsize) test(from, fromstr, STATIC_STRLEN(fromstr), to, tostr, STATIC_STRLEN(tostr), E2BIG, bufsize, __LINE__)
int
main(int argc, char **argv)
{
#ifdef USE_LIBICONV_DLL
/* test use of dll if $DEFAULT_LIBICONV_DLL was defined. */
if (setdll(""))
{
success("ascii", "ABC", "ascii", "ABC");
success("ascii", "ABC", "utf-16be", "\x00\x41\x00\x42\x00\x43");
}
else
{
printf("\nDLL TEST IS SKIPPED\n\n");
}
setdll("none");
#endif
if (check_enc("ascii", 20127))
{
success("ascii", "ABC", "ascii", "ABC");
/* MSB is dropped. Hmm... */
success("ascii", "\x80\xFF", "ascii", "\x00\x7F");
}
/* unicode (CP1200 CP1201 CP12000 CP12001 CP65001) */
if (check_enc("utf-8", 65001)
&& check_enc("utf-16be", 1201) && check_enc("utf-16le", 1200)
&& check_enc("utf-32be", 12001) && check_enc("utf-32le", 12000)
)
{
/* Test the BOM behavior
* 1. Remove the BOM when "fromcode" is utf-16 or utf-32.
* 2. Add the BOM when "tocode" is utf-16 or utf-32. */
success("utf-16", "\xFE\xFF\x01\x02", "utf-16be", "\x01\x02");
success("utf-16", "\xFF\xFE\x02\x01", "utf-16be", "\x01\x02");
success("utf-32", "\x00\x00\xFE\xFF\x00\x00\x01\x02", "utf-32be", "\x00\x00\x01\x02");
success("utf-32", "\xFF\xFE\x00\x00\x02\x01\x00\x00", "utf-32be", "\x00\x00\x01\x02");
success("utf-16", "\xFE\xFF\x00\x01", "utf-8", "\x01");
#ifndef GLIB_COMPILATION
success("utf-8", "\x01", "utf-16", "\xFE\xFF\x00\x01");
success("utf-8", "\x01", "utf-32", "\x00\x00\xFE\xFF\x00\x00\x00\x01");
#else
success("utf-8", "\x01", "utf-16", "\xFF\xFE\x01\x00");
success("utf-8", "\x01", "utf-32", "\xFF\xFE\x00\x00\x01\x00\x00\x00");
#endif
success("utf-16be", "\xFE\xFF\x01\x02", "utf-16be", "\xFE\xFF\x01\x02");
success("utf-16le", "\xFF\xFE\x02\x01", "utf-16be", "\xFE\xFF\x01\x02");
success("utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02", "utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02");
success("utf-32le", "\xFF\xFE\x00\x00\x02\x01\x00\x00", "utf-32be", "\x00\x00\xFE\xFF\x00\x00\x01\x02");
success("utf-16be", "\xFE\xFF\x00\x01", "utf-8", "\xEF\xBB\xBF\x01");
success("utf-8", "\xEF\xBB\xBF\x01", "utf-8", "\xEF\xBB\xBF\x01");
success("utf-16be", "\x01\x02", "utf-16le", "\x02\x01");
success("utf-16le", "\x02\x01", "utf-16be", "\x01\x02");
success("utf-16be", "\xFE\xFF", "utf-16le", "\xFF\xFE");
success("utf-16le", "\xFF\xFE", "utf-16be", "\xFE\xFF");
success("utf-32be", "\x00\x00\x03\x04", "utf-32le", "\x04\x03\x00\x00");
success("utf-32le", "\x04\x03\x00\x00", "utf-32be", "\x00\x00\x03\x04");
success("utf-32be", "\x00\x00\xFF\xFF", "utf-16be", "\xFF\xFF");
success("utf-16be", "\xFF\xFF", "utf-32be", "\x00\x00\xFF\xFF");
success("utf-32be", "\x00\x01\x00\x00", "utf-16be", "\xD8\x00\xDC\x00");
success("utf-16be", "\xD8\x00\xDC\x00", "utf-32be", "\x00\x01\x00\x00");
success("utf-32be", "\x00\x10\xFF\xFF", "utf-16be", "\xDB\xFF\xDF\xFF");
success("utf-16be", "\xDB\xFF\xDF\xFF", "utf-32be", "\x00\x10\xFF\xFF");
eilseq("utf-32be", "\x00\x11\x00\x00", "utf-16be", "");
eilseq("utf-16be", "\xDB\xFF\xE0\x00", "utf-32be", "");
success("utf-8", "\xE3\x81\x82", "utf-16be", "\x30\x42");
einval("utf-8", "\xE3", "utf-16be", "");
}
/* Japanese (CP932 CP20932 CP50220 CP50221 CP50222 CP51932) */
if (check_enc("cp932", 932)
&& check_enc("cp20932", 20932) && check_enc("euc-jp", 51932)
&& check_enc("cp50220", 50220) && check_enc("cp50221", 50221)
&& check_enc("cp50222", 50222) && check_enc("iso-2022-jp", 50221))
{
/* Test the compatibility for each other Japanese codepage.
* And validate the escape sequence handling for iso-2022-jp. */
success("utf-16be", "\xFF\x5E", "cp932", "\x81\x60");
success("utf-16be", "\x30\x1C", "cp932", "\x81\x60");
success("utf-16be", "\xFF\x5E", "cp932//nocompat", "\x81\x60");
eilseq("utf-16be", "\x30\x1C", "cp932//nocompat", "");
success("euc-jp", "\xA4\xA2", "utf-16be", "\x30\x42");
einval("euc-jp", "\xA4\xA2\xA4", "utf-16be", "\x30\x42");
eilseq("euc-jp", "\xA4\xA2\xFF\xFF", "utf-16be", "\x30\x42");
success("cp932", "\x81\x60", "iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42");
success("UTF-16BE", "\xFF\x5E", "iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42");
eilseq("UTF-16BE", "\x30\x1C", "iso-2022-jp//nocompat", "");
success("UTF-16BE", "\x30\x42\x30\x44", "iso-2022-jp", "\x1B\x24\x42\x24\x22\x24\x24\x1B\x28\x42");
success("iso-2022-jp", "\x1B\x24\x42\x21\x41\x1B\x28\x42", "UTF-16BE", "\xFF\x5E");
}
/*
* test for //translit
* U+FF41 (FULLWIDTH LATIN SMALL LETTER A) <-> U+0062 (LATIN SMALL LETTER A)
*/
eilseq("UTF-16BE", "\xFF\x41", "iso-8859-1", "");
success("UTF-16BE", "\xFF\x41", "iso-8859-1//translit", "a");
/*
* test for //translit
* Some character, not in "to" encoding -> DEFAULT CHARACTER (maybe "?")
*/
eilseq("UTF-16BE", "\x30\x42", "ascii", "");
success("UTF-16BE", "\x30\x42", "ascii//translit", "?");
/*
* test for //ignore
*/
eilseq("UTF-8", "\xFF A \xFF B", "ascii//ignore", " A B");
eilseq("UTF-8", "\xEF\xBC\xA1 A \xEF\xBC\xA2 B", "ascii//ignore", " A B");
eilseq("UTF-8", "\xEF\x01 A \xEF\x02 B", "ascii//ignore", "\x01 A \x02 B");
/*
* TODO:
* Test for state after iconv() failed.
* Ensure iconv() error is safe and continuable.
*/
return 0;
}

View file

@ -1,5 +1,7 @@
include_directories(include)
include_directories(
include
${REACTOS_SOURCE_DIR}/lib/3rdparty/libwin-iconv)
add_definitions(
-D__MINGW32__
@ -10,6 +12,8 @@ add_definitions(
-D_WINDOWS
-DWIN32
-DHAVE_CONFIG_H
-DLIBXML_ICONV_ENABLED
-DICONV_CONST=const
-D_DLL -D__USE_CRTIMP)
list(APPEND SOURCE

View file

@ -61,3 +61,7 @@ Website: http://www.mpg123.de/
Title: STLport
Used Version: 5.2.1
Website: http://stlport.sourceforge.net/
Title: win-iconv
Used Version: r44
Website: https://code.google.com/p/win-iconv/