- Remove 2 wrong versions of wctomb and 2 wrong versions of wcstombs

- Implement new versions of wctomb and wcstombs, which consider the language set by setlocale() and work according to all behaviours I could find when testing under WinXP SP2.
  This was tested with my own test suite (which I can commit as well if anyone is interested)
- Do a real conversion to MultiByte characters using wctomb in fputwc and vfwprintf. (verified under WinXP SP2)
- Set 'MSVCRT___lc_codepage' and 'MSVCRT___lc_collate_cp' to 1252 by default ("C" locale) and not the current active codepage (which might not work with e.g. Eastern codepages)
- Add a new check for 'MultiByteCount < 0' to WideCharToMultiByte (also verified under WinXP SP2)
- Change MB_LEN_MAX back to 2, the value 5 only applies to newer CRTs (msvcrt only handles single-byte and double-byte characters)
- Don't compile the Wine-imported 'wcscpy_s', it isn't available in msvcrt

svn path=/trunk/; revision=34557
This commit is contained in:
Colin Finck 2008-07-16 21:40:09 +00:00
parent d37dc48ec3
commit 248d39652c
11 changed files with 163 additions and 399 deletions

View file

@ -1295,7 +1295,8 @@ WideCharToMultiByte(UINT CodePage, DWORD Flags,
/* Check the parameters. */
if (WideCharString == NULL ||
(MultiByteString == NULL && MultiByteCount > 0) ||
(PVOID)WideCharString == (PVOID)MultiByteString)
(PVOID)WideCharString == (PVOID)MultiByteString ||
MultiByteCount < 0)
{
SetLastError(ERROR_INVALID_PARAMETER);
return 0;

View file

@ -34,7 +34,7 @@
* Characteristics of the char data type.
*/
#define CHAR_BIT 8
#define MB_LEN_MAX 5
#define MB_LEN_MAX 2
#define SCHAR_MIN (-128)
#define SCHAR_MAX 127

View file

@ -355,9 +355,7 @@
<file>strxfrm.c</file>
<file>wcs.c</file>
<file>wcstol.c</file>
<file>wcstombs.c</file>
<file>wcstoul.c</file>
<file>wctomb.c</file>
<file>wsplitp.c</file>
<file>wtoi.c</file>
<file>wtoi64.c</file>

View file

@ -37,7 +37,10 @@
#define LT (_MLEAD | _MTRAIL)
#define PT (_MPUNCT | _MTRAIL)
#define MAX_LOCALE_LENGTH 256
extern unsigned char _mbctype[257];
extern int MSVCRT___lc_codepage;
extern char MSVCRT_current_lc_all[MAX_LOCALE_LENGTH];
#if defined (_MSC_VER)

View file

@ -26,7 +26,6 @@
* string to produce lc_all.
*/
#define MAX_ELEM_LEN 64 /* Max length of country/language/CP string */
#define MAX_LOCALE_LENGTH 256
unsigned char MSVCRT_mbctype[257];
static int g_mbcp_is_multibyte = 0;
@ -388,8 +387,8 @@ char *setlocale(int category, const char *locale)
{
MSVCRT_current_lc_all[0] = 'C';
MSVCRT_current_lc_all[1] = '\0';
MSVCRT___lc_codepage = GetACP();
MSVCRT___lc_collate_cp = GetACP();
MSVCRT___lc_codepage = 1252;
MSVCRT___lc_collate_cp = 1252;
switch (category) {
case MSVCRT_LC_ALL:

View file

@ -2475,23 +2475,28 @@ size_t CDECL fwrite(const void *ptr, size_t size, size_t nmemb, FILE* file)
*/
wint_t CDECL fputwc(wint_t wc, FILE* file)
{
if (file->_flag & _IOBINARY)
{
wchar_t mwc = wc;
if (file->_flag & _IOBINARY)
{
if (fwrite(&wc, sizeof(wc), 1, file) != 1)
return WEOF;
}
else
{
/* Convert to multibyte in text mode */
char mbc[MB_LEN_MAX];
int mb_return;
if (fwrite( &mwc, sizeof(mwc), 1, file) != 1)
return WEOF;
}
else
{
/* Convert the character to ANSI */
char c = (unsigned char)wc;
mb_return = wctomb(mbc, wc);
if (fwrite( &c, sizeof(c), 1, file) != 1)
return WEOF;
}
if(mb_return == -1)
return WEOF;
return wc;
/* Output all characters */
if (fwrite(mbc, mb_return, 1, file) != 1)
return WEOF;
}
return wc;
}
/*********************************************************************
@ -3121,7 +3126,6 @@ int CDECL vfprintf(FILE* file, const char *format, va_list valist)
int CDECL vfwprintf(FILE* file, const wchar_t *format, va_list valist)
{
wchar_t buf[2048], *mem = buf;
char mbbuf[2048], *mbmem = mbbuf;
int written, resize = sizeof(buf) / sizeof(wchar_t), retval;
/* See vfprintf comments */
while ((written = _vsnwprintf(mem, resize, format, valist)) == -1 ||
@ -3137,17 +3141,22 @@ int CDECL vfwprintf(FILE* file, const wchar_t *format, va_list valist)
/* Check if outputting to a text-file */
if (fdesc[file->_file].wxflag & WX_TEXT)
{
/* Convert to multibyte then */
written = wcstombs(NULL, mem, 0);
/* Convert each character and stop at the first invalid character. Behavior verified by tests under WinXP SP2 */
char chMultiByte[MB_LEN_MAX];
int nReturn;
if (written >= sizeof(mbbuf) && (written != (int)-1))
mbmem = malloc(written + 1);
retval = 0;
wcstombs(mbmem, mem, written);
retval = fwrite(mbmem, 1, written, file);
while(*mem)
{
nReturn = wctomb(chMultiByte, *mem);
if (mbmem != mbbuf)
free(mbmem);
if(nReturn == -1)
break;
retval += fwrite(chMultiByte, 1, nReturn, file);
mem++;
}
}
else
{

View file

@ -40,12 +40,6 @@
//WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);
// HACK for LIBCNT
#ifndef debugstr_w
#define debugstr_w
#endif
#undef sprintf
#undef wsprintf
#undef snprintf
@ -77,6 +71,7 @@ INT CDECL _wcsicoll( const wchar_t* str1, const wchar_t* str2 )
return strcmpiW( str1, str2 );
}
#endif
/*********************************************************************
* _wcsnset (MSVCRT.@)
*/
@ -102,6 +97,7 @@ wchar_t* CDECL _wcsrev( wchar_t* str )
}
return ret;
}
#ifndef _LIBCNT_
/*********************************************************************
* _wcsset (MSVCRT.@)
@ -854,6 +850,7 @@ int CDECL vswprintf( wchar_t* str, const wchar_t* format, va_list args )
return _vsnwprintf( str, INT_MAX, format, args );
}
#endif
/*********************************************************************
* wcscoll (MSVCRT.@)
*/
@ -876,6 +873,7 @@ wchar_t* CDECL wcspbrk( const wchar_t* str, const wchar_t* accept )
}
return NULL;
}
#ifndef _LIBCNT_
/*********************************************************************
* wcstok (MSVCRT.@)
@ -896,16 +894,130 @@ wchar_t * CDECL wcstok( wchar_t *str, const wchar_t *delim )
data->wcstok_next = str;
return ret;
}
#endif
#ifndef __REACTOS__
/*********************************************************************
* wctomb (MSVCRT.@)
*/
INT CDECL wctomb( char *dst, wchar_t ch )
INT CDECL wctomb(char *mbchar, wchar_t wchar)
{
return WideCharToMultiByte( CP_ACP, 0, &ch, 1, dst, 6, NULL, NULL );
BOOL bUsedDefaultChar;
char chMultiByte[MB_LEN_MAX];
int nBytes;
/* At least one parameter needs to be given, the length of a null character cannot be queried (verified by tests under WinXP SP2) */
if(!mbchar && !wchar)
return 0;
/* Use WideCharToMultiByte for doing the conversion using the codepage currently set with setlocale() */
nBytes = WideCharToMultiByte(MSVCRT___lc_codepage, 0, &wchar, 1, chMultiByte, MB_LEN_MAX, NULL, &bUsedDefaultChar);
/* Only copy the character if an 'mbchar' pointer was given.
The "C" locale is emulated with codepage 1252 here. This codepage has a default character "?", but the "C" locale doesn't have one.
Therefore don't copy the character in this case. */
if(mbchar && !(MSVCRT_current_lc_all[0] == 'C' && !MSVCRT_current_lc_all[1] && bUsedDefaultChar))
memcpy(mbchar, chMultiByte, nBytes);
/* If the default character was used, set errno to EILSEQ and return -1. */
if(bUsedDefaultChar)
{
__set_errno(EILSEQ);
return -1;
}
/* Otherwise return the number of bytes this character occupies. */
return nBytes;
}
/*********************************************************************
 *		wcstombs (MSVCRT.@)
 *
 * Converts a null-terminated wide-character string to a multibyte string,
 * one character at a time, using wctomb().
 *
 * PARAMS
 *  mbstr  [O] Destination buffer, or NULL to query the required length
 *  wcstr  [I] Null-terminated wide-character source string
 *  count  [I] Maximum number of bytes to store in 'mbstr'
 *             (not used when 'mbstr' is NULL)
 *
 * RETURNS
 *  Query ('mbstr' is NULL): wcslen(wcstr) in the "C" locale, otherwise the
 *  sum of the per-character lengths reported by wctomb(), or (size_t)-1 if
 *  wctomb() fails for a character.
 *  Conversion: the number of bytes stored in 'mbstr' without the terminating
 *  null character, or (size_t)-1 if wctomb() failed for any character.
 *
 * NOTES
 *  Never more than 'count' bytes of 'mbstr' are modified. The terminating
 *  null character is only stored if there is still room for it, so the
 *  result is not null-terminated when the return value equals 'count'.
 */
size_t CDECL wcstombs(char *mbstr, const wchar_t *wcstr, size_t count)
{
    BOOL bUsedDefaultChar;
    char* p = mbstr;
    int nResult;

    /* Does the caller query for output buffer size? */
    if(!mbstr)
    {
        size_t nLength;

        /* If we currently use the "C" locale, the length of the input string is returned (verified by tests under WinXP SP2) */
        if(MSVCRT_current_lc_all[0] == 'C' && !MSVCRT_current_lc_all[1])
            return wcslen(wcstr);

        /* Otherwise check the length each character needs and build a final return value out of this */
        nLength = 0;

        while(*wcstr)
        {
            /* Get the length of this character */
            nResult = wctomb(NULL, *wcstr++);

            /* If this character is not convertible in the current locale, the end result will be -1 */
            if(nResult == -1)
                return (size_t)-1;

            nLength += nResult;
        }

        /* Return the final length */
        return nLength;
    }

    /* Convert the string then */
    bUsedDefaultChar = FALSE;

    for(;;)
    {
        char chMultiByte[MB_LEN_MAX];
        size_t uLength;

        /* Are we at the terminating null character? */
        if(!*wcstr)
        {
            /* Set the null character only if the buffer still has room for it; writing it
               unconditionally would touch mbstr[count] when the output exactly fills the buffer.
               The pointer is not incremented as the returned length never includes the terminating null character. */
            if(count > 0)
                *p = 0;

            break;
        }

        /* Convert this character into the temporary chMultiByte variable */
        ZeroMemory(chMultiByte, MB_LEN_MAX);
        nResult = wctomb(chMultiByte, *wcstr++);

        /* Check if this was an invalid character */
        if(nResult == -1)
            bUsedDefaultChar = TRUE;

        /* If we got no character, stop the conversion process here */
        if(!chMultiByte[0])
            break;

        /* Determine whether this is a double-byte or a single-byte character */
        if(chMultiByte[1])
            uLength = 2;
        else
            uLength = 1;

        /* Check if the buffer can still hold the full character. The comparison is done on
           size_t values, so a 'count' above INT_MAX is not mistaken for an exhausted buffer. */
        if(uLength > count)
            break;

        count -= uLength;

        /* It can, so copy it and move the pointer forward */
        memcpy(p, chMultiByte, uLength);
        p += uLength;
    }

    if(bUsedDefaultChar)
        return (size_t)-1;

    /* Return the length in bytes of the copied characters (without the terminating null character) */
    return p - mbstr;
}
#endif
#ifndef __REACTOS__
/*********************************************************************
* iswalnum (MSVCRT.@)
*/
@ -993,8 +1105,7 @@ INT CDECL iswxdigit( wchar_t wc )
{
return isxdigitW( wc );
}
#endif
#ifndef _LIBCNT_
/*********************************************************************
* wcscpy_s (MSVCRT.@)
*/

View file

@ -1,39 +0,0 @@
/*
* COPYRIGHT: See COPYING in the top level directory
* PROJECT: ReactOS system libraries
* FILE: lib/msvcrt/mbstring/wcstom.c
* PURPOSE:
* PROGRAMER:
* UPDATE HISTORY:
* 05/30/08: Samuel Serapion adapted from PROJECT C Library
*
*/
#include <precomp.h>
#include <mbctype.h>
#include <limits.h>
/*
 * Converts the null-terminated wide-character string 'widechar' to a
 * multibyte string by calling wctomb() for every character.
 *
 * string   - destination buffer, or NULL to query the number of bytes the
 *            converted string needs (without the terminating null byte)
 * widechar - null-terminated wide-character source string
 * count    - maximum number of bytes to store in 'string'
 *
 * Returns the number of bytes stored (or needed), not counting the
 * terminating null byte, or (size_t)-1 if wctomb() rejects a character.
 * The terminating null byte is only stored if it still fits into 'count'
 * bytes; a character is never stored partially.
 *
 * @implemented
 */
size_t wcstombs (char *string, const wchar_t *widechar, size_t count)
{
    char mb[MB_LEN_MAX];   /* scratch buffer, so 'string' is only written after the bound check */
    size_t cnt = 0;

    for (;;) {
        int bytes;
        int i;

        if (*widechar == 0) {
            /* End of input: terminate the output if there is room left. */
            if (string != NULL && cnt < count)
                *string = 0;
            return cnt;
        }

        /* 'count' limits output bytes, not the number of wide characters. */
        if (string != NULL && cnt >= count)
            return cnt;

        bytes = wctomb (mb, *widechar);
        if (bytes < 0)
            return (size_t) -1;

        if (string != NULL) {
            /* Stop before a character that would not fit completely. */
            if ((size_t) bytes > count - cnt)
                return cnt;
            for (i = 0; i < bytes; i++)
                *string++ = mb[i];
        }

        cnt += (size_t) bytes;
        widechar++;
    }
}

View file

@ -1,114 +0,0 @@
/* Copyright (C) 1991, 1992, 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <precomp.h>
#include <wchar.h>
#ifndef EILSEQ
#define EILSEQ EINVAL
#endif
static const wchar_t encoding_mask[] =
{
(wchar_t)~0x7ff, (wchar_t)~0xffff, (wchar_t)~0x1fffff, (wchar_t)~0x3ffffff
};
static const unsigned char encoding_byte[] =
{
0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};
/* The state is for this UTF8 encoding not used. */
//static mbstate_t internal;
//extern mbstate_t __no_r_state; /* Defined in mbtowc.c. */
size_t
__wcrtomb (char *s, wchar_t wc);
/*
* Convert WCHAR into its multibyte character representation,
* putting this in S and returning its length.
*
* Attention: this function should NEVER be intentionally used.
* The interface is completely stupid. The state is shared between
* all conversion functions. You should use instead the restartable
* version `wcrtomb'.
*
* @implemented
*/
int
wctomb (char *s, wchar_t wchar)
{
  /* A null destination is the "is this encoding state-dependent?" query.
     This implementation always answers 1 for it; any other call is
     forwarded to __wcrtomb, which does the actual conversion.  */
  return (s != NULL) ? (int) __wcrtomb (s, wchar) : 1;
}
/* Store the multibyte form of WC in S and return the number of bytes
   it occupies.  Values below 0x80 take one byte; everything else is
   emitted as a lead byte from `encoding_byte' followed by continuation
   bytes that carry six bits each.  */
size_t
__wcrtomb (char *s, wchar_t wc)
{
  char scratch[1];
  size_t total;
  size_t idx;

  /* Without a destination, act as if L'\0' were converted into a
     throw-away byte.  */
  if (s == NULL)
    {
      s = scratch;
      wc = L'\0';
    }

  /* One byte sequence.  */
  if (wc < 0x80)
    {
      *s = (char) wc;
      return 1;
    }

  /* Pick the shortest sequence length (2..6) whose mask leaves no bits
     of WC uncovered.  */
  total = 2;
  while (total < 6 && (wc & encoding_mask[total - 2]) != 0)
    ++total;

  /* Fill the continuation bytes from the last one down to index 1,
     consuming six bits of WC per byte; what remains goes into the
     lead byte.  */
  s[0] = encoding_byte[total - 2];
  for (idx = total - 1; idx > 0; --idx)
    {
      s[idx] = 0x80 | (wc & 0x3f);
      wc >>= 6;
    }
  s[0] |= wc;

  return total;
}

View file

@ -1,157 +0,0 @@
/* Copyright (C) 1991, 1992, 1995, 1996, 1997 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <precomp.h>
#include <wchar.h>
#ifndef EILSEQ
#define EILSEQ EINVAL
#endif
static const wchar_t encoding_mask[] =
{
(~0x7ff&WCHAR_MAX), (~0xffff&WCHAR_MAX), (~0x1fffff&WCHAR_MAX), (~0x3ffffff&WCHAR_MAX)
};
static const unsigned char encoding_byte[] =
{
0xc0, 0xe0, 0xf0, 0xf8, 0xfc
};
/* We don't need the state really because we don't have shift states
to maintain between calls to this function. */
static mbstate_t mbstate_internal;
mbstate_t __no_r_state; /* Now defined in wcstombs.c. */
//extern mbstate_t __no_r_state; /* Defined in mbtowc.c. */
size_t
__wcsrtombs (char *dst, const wchar_t **src, size_t len, mbstate_t *ps);
/*
* Convert the `wchar_t' string in PWCS to a multibyte character string
* in S, writing no more than N characters. Return the number of bytes
* written, or (size_t) -1 if an invalid `wchar_t' was found.
*
* Attention: this function should NEVER be intentionally used.
* The interface is completely stupid. The state is shared between
* all conversion functions. You should use instead the restartable
* version `wcsrtombs'.
*
* @implemented
*/
size_t
wcstombs (char *s, const wchar_t *pwcs, size_t n)
{
  /* Snapshot the shared conversion state so the call below cannot
     leave a lasting change in it.  */
  const mbstate_t saved_state = __no_r_state;

  /* The restartable worker does the conversion and reports either the
     number of bytes produced or an error value.  */
  const size_t result = __wcsrtombs (s, &pwcs, n, &__no_r_state);

  /* Put the snapshot back before handing the result to the caller.  */
  __no_r_state = saved_state;

  return result;
}
/*
 * Convert the wide-character string at *SRC into multibyte form.
 *
 * DST - output buffer; when NULL nothing is stored and only the byte
 *       count is computed (LEN is ignored in that case).
 * SRC - in: start of the wide string.  out: NULL if the terminating
 *       L'\0' was reached, otherwise the value of the read pointer when
 *       the loop stopped.
 * LEN - upper bound on the number of bytes to produce.
 * PS  - conversion state; replaced by an internal object when NULL and
 *       not otherwise read or written in this function.
 *
 * Returns the number of bytes produced, not counting the terminating
 * '\0' that is stored when the end of the input is reached.
 *
 * Characters below 0x80 are stored as one byte; all others are stored as
 * a lead byte from `encoding_byte' plus continuation bytes of the form
 * 0x80 | (six bits), i.e. a UTF-8-style layout.
 */
size_t
__wcsrtombs (char *dst, const wchar_t **src, size_t len, mbstate_t *ps)
{
size_t written = 0;
const wchar_t *run = *src;
if (ps == NULL)
ps = &mbstate_internal;
if (dst == NULL)
/* The LEN parameter has to be ignored if we don't actually write
anything.  ~0 converts to the largest size_t value here. */
len = ~0;
while (written < len)
{
/* Fetch the next input character; RUN now points past it. */
wchar_t wc = *run++;
#if 0
if (wc < 0 || wc > WCHAR_MAX)
{
/* This is no correct ISO 10646 character. */
__set_errno (EILSEQ);
return (size_t) -1;
}
#endif
if (wc == L'\0')
{
/* Found the end. */
if (dst != NULL)
*dst = '\0';
*src = NULL;
return written;
}
else if (wc < 0x80)
{
/* It's a one-byte sequence. */
if (dst != NULL)
*dst++ = (char) wc;
++written;
}
else
{
size_t step;
/* Find the shortest sequence length (2..6) whose mask covers WC. */
for (step = 2; step < 6; ++step)
if ((wc & encoding_mask[step - 2]) == 0)
break;
/* NOTE(review): `>=' also rejects a sequence that would end exactly at
LEN -- confirm whether this is intended or an off-by-one. */
if (written + step >= len)
/* Too long. */
break;
if (dst != NULL)
{
size_t cnt = step;
dst[0] = encoding_byte[cnt - 2];
--cnt;
/* Fill the continuation bytes from the last one down to index 1,
consuming six bits of WC per byte. */
do
{
dst[cnt] = 0x80 | (wc & 0x3f);
wc >>= 6;
}
while (--cnt > 0);
/* The remaining high bits of WC go into the lead byte. */
dst[0] |= wc;
dst += step;
}
written += step;
}
}
/* Store position of first unprocessed word.
NOTE(review): after the "Too long" break RUN has already been advanced
past the character that was not stored, so *SRC then points one
character beyond it -- confirm against callers. */
*src = run;
return written;
}
//weak_alias (__wcsrtombs, wcsrtombs)

View file

@ -1,47 +0,0 @@
/*
* COPYRIGHT: See COPYING in the top level directory
* PROJECT: ReactOS system libraries
* FILE: lib/sdk/crt/mbstring/wctomb.c
* PURPOSE:
* PROGRAMER:
* UPDATE HISTORY:
* 05/30/08: Samuel Serapion adapted from PROJECT C Library
*
*/
#include <precomp.h>
#include <mbstring.h>
/*
 * Stores 'widechar' in 'string' as one or two bytes and returns the
 * number of bytes stored.
 *
 * A value with no bits set in 0xff00 is stored as a single byte.  Any
 * other value is split into its high and low byte, which must pass
 * _ismbblead() and _ismbbtrail() respectively; if either check fails,
 * -1 is returned and nothing is stored.  A null 'string' yields 0.
 *
 * @implemented
 */
int wctomb (char *string, wchar_t widechar)
{
    int lead;
    int trail;

    if (string == 0)
        return 0;

    /* Single-byte case: nothing in the upper byte. */
    if ((widechar & 0xff00) == 0) {
        *string = (char) widechar & 0xff;
        return 1;
    }

    /* Double-byte case: validate both halves before storing anything. */
    lead = (widechar >> 8) & 0xff;
    trail = (widechar & 0xff);
    if (_ismbblead (lead) == 0 || _ismbbtrail (trail) == 0)
        return -1;

    string[0] = (char) lead;
    string[1] = (char) trail;
    return 2;
}