- Remove 2 wrong versions of wctomb and 2 wrong versions of wcstombs

- Implement new versions of wctomb and wcstombs, which consider the language set by setlocale() and work according to all behaviours I could find when testing under WinXP SP2. This was tested with my own test suite (which I can commit as well if anyone is interested).

- Do a real conversion to multibyte characters using wctomb in fputwc and vfwprintf (verified under WinXP SP2).

- Set 'MSVCRT___lc_codepage' and 'MSVCRT___lc_collate_cp' to 1252 by default ("C" locale) instead of the currently active codepage (which might not work with, e.g., Eastern codepages).

- Add a new check for 'MultiByteCount < 0' to WideCharToMultiByte (also verified under WinXP SP2).

- Change MB_LEN_MAX back to 2; the value 5 only applies to newer CRTs (msvcrt only handles single-byte and double-byte characters).

- Don't compile the Wine-imported 'wcscpy_s'; it isn't available in msvcrt.

svn path=/trunk/; revision=34557
2024-07-02 10:45:24 +00:00 · 2008-07-16 21:40:09 +00:00 · 2008-07-16 21:40:09 +00:00 · 248d39652c
parent d37dc48ec3
commit 248d39652c
11 changed files with 163 additions and 399 deletions
--- a/reactos/dll/win32/kernel32/misc/nls.c
+++ b/reactos/dll/win32/kernel32/misc/nls.c
@ -1295,7 +1295,8 @@ WideCharToMultiByte(UINT CodePage, DWORD Flags,
   /* Check the parameters. */
   if (WideCharString == NULL ||
       (MultiByteString == NULL && MultiByteCount > 0) ||
-       (PVOID)WideCharString == (PVOID)MultiByteString)
+       (PVOID)WideCharString == (PVOID)MultiByteString ||
+       MultiByteCount < 0)
   {
      SetLastError(ERROR_INVALID_PARAMETER);
      return 0;
--- a/reactos/include/crt/limits.h
+++ b/reactos/include/crt/limits.h
@ -34,7 +34,7 @@
 * Characteristics of the char data type.
 */
 #define CHAR_BIT	8
-#define MB_LEN_MAX	5
+#define MB_LEN_MAX	2

 #define SCHAR_MIN	(-128)
 #define SCHAR_MAX	127
--- a/reactos/lib/sdk/crt/crt.rbuild
+++ b/reactos/lib/sdk/crt/crt.rbuild
@ -355,9 +355,7 @@
 		<file>strxfrm.c</file>
 		<file>wcs.c</file>
 		<file>wcstol.c</file>
-		<file>wcstombs.c</file>
 		<file>wcstoul.c</file>
-		<file>wctomb.c</file>
 		<file>wsplitp.c</file>
 		<file>wtoi.c</file>
 		<file>wtoi64.c</file>
--- a/reactos/lib/sdk/crt/include/internal/mbstring.h
+++ b/reactos/lib/sdk/crt/include/internal/mbstring.h
@ -37,7 +37,10 @@
 #define LT (_MLEAD  | _MTRAIL)
 #define PT (_MPUNCT | _MTRAIL)

+#define MAX_LOCALE_LENGTH 256
 extern unsigned char _mbctype[257];
+extern int MSVCRT___lc_codepage;
+extern char MSVCRT_current_lc_all[MAX_LOCALE_LENGTH];

 #if defined (_MSC_VER)

--- a/reactos/lib/sdk/crt/locale/locale.c
+++ b/reactos/lib/sdk/crt/locale/locale.c
@ -26,7 +26,6 @@
 * string to produce lc_all.
 */
 #define MAX_ELEM_LEN 64 /* Max length of country/language/CP string */
-#define MAX_LOCALE_LENGTH 256

 unsigned char MSVCRT_mbctype[257];
 static int g_mbcp_is_multibyte = 0;
@ -388,8 +387,8 @@ char *setlocale(int category, const char *locale)
  {
    MSVCRT_current_lc_all[0] = 'C';
    MSVCRT_current_lc_all[1] = '\0';
-    MSVCRT___lc_codepage = GetACP();
-    MSVCRT___lc_collate_cp = GetACP();
+    MSVCRT___lc_codepage = 1252;
+    MSVCRT___lc_collate_cp = 1252;

    switch (category) {
    case MSVCRT_LC_ALL:
--- a/reactos/lib/sdk/crt/stdio/file.c
+++ b/reactos/lib/sdk/crt/stdio/file.c
@ -2475,23 +2475,28 @@ size_t CDECL fwrite(const void *ptr, size_t size, size_t nmemb, FILE* file)
 */
 wint_t CDECL fputwc(wint_t wc, FILE* file)
 {
-  if (file->_flag & _IOBINARY)
-  {
-    wchar_t mwc = wc;
+    if (file->_flag & _IOBINARY)
+    {
+        if (fwrite(&wc, sizeof(wc), 1, file) != 1)
+            return WEOF;
+    }
+    else
+    {
+        /* Convert to multibyte in text mode */
+        char mbc[MB_LEN_MAX];
+        int mb_return;

-    if (fwrite( &mwc, sizeof(mwc), 1, file) != 1)
-      return WEOF;
-  }
-  else
-  {
-    /* Convert the character to ANSI */
-    char c = (unsigned char)wc;
+        mb_return = wctomb(mbc, wc);

-    if (fwrite( &c, sizeof(c), 1, file) != 1)
-      return WEOF;
-  }
+        if(mb_return == -1)
+            return WEOF;

-  return wc;
+        /* Output all characters */
+        if (fwrite(mbc, mb_return, 1, file) != 1)
+            return WEOF;
+    }
+
+    return wc;
 }

 /*********************************************************************
@ -3121,7 +3126,6 @@ int CDECL vfprintf(FILE* file, const char *format, va_list valist)
 int CDECL vfwprintf(FILE* file, const wchar_t *format, va_list valist)
 {
  wchar_t buf[2048], *mem = buf;
-  char mbbuf[2048], *mbmem = mbbuf;
  int written, resize = sizeof(buf) / sizeof(wchar_t), retval;
  /* See vfprintf comments */
  while ((written = _vsnwprintf(mem, resize, format, valist)) == -1 ||
@ -3137,17 +3141,22 @@ int CDECL vfwprintf(FILE* file, const wchar_t *format, va_list valist)
  /* Check if outputting to a text-file */
  if (fdesc[file->_file].wxflag & WX_TEXT)
  {
-      /* Convert to multibyte then */
-      written = wcstombs(NULL, mem, 0);
+      /* Convert each character and stop at the first invalid character. Behavior verified by tests under WinXP SP2 */
+      char chMultiByte[MB_LEN_MAX];
+      int nReturn;

-      if (written >= sizeof(mbbuf) && (written != (int)-1))
-          mbmem = malloc(written + 1);
+      retval = 0;

-      wcstombs(mbmem, mem, written);
-      retval = fwrite(mbmem, 1, written, file);
+      while(*mem)
+      {
+          nReturn = wctomb(chMultiByte, *mem);

-      if (mbmem != mbbuf)
-        free(mbmem);
+          if(nReturn == -1)
+              break;
+
+          retval += fwrite(chMultiByte, 1, nReturn, file);
+          mem++;
+      }
  }
  else
  {
--- a/reactos/lib/sdk/crt/string/wcs.c
+++ b/reactos/lib/sdk/crt/string/wcs.c
@ -40,12 +40,6 @@

 //WINE_DEFAULT_DEBUG_CHANNEL(msvcrt);

-// HACK for LIBCNT
-#ifndef debugstr_w
-#define debugstr_w
-#endif
-
-
 #undef sprintf
 #undef wsprintf
 #undef snprintf
@ -77,6 +71,7 @@ INT CDECL _wcsicoll( const wchar_t* str1, const wchar_t* str2 )
  return strcmpiW( str1, str2 );
 }
 #endif
+
 /*********************************************************************
 *		_wcsnset (MSVCRT.@)
 */
@ -102,6 +97,7 @@ wchar_t* CDECL _wcsrev( wchar_t* str )
  }
  return ret;
 }
+
 #ifndef _LIBCNT_
 /*********************************************************************
 *		_wcsset (MSVCRT.@)
@ -854,6 +850,7 @@ int CDECL vswprintf( wchar_t* str, const wchar_t* format, va_list args )
    return _vsnwprintf( str, INT_MAX, format, args );
 }
 #endif
+
 /*********************************************************************
 *		wcscoll (MSVCRT.@)
 */
@ -876,6 +873,7 @@ wchar_t* CDECL wcspbrk( const wchar_t* str, const wchar_t* accept )
  }
  return NULL;
 }
+
 #ifndef _LIBCNT_
 /*********************************************************************
 *		wcstok  (MSVCRT.@)
@ -896,16 +894,130 @@ wchar_t * CDECL wcstok( wchar_t *str, const wchar_t *delim )
    data->wcstok_next = str;
    return ret;
 }
-#endif
-#ifndef __REACTOS__
+
 /*********************************************************************
 *		wctomb (MSVCRT.@)
 */
-INT CDECL wctomb( char *dst, wchar_t ch )
+INT CDECL wctomb(char *mbchar, wchar_t wchar)
 {
-  return WideCharToMultiByte( CP_ACP, 0, &ch, 1, dst, 6, NULL, NULL );
+    BOOL bUsedDefaultChar;
+    char chMultiByte[MB_LEN_MAX];
+    int nBytes;
+
+    /* At least one parameter needs to be given, the length of a null character cannot be queried (verified by tests under WinXP SP2) */
+    if(!mbchar && !wchar)
+        return 0;
+
+    /* Use WideCharToMultiByte for doing the conversion using the codepage currently set with setlocale() */
+    nBytes = WideCharToMultiByte(MSVCRT___lc_codepage, 0, &wchar, 1, chMultiByte, MB_LEN_MAX, NULL, &bUsedDefaultChar);
+
+    /* Only copy the character if an 'mbchar' pointer was given.
+
+       The "C" locale is emulated with codepage 1252 here. This codepage has a default character "?", but the "C" locale doesn't have one.
+       Therefore don't copy the character in this case. */
+    if(mbchar && !(MSVCRT_current_lc_all[0] == 'C' && !MSVCRT_current_lc_all[1] && bUsedDefaultChar))
+        memcpy(mbchar, chMultiByte, nBytes);
+
+    /* If the default character was used, set errno to EILSEQ and return -1. */
+    if(bUsedDefaultChar)
+    {
+        __set_errno(EILSEQ);
+        return -1;
+    }
+
+    /* Otherwise return the number of bytes this character occupies. */
+    return nBytes;
 }

+size_t CDECL wcstombs(char *mbstr, const wchar_t *wcstr, size_t count)
+{
+    BOOL bUsedDefaultChar;
+    char* p = mbstr;
+    int nResult;
+
+    /* Does the caller query for output buffer size? */
+    if(!mbstr)
+    {
+        int nLength;
+
+        /* If we currently use the "C" locale, the length of the input string is returned (verified by tests under WinXP SP2) */
+        if(MSVCRT_current_lc_all[0] == 'C' && !MSVCRT_current_lc_all[1])
+            return wcslen(wcstr);
+
+        /* Otherwise check the length each character needs and build a final return value out of this */
+        count = wcslen(wcstr);
+        nLength = 0;
+
+        while((int)(--count) >= 0 && *wcstr)
+        {
+            /* Get the length of this character */
+            nResult = wctomb(NULL, *wcstr++);
+
+            /* If this character is not convertible in the current locale, the end result will be -1 */
+            if(nResult == -1)
+                return -1;
+
+            nLength += nResult;
+        }
+
+        /* Return the final length */
+        return nLength;
+    }
+
+    /* Convert the string then */
+    bUsedDefaultChar = FALSE;
+
+    for(;;)
+    {
+        char chMultiByte[MB_LEN_MAX];
+        UINT uLength;
+
+        /* Are we at the terminating null character? */
+        if(!*wcstr)
+        {
+            /* Set the null character, but don't increment the pointer as the returned length never includes the terminating null character */
+            *p = 0;
+            break;
+        }
+
+        /* Convert this character into the temporary chMultiByte variable */
+        ZeroMemory(chMultiByte, MB_LEN_MAX);
+        nResult = wctomb(chMultiByte, *wcstr++);
+
+        /* Check if this was an invalid character */
+        if(nResult == -1)
+            bUsedDefaultChar = TRUE;
+
+        /* If we got no character, stop the conversion process here */
+        if(!chMultiByte[0])
+            break;
+
+        /* Determine whether this is a double-byte or a single-byte character */
+        if(chMultiByte[1])
+            uLength = 2;
+        else
+            uLength = 1;
+
+        /* Decrease 'count' by the character length and check if the buffer can still hold the full character */
+        count -= uLength;
+
+        if((int)count < 0)
+            break;
+
+        /* It can, so copy it and move the pointer forward */
+        memcpy(p, chMultiByte, uLength);
+        p += uLength;
+    }
+
+    if(bUsedDefaultChar)
+        return -1;
+
+    /* Return the length in bytes of the copied characters (without the terminating null character) */
+    return p - mbstr;
+}
+#endif
+
+#ifndef __REACTOS__
 /*********************************************************************
 *		iswalnum (MSVCRT.@)
 */
@ -993,8 +1105,7 @@ INT CDECL iswxdigit( wchar_t wc )
 {
    return isxdigitW( wc );
 }
-#endif
-#ifndef _LIBCNT_
+
 /*********************************************************************
 *		wcscpy_s (MSVCRT.@)
 */
--- a/reactos/lib/sdk/crt/string/wcstom.c
+++ b/reactos/lib/sdk/crt/string/wcstom.c
@ -1,39 +0,0 @@
-/*
- * COPYRIGHT:   See COPYING in the top level directory
- * PROJECT:     ReactOS system libraries
- * FILE:        lib/msvcrt/mbstring/wcstom.c
- * PURPOSE:
- * PROGRAMER:   
- * UPDATE HISTORY:
- *              05/30/08: Samuel Serapion adapted  from PROJECT C Library
- *
- */
-
-#include <precomp.h>
-#include <mbctype.h>
-
-/*
- * @implemented
- */
-size_t wcstombs (char *string, const wchar_t *widechar, size_t count)
-{
-    int n, bytes;
-    int cnt = 0;
-
-    for (n = 0; n < count; n++) {
-
-	if ((bytes = wctomb (string, *widechar)) < 0)
-	    return -1;
-
-	if (*string == 0)
-	    return cnt;
-
-	widechar++;
-	string += bytes;
-	cnt += bytes;
-    }
-
-    return cnt;
-}
-
-
--- a/reactos/lib/sdk/crt/string/wcstomb.c
+++ b/reactos/lib/sdk/crt/string/wcstomb.c
@ -1,114 +0,0 @@
-/* Copyright (C) 1991, 1992, 1995, 1996, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
-
-#include <precomp.h>
-#include <wchar.h>
-
-#ifndef EILSEQ
-#define EILSEQ EINVAL
-#endif
-
-static const wchar_t encoding_mask[] =
-{
-  (wchar_t)~0x7ff, (wchar_t)~0xffff, (wchar_t)~0x1fffff, (wchar_t)~0x3ffffff
-};
-
-static const unsigned char encoding_byte[] =
-{
-  0xc0, 0xe0, 0xf0, 0xf8, 0xfc
-};
-
-/* The state is for this UTF8 encoding not used.  */
-//static mbstate_t internal;
-
-
-//extern mbstate_t __no_r_state;  /* Defined in mbtowc.c.  */
-
-size_t
-__wcrtomb (char *s, wchar_t wc);
-
-/*
- * Convert WCHAR into its multibyte character representation,
- * putting this in S and returning its length.
- *
- * Attention: this function should NEVER be intentionally used.
- * The interface is completely stupid.  The state is shared between
- * all conversion functions.  You should use instead the restartable
- * version `wcrtomb'.
- *
- * @implemented
- */
-int
-wctomb (char *s, wchar_t wchar)
-{
-  /* If S is NULL the function has to return null or not null
-     depending on the encoding having a state depending encoding or
-     not.  This is nonsense because any multibyte encoding has a
-     state.  The ISO C amendment 1 corrects this while introducing the
-     restartable functions.  We simply say here all encodings have a
-     state.  */
-  if (s == NULL)
-    return 1;
-
-  return __wcrtomb (s, wchar);
-}
-
-
-size_t
-__wcrtomb (char *s, wchar_t wc)
-{
-  char fake[1];
-  size_t written = 0;
-
-
-
-  if (s == NULL)
-    {
-      s = fake;
-      wc = L'\0';
-    }
-
-  if (wc < 0x80)
-    {
-      /* It's a one byte sequence.  */
-      if (s != NULL)
-        *s = (char) wc;
-      return 1;
-    }
-
-  for (written = 2; written < 6; ++written)
-    if ((wc & encoding_mask[written - 2]) == 0)
-      break;
-
-  if (s != NULL)
-    {
-      size_t cnt = written;
-      s[0] = encoding_byte[cnt - 2];
-
-      --cnt;
-      do
-        {
-          s[cnt] = 0x80 | (wc & 0x3f);
-          wc >>= 6;
-        }
-      while (--cnt > 0);
-      s[0] |= wc;
-    }
-
-  return written;
-}
--- a/reactos/lib/sdk/crt/string/wcstombs.c
+++ b/reactos/lib/sdk/crt/string/wcstombs.c
@ -1,157 +0,0 @@
-/* Copyright (C) 1991, 1992, 1995, 1996, 1997 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Library General Public License as
-   published by the Free Software Foundation; either version 2 of the
-   License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Library General Public License for more details.
-
-   You should have received a copy of the GNU Library General Public
-   License along with the GNU C Library; see the file COPYING.LIB.  If not,
-   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-   Boston, MA 02111-1307, USA.  */
-
-#include <precomp.h>
-#include <wchar.h>
-
-#ifndef EILSEQ
-#define EILSEQ EINVAL
-#endif
-
-
-static const wchar_t encoding_mask[] =
-{
-  (~0x7ff&WCHAR_MAX), (~0xffff&WCHAR_MAX), (~0x1fffff&WCHAR_MAX), (~0x3ffffff&WCHAR_MAX)
-};
-
-static const unsigned char encoding_byte[] =
-{
-  0xc0, 0xe0, 0xf0, 0xf8, 0xfc
-};
-
-/* We don't need the state really because we don't have shift states
-   to maintain between calls to this function.  */
-
-static mbstate_t mbstate_internal;
-
-
-mbstate_t __no_r_state;  /* Now defined in wcstombs.c.  */
-//extern mbstate_t __no_r_state;  /* Defined in mbtowc.c.  */
-
-size_t
-__wcsrtombs (char *dst, const wchar_t **src, size_t len, mbstate_t *ps);
-
-/*
- * Convert the `wchar_t' string in PWCS to a multibyte character string
- * in S, writing no more than N characters.  Return the number of bytes
- * written, or (size_t) -1 if an invalid `wchar_t' was found.
- *
- * Attention: this function should NEVER be intentionally used.
- * The interface is completely stupid.  The state is shared between
- * all conversion functions.  You should use instead the restartable
- * version `wcsrtombs'.
- *
- * @implemented
- */
-size_t
-wcstombs (char *s, const wchar_t *pwcs, size_t n)
-{
-  mbstate_t save_shift = __no_r_state;
-  size_t written;
-
-  written = __wcsrtombs (s, &pwcs, n, &__no_r_state);
-
-  /* Restore the old shift state.  */
-  __no_r_state = save_shift;
-
-  /* Return how many we wrote (or maybe an error).  */
-  return written;
-}
-
-size_t
-__wcsrtombs (char *dst, const wchar_t **src, size_t len, mbstate_t *ps)
-{
-  size_t written = 0;
-  const wchar_t *run = *src;
-
-  if (ps == NULL)
-    ps = &mbstate_internal;
-
-  if (dst == NULL)
-    /* The LEN parameter has to be ignored if we don't actually write
-       anything.  */
-    len = ~0;
-
-  while (written < len)
-    {
-      wchar_t wc = *run++;
-
-#if 0
-      if (wc < 0 || wc > WCHAR_MAX)
-        {
-          /* This is no correct ISO 10646 character.  */
-          __set_errno (EILSEQ);
-          return (size_t) -1;
-        }
-#endif
-
-      if (wc == L'\0')
-        {
-          /* Found the end.  */
-          if (dst != NULL)
-            *dst = '\0';
-          *src = NULL;
-          return written;
-        }
-      else if (wc < 0x80)
-        {
-          /* It's an one byte sequence.  */
-          if (dst != NULL)
-            *dst++ = (char) wc;
-          ++written;
-        }
-      else
-        {
-          size_t step;
-
-          for (step = 2; step < 6; ++step)
-            if ((wc & encoding_mask[step - 2]) == 0)
-              break;
-
-          if (written + step >= len)
-            /* Too long.  */
-            break;
-
-          if (dst != NULL)
-            {
-              size_t cnt = step;
-
-              dst[0] = encoding_byte[cnt - 2];
-
-              --cnt;
-              do
-                {
-                  dst[cnt] = 0x80 | (wc & 0x3f);
-                  wc >>= 6;
-                }
-              while (--cnt > 0);
-              dst[0] |= wc;
-
-              dst += step;
-            }
-
-          written += step;
-        }
-    }
-
-  /* Store position of first unprocessed word.  */
-  *src = run;
-
-  return written;
-}
-//weak_alias (__wcsrtombs, wcsrtombs)
--- a/reactos/lib/sdk/crt/string/wctomb.c
+++ b/reactos/lib/sdk/crt/string/wctomb.c
@ -1,47 +0,0 @@
-/*
- * COPYRIGHT:   See COPYING in the top level directory
- * PROJECT:     ReactOS system libraries
- * FILE:        lib/sdk/crt/mbstring/wctomb.c
- * PURPOSE:
- * PROGRAMER:   
- * UPDATE HISTORY:
- *              05/30/08: Samuel Serapion adapted from PROJECT C Library
- *
- */
-
-#include <precomp.h>
-#include <mbstring.h>
-
-/*
- * @implemented
- */
-int wctomb (char *string, wchar_t widechar)
-{
-    int c1, c2;
-
-    if (string == 0)
-	return 0;
-
-    if (widechar & 0xff00) {
-
-	c1 = (widechar >> 8) & 0xff;
-	c2 = (widechar & 0xff);
-
-	if (_ismbblead (c1) == 0 || _ismbbtrail (c2) == 0)
-	    return -1;
-
-	*string++ = (char) c1;
-	*string   = (char) c2;
-
-	return 2;
-
-    }
-    else {
-
-	*string = (char) widechar & 0xff;
-
-	return 1;
-
-    }
-}
-