reactos/sdk/lib/ucrt/convert/wcrtomb.cpp

533 lines
15 KiB
C++

/***
*wcrtomb.cpp - Convert wide character to multibyte character, with locale.
*
* Copyright (c) Microsoft Corporation. All rights reserved.
*
*Purpose:
* Convert a wide character into the equivalent multibyte character.
*
*******************************************************************************/
#include <corecrt_internal_mbstring.h>
#include <corecrt_internal_ptd_propagation.h>
#include <corecrt_internal_securecrt.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
using namespace __crt_mbstring;
/***
*errno_t _wcrtomb_internal() - Helper function to convert wide character to multibyte character.
*
*Purpose:
* Convert a wide character into the equivalent multi-byte character,
* according to the specified LC_CTYPE category, or the current locale.
* [ANSI].
*
* NOTE: Currently, the C libraries support the "C" locale only.
* Non-C locale support now available under _INTL switch.
*Entry:
* int *return_value = the number of chars written (-1 in error case)
* char *destination = pointer to multibyte character
* size_t destination_count = size of the destination buffer
* wchar_t wchar = source wide character
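* mbstate_t *state = pointer to conversion state (optional; reset on entry and forwarded to the UTF-8 converter)
* __crt_cached_ptd_host& ptd = host object that supplies the locale info and errno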
*
*Exit:
* Returns:
* Value of errno if errors, 0 otherwise. *return_value is set to -1 in error case.
*
*Exceptions:
*
*******************************************************************************/
// Converts a single wide character to its multibyte representation using the
// locale obtained from `ptd`.
//
//   return_value      - optional; receives the number of bytes produced, or -1 on failure.
//   destination       - receives the multibyte character (asserted non-null).
//   destination_count - size of `destination` in bytes (asserted non-zero).
//   wchar             - the wide character to convert.
//   state             - optional; its _Wchar member is cleared on entry and it is
//                       forwarded to the UTF-8 converter when the code page is CP_UTF8.
//   ptd               - supplies the locale and the errno accessor.
//
// Returns 0 on success, otherwise an errno value (EILSEQ when the character
// cannot be represented in the target code page).
_Success_(return == 0)
static errno_t __cdecl _wcrtomb_internal(
    int*                                        const return_value,
    __out_bcount_z_opt(destination_count) char* const destination,
    size_t                                      const destination_count,
    wchar_t                                     const wchar,
    mbstate_t*                                  const state,
    _Inout_ __crt_cached_ptd_host&                    ptd
    )
{
    _ASSERTE(destination != nullptr && destination_count > 0);

    _locale_t const locale = ptd.get_locale();

    _ASSERTE(
        locale->locinfo->_public._locale_mb_cur_max == 1 ||
        locale->locinfo->_public._locale_mb_cur_max == 2 ||
        locale->locinfo->_public._locale_lc_codepage == CP_UTF8);

    if (state)
    {
        state->_Wchar = 0;
    }

    if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
    {
        // Unlike c16rtomb, wctomb/wcrtomb have no ability to process a partial code point.
        // We could call c16rtomb and check for a lone surrogate or other error, but it is
        // simpler to call c32rtomb and treat any error as a failure.
        //
        // NOTE(review): destination_count is not forwarded to the UTF-8 helper; presumably
        // callers guarantee room for a full UTF-8 sequence - confirm.
        static mbstate_t local_state{};
        int const result = static_cast<int>(__crt_mbstring::__c32rtomb_utf8(
            destination,
            static_cast<char32_t>(wchar),
            (state != nullptr ? state : &local_state),
            ptd));

        if (return_value != nullptr)
        {
            *return_value = result;
        }

        // The helper's size_t result has been narrowed to int, so a failure sentinel
        // (presumably __crt_mbstring::INVALID, i.e. static_cast<size_t>(-1)) arrives here
        // as a negative number. An upper-bound check alone would classify that as success
        // and hide the error from the caller, so a non-negative count is required too.
        if (result >= 0 && result <= 4)
        {
            return 0;
        }

        return ptd.get_errno().value_or(0);
    }

    if (!locale->locinfo->locale_name[LC_CTYPE])
    {
        // No LC_CTYPE locale name: only code units that fit in one byte are convertible.
        if (wchar > 255) // Validate high byte
        {
            if (return_value)
            {
                *return_value = -1;
            }
            return ptd.get_errno().set(EILSEQ);
        }

        *destination = static_cast<char>(wchar);
        if (return_value)
        {
            *return_value = 1;
        }
        return 0;
    }

    // General case: let the code-page conversion routine do the work. A substituted
    // default character counts as a failure, as does a zero-length result.
    BOOL default_used{};
    int const size = __acrt_WideCharToMultiByte(
        locale->locinfo->_public._locale_lc_codepage,
        0,
        &wchar,
        1,
        destination,
        static_cast<int>(destination_count),
        nullptr,
        &default_used);

    if (size == 0 || default_used)
    {
        if (return_value)
        {
            *return_value = -1;
        }
        return ptd.get_errno().set(EILSEQ);
    }

    if (return_value)
    {
        *return_value = size;
    }
    return 0;
}
/***
*errno_t wcrtomb_s(retValue, destination, destination_count, wchar, state) - translate wchar_t to multibyte, restartably
*
*Purpose:
*
*Entry:
*
*Exit:
*
*Exceptions:
*
*******************************************************************************/
// Shared implementation of wcrtomb_s: validates the buffer arguments, converts
// one wide character, and reports the byte count through `return_value`.
//
// When `destination` is null the conversion is still performed, into a local
// scratch buffer, so that the byte count and error code are computed.
//
// Returns the error code from the conversion (0 on success), or EINVAL if the
// destination/destination_count combination is rejected by validation.
static errno_t __cdecl wcrtomb_s_internal(
    size_t*    const return_value,
    char*      const destination,
    size_t     const destination_count,
    wchar_t    const wchar,
    mbstate_t* const state,
    __crt_cached_ptd_host& ptd
    )
{
    // A non-null destination is accepted with any destination_count (including
    // zero): a single character is written, not a string, so no room for a
    // null terminator needs to be reserved.
    _UCRT_VALIDATE_RETURN_ERRCODE(ptd, (destination == nullptr && destination_count == 0) || (destination != nullptr), EINVAL);

    char scratch[MB_LEN_MAX];
    bool   const discard_output = (destination == nullptr);
    char*  const target         = discard_output ? scratch : destination;
    size_t const target_count   = discard_output ? static_cast<size_t>(MB_LEN_MAX) : destination_count;

    int converted = -1;
    errno_t const status = _wcrtomb_internal(&converted, target, target_count, wchar, state, ptd);

    if (return_value != nullptr)
    {
        // A count of -1 deliberately becomes static_cast<size_t>(-1).
        *return_value = static_cast<size_t>(converted);
    }

    return status;
}
// Public entry point: creates the per-call ptd host object and delegates all
// work to wcrtomb_s_internal, returning its error code unchanged.
extern "C" errno_t __cdecl wcrtomb_s(
    size_t*    const return_value,
    char*      const destination,
    size_t     const destination_count,
    wchar_t    const wchar,
    mbstate_t* const state
    )
{
    __crt_cached_ptd_host ptd;

    errno_t const status = wcrtomb_s_internal(
        return_value,
        destination,
        destination_count,
        wchar,
        state,
        ptd);

    return status;
}
// Non-secure variant: forwards to wcrtomb_s and returns the byte count it
// reports. The error code returned by wcrtomb_s is intentionally ignored; the
// result stays at static_cast<size_t>(-1) unless wcrtomb_s stores a count.
extern "C" size_t __cdecl wcrtomb(
    char*      const destination,
    wchar_t    const wchar,
    mbstate_t* const state
    )
{
    size_t bytes_written = static_cast<size_t>(-1);

    // A null destination must be paired with a zero size; otherwise the buffer
    // is treated as holding MB_LEN_MAX bytes.
    size_t const capacity = (destination != nullptr) ? static_cast<size_t>(MB_LEN_MAX) : 0;

    wcrtomb_s(&bytes_written, destination, capacity, wchar, state);
    return bytes_written;
}
/***
*errno_t wcsrtombs_s(retValue, destination, destination_count, pwcs, n, state) - translate wide char string to multibyte
* string
*
*Purpose:
*
*Entry:
*
*Exit:
*
*Exceptions:
*
*******************************************************************************/
/* Helper shared by secure and non-secure functions. */
// Converts the wide string at *source to multibyte form, one character at a time.
//
//   destination - output buffer, or null to only count the bytes required.
//   source      - in: address of the wide string pointer. When writing to a buffer it
//                 is updated on exit: null once the terminator has been converted,
//                 otherwise the position at which conversion stopped.
//   n           - maximum number of bytes to write (ignored when destination is null).
//   state       - conversion state handed to the per-character converter.
//   ptd         - supplies the locale and errno.
//
// Returns the number of bytes produced, not counting the terminating null, or
// static_cast<size_t>(-1) if a character could not be converted. UTF-8 locales are
// handled entirely by __wcsrtombs_utf8.
static size_t __cdecl _wcsrtombs_internal(
    _Pre_maybenull_ _Post_z_ char* destination,
    _Inout_ _Deref_prepost_z_ wchar_t const** const source,
    _In_ size_t n,
    _Out_opt_ mbstate_t* const state,
    _Inout_ __crt_cached_ptd_host& ptd
    ) throw()
{
    /* validation section */
    _UCRT_VALIDATE_RETURN(ptd, source != nullptr, EINVAL, (size_t)-1);

    _locale_t const locale = ptd.get_locale();
    if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8)
    {
        return __wcsrtombs_utf8(destination, source, n, state, ptd);
    }

    char scratch[MB_LEN_MAX];
    int produced = 0;
    size_t total = 0;
    wchar_t const* cursor = *source;

    if (destination == nullptr)
    {
        // Counting mode: convert into the scratch buffer and discard the bytes.
        for (;;)
        {
            _wcrtomb_internal(&produced, scratch, MB_LEN_MAX, *cursor, state, ptd);
            if (produced <= 0)
            {
                return static_cast<size_t>(-1);
            }
            if (scratch[produced - 1] == '\0')
            {
                return total + produced - 1;
            }

            total += produced;
            ++cursor;
        }
    }

    while (n > 0)
    {
        // Convert straight into the caller's buffer while a maximum-length character
        // is guaranteed to fit; otherwise stage the bytes in the scratch buffer.
        char* const target =
            (n < static_cast<size_t>(locale->locinfo->_public._locale_mb_cur_max))
                ? scratch
                : destination;

        _wcrtomb_internal(&produced, target, MB_LEN_MAX, *cursor, state, ptd);
        if (produced <= 0)
        {
            /* encountered invalid sequence */
            total = static_cast<size_t>(-1);
            break;
        }

        if (target != destination)
        {
            if (n < static_cast<size_t>(produced))
            {
                break; // Won't all fit
            }
            memcpy_s(destination, n, scratch, produced);
        }

        if (destination[produced - 1] == '\0')
        {
            // Encountered terminating null
            *source = nullptr;
            return total + produced - 1;
        }

        total += produced;
        ++cursor;
        destination += produced;
        n -= produced;
    }

    *source = cursor;
    return total;
}
// Public entry point: creates the per-call ptd host object and forwards every
// argument to _wcsrtombs_internal, returning its result unchanged.
extern "C" size_t __cdecl wcsrtombs(
    char*           const destination,
    wchar_t const** const source,
    size_t          const n,
    mbstate_t*      const state
    )
{
    __crt_cached_ptd_host ptd;

    size_t const converted = _wcsrtombs_internal(destination, source, n, state, ptd);
    return converted;
}
/***
*errno_t wcstombs_s() - Convert wide char string to multibyte char string.
*
*Purpose:
* Convert a wide char string into the equivalent multibyte char string,
* according to the LC_CTYPE category of the current locale.
*
* The destination string is always null terminated.
*
*Entry:
* size_t *return_value = Number of bytes modified including the terminating null
* This pointer can be nullptr.
* char *destination = pointer to destination multibyte char string
* size_t destination_count = size of the destination buffer
* const wchar_t *source = pointer to source wide character string
* size_t n = maximum number of bytes to store in destination (not including the terminating null)
* mbstate_t *state = pointer to state
*
*Exit:
* The error code.
*
*Exceptions:
* Input parameters are validated. Refer to the validation section of the function.
*
*******************************************************************************/
// Secure wide-string to multibyte-string conversion.
//
// *return_value (when provided) is preset to static_cast<size_t>(-1) and, on
// success, replaced by the number of bytes produced including the terminating
// null. A non-null destination is reset on entry and again on every failure
// path; on success it is null terminated.
//
// Returns 0 on success, EINVAL for rejected arguments, ERANGE when the result
// plus terminator does not fit in destination_count bytes, or the errno value
// left behind by a failed conversion.
extern "C" errno_t __cdecl wcsrtombs_s(
    size_t*         const return_value,
    char*           const destination,
    size_t          const destination_count,
    wchar_t const** const source,
    size_t          const n,
    mbstate_t*      const state
    )
{
    __crt_cached_ptd_host ptd;

    // Assume failure until the conversion has completed.
    if (return_value != nullptr)
    {
        *return_value = static_cast<size_t>(-1);
    }

    _UCRT_VALIDATE_RETURN_ERRCODE(
        ptd,
        (destination == nullptr && destination_count == 0) ||
        (destination != nullptr && destination_count > 0),
        EINVAL);

    if (destination != nullptr)
    {
        _RESET_STRING(destination, destination_count);
    }

    _UCRT_VALIDATE_RETURN_ERRCODE(ptd, source != nullptr, EINVAL);

    // Never ask the worker for more bytes than the buffer can hold.
    size_t const byte_limit = (n > destination_count) ? destination_count : n;

    size_t retsize = _wcsrtombs_internal(destination, source, byte_limit, state, ptd);
    if (retsize == static_cast<size_t>(-1))
    {
        if (destination != nullptr)
        {
            _RESET_STRING(destination, destination_count);
        }
        return ptd.get_errno().value_or(0);
    }

    // The worker's count excludes the null terminator; include it from here on.
    ++retsize;

    if (destination != nullptr)
    {
        if (retsize > destination_count)
        {
            // The string does not fit. The validation below is known to fail at this
            // point; it exists to report the error and return ERANGE.
            _RESET_STRING(destination, destination_count);
            _UCRT_VALIDATE_RETURN_ERRCODE(ptd, retsize <= destination_count, ERANGE);
        }

        // Guarantee termination.
        destination[retsize - 1] = '\0';
    }

    if (return_value != nullptr)
    {
        *return_value = retsize;
    }

    return 0;
}
// Converts a wide character into a one-byte character
// Converts a wide character to a single-byte character.
//
// Returns EOF for WEOF, for characters that fail to convert, and for characters
// whose multibyte form is not exactly one byte long. Otherwise returns the
// converted byte; it is read through plain `char`, so on platforms where `char`
// is signed a byte of 0x80 or above is returned as a negative int.
extern "C" int __cdecl wctob(wint_t const wchar)
{
    __crt_cached_ptd_host ptd;

    if (wchar == WEOF)
    {
        return EOF;
    }

    char local_buffer[MB_LEN_MAX];
    int byte_count = -1;
    mbstate_t state{};

    errno_t const status = _wcrtomb_internal(&byte_count, local_buffer, MB_LEN_MAX, wchar, &state, ptd);

    bool const is_single_byte = (status == 0) && (byte_count == 1);
    return is_single_byte ? local_buffer[0] : EOF;
}
// UTF-8 implementation of wcsrtombs.
//
//   dst - output buffer, or null to only count the bytes the conversion would produce.
//   src - address of the wide (UTF-16) string pointer. When writing to a buffer it is
//         updated: null after the terminator is converted, the start of the offending
//         code point on an invalid sequence, otherwise where conversion stopped.
//   len - number of bytes available in dst (unused when dst is null).
//   ps  - conversion state passed to the per-unit converter.
//   ptd - passed through to the per-unit converter.
//
// Returns the number of bytes produced, excluding the terminating null, or
// __crt_mbstring::INVALID if the converter reports an invalid sequence.
size_t __cdecl __crt_mbstring::__wcsrtombs_utf8(char* dst, const wchar_t** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd)
{
    const wchar_t* read_pos = *src;
    char scratch[MB_LEN_MAX];

    if (dst == nullptr)
    {
        // Counting mode: convert into the scratch buffer and only tally the sizes.
        size_t byte_total = 0;
        for (;;)
        {
            const size_t produced = __c16rtomb_utf8(scratch, *read_pos, ps, ptd);
            if (produced == __crt_mbstring::INVALID)
            {
                return produced;
            }
            if (produced > 0 && scratch[produced - 1] == '\0')
            {
                // Hit the null terminator. It is not counted in the return value.
                return byte_total + produced - 1;
            }

            byte_total += produced;
            ++read_pos;
        }
    }

    char* write_pos = dst;

    // Wide chars are UTF-16, so one code point may span two input units (a surrogate
    // pair). Remember where the current code point began so that failures can report
    // that position rather than the middle of a pair.
    const wchar_t* code_point_start = read_pos;

    for (;;)
    {
        // With fewer than 4 bytes left the next character is not guaranteed to fit,
        // so it is converted into the scratch buffer first.
        char* const target = (len < 4) ? scratch : write_pos;

        const size_t produced = __c16rtomb_utf8(target, *read_pos, ps, ptd);
        if (produced == __crt_mbstring::INVALID)
        {
            // Point src at the beginning of the invalid code point (the first unit of
            // the pair if this was the second half of a surrogate pair).
            *src = code_point_start;
            return produced;
        }

        if (target != write_pos)
        {
            if (len < produced)
            {
                // Won't fit: stop, rewinding to the start of this code point.
                read_pos = code_point_start;
                break;
            }

            // Fits in the remaining space, so move it out of the scratch buffer.
            memcpy(write_pos, target, produced);
        }

        if (produced > 0 && write_pos[produced - 1] == '\0')
        {
            // Reached the terminator: stop, without counting that final byte.
            read_pos = nullptr;
            write_pos += produced - 1;
            break;
        }

        ++read_pos;
        if (produced > 0)
        {
            // Output was emitted, so the next unit begins a new code point.
            code_point_start = read_pos;
        }
        len -= produced;
        write_pos += produced;
    }

    *src = read_pos;
    return write_pos - dst;
}