/*** *wcrtomb.cpp - Convert wide character to multibyte character, with locale. * * Copyright (c) Microsoft Corporation. All rights reserved. * *Purpose: * Convert a wide character into the equivalent multibyte character. * *******************************************************************************/ #include #include #include #include #include #include #include #include #include using namespace __crt_mbstring; /*** *errno_t _wcrtomb_internal() - Helper function to convert wide character to multibyte character. * *Purpose: * Convert a wide character into the equivalent multi-byte character, * according to the specified LC_CTYPE category, or the current locale. * [ANSI]. * * NOTE: Currently, the C libraries support the "C" locale only. * Non-C locale support now available under _INTL switch. *Entry: * int *return_value = the number of chars written (-1 in error case) * char *destination = pointer to multibyte character * size_t destination_count = size of the destinarion buffer * wchar_t wchar = source wide character * mbstate_t *state = pointer to state (not used) * _locale_t locale = locale info * *Exit: * Returns: * Value of errno if errors, 0 otherwise. *return_value is set to -1 in error case. * *Exceptions: * *******************************************************************************/ _Success_(return == 0) static errno_t __cdecl _wcrtomb_internal( int* const return_value, __out_bcount_z_opt(destination_count) char* const destination, size_t const destination_count, wchar_t const wchar, mbstate_t* const state, _Inout_ __crt_cached_ptd_host& ptd ) { _ASSERTE(destination != nullptr && destination_count > 0); _locale_t const locale = ptd.get_locale(); _ASSERTE( locale->locinfo->_public._locale_mb_cur_max == 1 || locale->locinfo->_public._locale_mb_cur_max == 2 || locale->locinfo->_public._locale_lc_codepage == CP_UTF8); if (state) { state->_Wchar = 0; } if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8) { // Unlike c16rtomb. wctomb/wcrtomb have no ability to process a partial code point. // So, we could call c16rtomb and check for a lone surrogate or other error, or for simplicity // We can instead just call c32rtomb and check for any error. I choose the latter. static mbstate_t local_state{}; int result = static_cast(__crt_mbstring::__c32rtomb_utf8(destination, static_cast(wchar), (state != nullptr ? state : &local_state), ptd)); if (return_value != nullptr) { *return_value = result; } if (result <= 4) { return 0; } else { return ptd.get_errno().value_or(0); } } if (!locale->locinfo->locale_name[LC_CTYPE]) { if (wchar > 255) // Validate high byte { if (return_value) *return_value = -1; return ptd.get_errno().set(EILSEQ); } *destination = static_cast(wchar); if (return_value) { *return_value = 1; } return 0; } BOOL default_used{}; int const size = __acrt_WideCharToMultiByte( locale->locinfo->_public._locale_lc_codepage, 0, &wchar, 1, destination, static_cast(destination_count), nullptr, &default_used); if (size == 0 || default_used) { if (return_value) { *return_value = -1; } return ptd.get_errno().set(EILSEQ); } if (return_value) { *return_value = size; } return 0; } /*** *errno_t wcrtomb_s(retValue, destination, destination_count, wchar, state) - translate wchar_t to multibyte, restartably * *Purpose: * *Entry: * *Exit: * *Exceptions: * *******************************************************************************/ static errno_t __cdecl wcrtomb_s_internal( size_t* const return_value, char* const destination, size_t const destination_count, wchar_t const wchar, mbstate_t* const state, __crt_cached_ptd_host& ptd ) { // Note that we do not force destination_count > 0 in the destination != // nullptr case because we do not need to add a null terminator, due to // the fact that the destination will receive a character and not a string. _UCRT_VALIDATE_RETURN_ERRCODE(ptd, (destination == nullptr && destination_count == 0) || (destination != nullptr), EINVAL); errno_t e = 0; int int_return_value = -1; if (destination == nullptr) { char buf[MB_LEN_MAX]; e = _wcrtomb_internal(&int_return_value, buf, MB_LEN_MAX, wchar, state, ptd); } else { e = _wcrtomb_internal(&int_return_value, destination, destination_count, wchar, state, ptd); } if (return_value != nullptr) { *return_value = static_cast(int_return_value); } return e; } extern "C" errno_t __cdecl wcrtomb_s( size_t* const return_value, char* const destination, size_t const destination_count, wchar_t const wchar, mbstate_t* const state ) { __crt_cached_ptd_host ptd; return wcrtomb_s_internal(return_value, destination, destination_count, wchar, state, ptd); } extern "C" size_t __cdecl wcrtomb( char* const destination, wchar_t const wchar, mbstate_t* const state ) { size_t return_value = static_cast(-1); wcrtomb_s(&return_value, destination, (destination == nullptr ? 0 : MB_LEN_MAX), wchar, state); return return_value; } /*** *errno_t wcsrtombs_s(retValue, destination, destination_count, pwcs, n, state) - translate wide char string to multibyte * string * *Purpose: * *Entry: * *Exit: * *Exceptions: * *******************************************************************************/ /* Helper shared by secure and non-secure functions. */ static size_t __cdecl _wcsrtombs_internal( _Pre_maybenull_ _Post_z_ char* destination, _Inout_ _Deref_prepost_z_ wchar_t const** const source, _In_ size_t n, _Out_opt_ mbstate_t* const state, _Inout_ __crt_cached_ptd_host& ptd ) throw() { /* validation section */ _UCRT_VALIDATE_RETURN(ptd, source != nullptr, EINVAL, (size_t)-1); _locale_t const locale = ptd.get_locale(); if (locale->locinfo->_public._locale_lc_codepage == CP_UTF8) { return __wcsrtombs_utf8(destination, source, n, state, ptd); } char buf[MB_LEN_MAX]; int i = 0; size_t nc = 0; wchar_t const* wcs = *source; if (!destination) { for (; ; nc += i, ++wcs) { /* translate but don't store */ _wcrtomb_internal(&i, buf, MB_LEN_MAX, *wcs, state, ptd); if (i <= 0) { return static_cast(-1); } else if (buf[i - 1] == '\0') { return nc + i - 1; } } } for (; 0 < n; nc += i, ++wcs, destination += i, n -= i) { /* translate and store */ char *t = nullptr; if (n < (size_t)locale->locinfo->_public._locale_mb_cur_max) { t = buf; } else { t = destination; } _wcrtomb_internal(&i, t, MB_LEN_MAX, *wcs, state, ptd); if (i <= 0) { /* encountered invalid sequence */ nc = (size_t)-1; break; } if (destination == t) { /* do nothing */ } else if (n < static_cast(i)) { break; // Won't all fit } else { memcpy_s(destination, n, buf, i); } if (destination[i - 1] == '\0') { // Encountered terminating null *source = 0; return nc + i - 1; } } *source = wcs; return nc; } extern "C" size_t __cdecl wcsrtombs( char* const destination, wchar_t const** const source, size_t const n, mbstate_t* const state ) { __crt_cached_ptd_host ptd; return _wcsrtombs_internal(destination, source, n, state, ptd); } /*** *errno_t wcstombs_s() - Convert wide char string to multibyte char string. * *Purpose: * Convert a wide char string into the equivalent multibyte char string, * according to the LC_CTYPE category of the current locale. * * The destination string is always null terminated. * *Entry: * size_t *return_value = Number of bytes modified including the terminating nullptr * This pointer can be nullptr. * char *destination = pointer to destination multibyte char string * size_t destination_count = size of the destination buffer * const wchar_t *source = pointer to source wide character string * size_t n = maximum number of bytes to store in s (not including the terminating nullptr) * mbstate_t *state = pointer to state * *Exit: * The error code. * *Exceptions: * Input parameters are validated. Refer to the validation section of the function. * *******************************************************************************/ extern "C" errno_t __cdecl wcsrtombs_s( size_t* const return_value, char* const destination, size_t const destination_count, wchar_t const** const source, size_t const n, mbstate_t* const state ) { __crt_cached_ptd_host ptd; if (return_value != nullptr) { *return_value = static_cast(-1); } _UCRT_VALIDATE_RETURN_ERRCODE( ptd, (destination == nullptr && destination_count == 0) || (destination != nullptr && destination_count > 0), EINVAL); if (destination != nullptr) { _RESET_STRING(destination, destination_count); } _UCRT_VALIDATE_RETURN_ERRCODE(ptd, source != nullptr, EINVAL); size_t retsize = _wcsrtombs_internal(destination, source, (n > destination_count ? destination_count : n), state, ptd); if (retsize == static_cast(-1)) { if (destination != nullptr) { _RESET_STRING(destination, destination_count); } return ptd.get_errno().value_or(0); } ++retsize; // Account for the null terminator if (destination != nullptr) { // Return error if the string does not fit: if (retsize > destination_count) { _RESET_STRING(destination, destination_count); _UCRT_VALIDATE_RETURN_ERRCODE(ptd, retsize <= destination_count, ERANGE); } // Ensure the string is null terminated: destination[retsize - 1] = '\0'; } if (return_value != nullptr) { *return_value = retsize; } return 0; } // Converts a wide character into a one-byte character extern "C" int __cdecl wctob(wint_t const wchar) { __crt_cached_ptd_host ptd; if (wchar == WEOF) { return EOF; } int return_value = -1; char local_buffer[MB_LEN_MAX]; mbstate_t state{}; errno_t const e = _wcrtomb_internal(&return_value, local_buffer, MB_LEN_MAX, wchar, &state, ptd); if (e == 0 && return_value == 1) { return local_buffer[0]; } return EOF; } size_t __cdecl __crt_mbstring::__wcsrtombs_utf8(char* dst, const wchar_t** src, size_t len, mbstate_t* ps, __crt_cached_ptd_host& ptd) { const wchar_t* current_src = *src; char buf[MB_LEN_MAX]; if (dst != nullptr) { char* current_dest = dst; // Wide chars are actually UTF-16, so a code point might take 2 input units (a surrogate pair) // In case of a failure, keep track of where the current code point began, which might be the previous // wchar for a surrogate pair const wchar_t* start_of_code_point = current_src; for (;;) { // If we don't have at least 4 MB_CUR_LEN bytes available in the buffer // the next char isn't guaranteed to fit, so put it into a temp buffer char* temp; if (len < 4) { temp = buf; } else { temp = current_dest; } const size_t retval = __c16rtomb_utf8(temp, *current_src, ps, ptd); if (retval == __crt_mbstring::INVALID) { // Set src to the beginning of the invalid char // If this was the second half of a surrogate pair, return the beginning of the surrogate pair *src = start_of_code_point; return retval; } if (temp == current_dest) { // We wrote in-place. Nothing to do. } else if (len < retval) { // Won't fit, so bail out // If this was the second half of a surrogate pair, make sure we return that location current_src = start_of_code_point; break; } else { // Will fit in remaining buffer, so let's copy it over memcpy(current_dest, temp, retval); } if (retval > 0 && current_dest[retval - 1] == '\0') { // Reached null terminator, so break out, but don't count that last terminating byte current_src = nullptr; current_dest += retval - 1; break; } ++current_src; if (retval > 0) { start_of_code_point = current_src; } len -= retval; current_dest += retval; } *src = current_src; return current_dest - dst; } else { size_t total_count = 0; for (;;) { const size_t retval = __c16rtomb_utf8(buf, *current_src, ps, ptd); if (retval == __crt_mbstring::INVALID) { return retval; } else if (retval > 0 && buf[retval - 1] == '\0') { // Hit null terminator. Don't count it in the return value. total_count += retval - 1; break; } total_count += retval; ++current_src; } return total_count; } }