[CONUTILS] Use the currently active console code page to output text to files. To output UTF-8, run "chcp 65001" beforehand.

CORE-12122
This commit is contained in:
Hermès Bélusca-Maïto 2018-05-05 01:36:19 +02:00
parent 444553c7f0
commit 2b3fd31bc5
No known key found for this signature in database
GPG key ID: 3B2539C65E7B93D0
4 changed files with 33 additions and 37 deletions

View file

@ -103,7 +103,7 @@ ConWrite(
// if (IsConsoleHandle(Stream->hHandle)) // if (IsConsoleHandle(Stream->hHandle))
if (Stream->IsConsole) if (Stream->IsConsole)
{ {
// TODO: Check if (ConStream->Mode == WideText or UTF16Text) ?? // TODO: Check if (Stream->Mode == WideText or UTF16Text) ??
/* /*
* This code is inspired from _cputws, in particular from the fact that, * This code is inspired from _cputws, in particular from the fact that,
@ -136,15 +136,15 @@ ConWrite(
* *
* Implementation NOTE: * Implementation NOTE:
* MultiByteToWideChar (resp. WideCharToMultiByte) are equivalent to * MultiByteToWideChar (resp. WideCharToMultiByte) are equivalent to
* OemToCharBuffW (resp. CharToOemBuffW), but the latters uselessly * OemToCharBuffW (resp. CharToOemBuffW), but these latter functions
* depend on user32.dll, while MultiByteToWideChar and WideCharToMultiByte * uselessly depend on user32.dll, while MultiByteToWideChar and
* only need kernel32.dll. * WideCharToMultiByte only need kernel32.dll.
*/ */
if ((Stream->Mode == WideText) || (Stream->Mode == UTF16Text)) if ((Stream->Mode == WideText) || (Stream->Mode == UTF16Text))
{ {
#ifndef _UNICODE // UNICODE means that TCHAR == WCHAR == UTF-16 #ifndef _UNICODE // UNICODE means that TCHAR == WCHAR == UTF-16
/* Convert from the current process/thread's codepage to UTF-16 */ /* Convert from the current process/thread's code page to UTF-16 */
WCHAR *buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, len * sizeof(WCHAR)); PWCHAR buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, len * sizeof(WCHAR));
if (!buffer) if (!buffer)
{ {
SetLastError(ERROR_NOT_ENOUGH_MEMORY); SetLastError(ERROR_NOT_ENOUGH_MEMORY);
@ -200,18 +200,22 @@ ConWrite(
} }
else if ((Stream->Mode == UTF8Text) || (Stream->Mode == AnsiText)) else if ((Stream->Mode == UTF8Text) || (Stream->Mode == AnsiText))
{ {
CHAR *buffer; UINT CodePage;
PCHAR buffer;
/* /*
* Resolve the codepage cache if it was not assigned yet * Resolve the current code page if it has not been assigned yet
* (only if the stream is in ANSI mode; in UTF8 mode the * (we do this only if the stream is in ANSI mode; in UTF8 mode
* codepage was already set to CP_UTF8). * the code page is always set to CP_UTF8). Otherwise use the
* current stream's code page.
*/ */
if (/*(Stream->Mode == AnsiText) &&*/ (Stream->CodePage == INVALID_CP)) if (/*(Stream->Mode == AnsiText) &&*/ (Stream->CodePage == INVALID_CP))
Stream->CodePage = GetConsoleOutputCP(); // CP_ACP, CP_OEMCP CodePage = GetConsoleOutputCP(); // CP_ACP, CP_OEMCP
else
CodePage = Stream->CodePage;
#ifdef _UNICODE // UNICODE means that TCHAR == WCHAR == UTF-16 #ifdef _UNICODE // UNICODE means that TCHAR == WCHAR == UTF-16
/* Convert from UTF-16 to either UTF-8 or ANSI, using stream codepage */ /* Convert from UTF-16 to either UTF-8 or ANSI, using the stream code page */
// NOTE: MB_LEN_MAX is defined either in limits.h or in stdlib.h. // NOTE: MB_LEN_MAX is defined either in limits.h or in stdlib.h.
buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, len * MB_LEN_MAX); buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, len * MB_LEN_MAX);
if (!buffer) if (!buffer)
@ -219,14 +223,14 @@ ConWrite(
SetLastError(ERROR_NOT_ENOUGH_MEMORY); SetLastError(ERROR_NOT_ENOUGH_MEMORY);
return 0; return 0;
} }
len = WideCharToMultiByte(Stream->CodePage, 0, len = WideCharToMultiByte(CodePage, 0,
szStr, len, buffer, len * MB_LEN_MAX, szStr, len, buffer, len * MB_LEN_MAX,
NULL, NULL); NULL, NULL);
szStr = (PVOID)buffer; szStr = (PVOID)buffer;
#else #else
/* /*
* Convert from the current process/thread's codepage to either * Convert from the current process/thread's code page to either
* UTF-8 or ANSI, using stream codepage. * UTF-8 or ANSI, using the stream code page.
* We need to perform a double conversion, by going through UTF-16. * We need to perform a double conversion, by going through UTF-16.
*/ */
// TODO! // TODO!

View file

@ -73,9 +73,6 @@ static int ConToCRTMode[] =
_O_U16TEXT, // UTF16Text (UTF16 without BOM; translated) _O_U16TEXT, // UTF16Text (UTF16 without BOM; translated)
_O_U8TEXT, // UTF8Text (UTF8 without BOM; translated) _O_U8TEXT, // UTF8Text (UTF8 without BOM; translated)
}; };
#endif
#ifdef USE_CRT
/* /*
* See http://archives.miloush.net/michkap/archive/2008/03/18/8306597.html * See http://archives.miloush.net/michkap/archive/2008/03/18/8306597.html
@ -83,7 +80,7 @@ static int ConToCRTMode[] =
* for more details. * for more details.
*/ */
// NOTE: May the translated mode be cached somehow? // NOTE1: May the translated mode be cached somehow?
// NOTE2: We may also call IsConsoleHandle to directly set the mode to // NOTE2: We may also call IsConsoleHandle to directly set the mode to
// _O_U16TEXT if it's ok?? // _O_U16TEXT if it's ok??
// NOTE3: _setmode returns the previous mode, or -1 if failure. // NOTE3: _setmode returns the previous mode, or -1 if failure.
@ -99,16 +96,17 @@ do { \
#else /* defined(USE_CRT) */ #else /* defined(USE_CRT) */
/* /*
* We set Stream->CodePage to INVALID_CP (= -1) to signal that the codepage * We set Stream->CodePage to INVALID_CP (== -1) to signal that the code page
* is either not assigned (if the mode is Binary, WideText, or UTF16Text), or * is either not assigned (if the mode is Binary, WideText, or UTF16Text), or
* is not cached yet (if the mode is AnsiText). In this latter case the cache * is not cached (if the mode is AnsiText). In this latter case the code page
* is resolved inside ConWrite. Finally, if the mode is UTF8Text, the codepage * is resolved inside ConWrite. Finally, if the mode is UTF8Text, the code page
* cache is set to CP_UTF8. * cache is always set to CP_UTF8.
* The codepage cache can be reset by an explicit call to CON_STREAM_SET_MODE * The code page cache can be reset by an explicit call to CON_STREAM_SET_MODE
* (i.e. by calling ConStreamSetMode, or by reinitializing the stream with * (i.e. by calling ConStreamSetMode, or by reinitializing the stream with
* ConStreamInit(Ex)). * ConStreamInit(Ex)).
* *
* NOTE: the magic value could not be '0' since it is reserved for CP_ACP. * NOTE: the reserved values are: 0 (CP_ACP), 1 (CP_OEMCP), 2 (CP_MACCP),
* 3 (CP_THREAD_ACP), 42 (CP_SYMBOL), 65000 (CP_UTF7) and 65001 (CP_UTF8).
*/ */
#define CON_STREAM_SET_MODE(Stream, Mode, CacheCodePage) \ #define CON_STREAM_SET_MODE(Stream, Mode, CacheCodePage) \
do { \ do { \
@ -229,7 +227,7 @@ ConStreamSetCacheCodePage(
return FALSE; return FALSE;
/* /*
* Keep the original stream mode but set the correct file codepage * Keep the original stream mode but set the correct file code page
* (will be reset only if Mode == AnsiText). * (will be reset only if Mode == AnsiText).
*/ */
Mode = Stream->Mode; Mode = Stream->Mode;

View file

@ -113,20 +113,14 @@ do { \
} while(0) } while(0)
#endif /* defined(USE_CRT) */ #endif /* defined(USE_CRT) */
#ifdef _UNICODE
/* /*
* Use UTF8 by default for file output, because this mode is back-compatible * Use ANSI by default for file output, with no cached code page.
* with ANSI, and it displays nice on terminals that support UTF8 by default * Note that setting the stream mode to AnsiText and the code page value
* (not many terminals support UTF16 on the contrary). * to CP_UTF8 sets the stream to UTF8 mode, and has the same effect as if
* the stream mode UTF8Text had been specified instead.
*/ */
#define ConInitStdStreams() \
ConInitStdStreamsAndMode(UTF8Text, INVALID_CP)
/* Note that here the cache code page is unused */
#else
/* Use ANSI by default for file output */
#define ConInitStdStreams() \ #define ConInitStdStreams() \
ConInitStdStreamsAndMode(AnsiText, INVALID_CP) ConInitStdStreamsAndMode(AnsiText, INVALID_CP)
#endif /* defined(_UNICODE) */
/* Stream translation modes */ /* Stream translation modes */
BOOL BOOL

View file

@ -46,7 +46,7 @@ typedef struct _CON_STREAM
* when 'hHandle' refers to a file or a pipe. * when 'hHandle' refers to a file or a pipe.
*/ */
CON_STREAM_MODE Mode; CON_STREAM_MODE Mode;
UINT CodePage; // Used to convert UTF-16 text to some ANSI codepage. UINT CodePage; // Used to convert UTF-16 text to some ANSI code page.
#endif /* defined(USE_CRT) */ #endif /* defined(USE_CRT) */
} CON_STREAM, *PCON_STREAM; } CON_STREAM, *PCON_STREAM;