mirror of
https://github.com/reactos/reactos.git
synced 2025-04-29 18:48:53 +00:00
[NOTEPAD] Encoding detection (#1852)
CORE-15548 In Notepad, if the input file has no BOM, detect the text encoding heuristically.
This commit is contained in:
parent
f052817d84
commit
e85664a3d8
1 changed files with 31 additions and 0 deletions
|
@ -4,6 +4,7 @@
|
|||
* Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
|
||||
* Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
|
||||
* Copyright 2002 Andriy Palamarchuk
|
||||
* Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
|
@ -47,6 +48,32 @@ static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, D
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
/*
 * Guesses the text encoding of a buffer that carries no byte-order mark.
 *
 * pBytes - raw bytes to examine
 * dwSize - number of bytes available at pBytes
 *
 * Returns ENCODING_UTF16LE when IsTextUnicode() accepts the buffer,
 * ENCODING_UTF16BE when only the byte-reversed statistics test passes,
 * ENCODING_UTF8 when the whole buffer converts as strict UTF-8, and
 * ENCODING_ANSI otherwise (including buffers of at most one byte).
 */
ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
{
    /*
     * IsTextUnicode() only reports the tests that were requested on input.
     * The byte-reversed statistics test therefore has to be requested
     * explicitly; otherwise no reverse bit is ever set on output and
     * UTF-16BE could never be detected.
     */
    INT flags = IS_TEXT_UNICODE_STATISTICS | IS_TEXT_UNICODE_REVERSE_STATISTICS;

    /* A buffer this short cannot hold a single UTF-16 code unit. */
    if (dwSize <= 1)
        return ENCODING_ANSI;

    if (IsTextUnicode(pBytes, dwSize, &flags))
    {
        return ENCODING_UTF16LE;
    }

    /*
     * Of the reverse group only the statistics test was requested, so the
     * masked result equals that flag exactly when the test passed.
     */
    if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) == IS_TEXT_UNICODE_REVERSE_STATISTICS)
    {
        return ENCODING_UTF16BE;
    }

    /*
     * Is it UTF-8? With MB_ERR_INVALID_CHARS the conversion fails (returns 0)
     * as soon as an invalid sequence is found, so a nonzero length means
     * the whole buffer is well-formed UTF-8.
     */
    if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0))
    {
        return ENCODING_UTF8;
    }

    return ENCODING_ANSI;
}
|
||||
|
||||
BOOL
|
||||
ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
|
||||
{
|
||||
|
@ -98,6 +125,10 @@ ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile,
|
|||
encFile = ENCODING_UTF8;
|
||||
dwPos += 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
encFile = AnalyzeEncoding((const char *)pBytes, dwSize);
|
||||
}
|
||||
|
||||
switch(encFile)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue