[NOTEPAD] Encoding detection (#1852)

CORE-15548
In Notepad, if the input file has no BOM, detect the text encoding heuristically.
This commit is contained in:
Katayama Hirofumi MZ 2019-08-18 22:46:56 +09:00 committed by GitHub
parent f052817d84
commit e85664a3d8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -4,6 +4,7 @@
* Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
* Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
* Copyright 2002 Andriy Palamarchuk
* Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -47,6 +48,32 @@ static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, D
return TRUE;
}
/*
 * Guess the text encoding of a buffer that carries no byte-order mark.
 *
 * pBytes - raw bytes to inspect
 * dwSize - number of bytes available at pBytes
 *
 * Returns ENCODING_UTF16LE when IsTextUnicode accepts the buffer,
 * ENCODING_UTF16BE when the byte-reversed statistics test passes,
 * ENCODING_UTF8 when the buffer converts cleanly as strict UTF-8,
 * and ENCODING_ANSI otherwise (including buffers of 0 or 1 bytes).
 */
ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
{
    /*
     * IsTextUnicode only reports results for the tests requested on input.
     * The reverse statistics test has to be requested explicitly, otherwise
     * no bit of IS_TEXT_UNICODE_REVERSE_MASK is meaningful afterwards and
     * big-endian UTF-16 text can never be recognized.
     */
    INT flags = IS_TEXT_UNICODE_STATISTICS | IS_TEXT_UNICODE_REVERSE_STATISTICS;

    /* Too short to hold even a single UTF-16 code unit. */
    if (dwSize <= 1)
        return ENCODING_ANSI;

    if (IsTextUnicode(pBytes, dwSize, &flags))
    {
        return ENCODING_UTF16LE;
    }

    /*
     * NOTE(review): IS_TEXT_UNICODE_ILLEGAL_CHARS is not requested above, so
     * this bit may not be significant on output; the check is kept as it was.
     */
    if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags & IS_TEXT_UNICODE_ILLEGAL_CHARS))
    {
        return ENCODING_UTF16BE;
    }

    /* is it UTF-8? MB_ERR_INVALID_CHARS makes the call fail on malformed input. */
    if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0))
    {
        return ENCODING_UTF8;
    }

    return ENCODING_ANSI;
}
BOOL
ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
{
@ -98,6 +125,10 @@ ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile,
encFile = ENCODING_UTF8;
dwPos += 3;
}
else
{
encFile = AnalyzeEncoding((const char *)pBytes, dwSize);
}
switch(encFile)
{