[NOTEPAD] Speed up notepad loading (#5012)

- Use the EM_GETHANDLE/EM_SETHANDLE messages to get/set the EDIT control's internal buffer handle.
- Use LocalReAlloc to re-allocate the buffer.
- Use file mapping to speed up loading.
- Also pass IS_TEXT_UNICODE_REVERSE_STATISTICS to IsTextUnicode.
CORE-14641
This commit is contained in:
Katayama Hirofumi MZ 2023-02-02 10:10:30 +09:00 committed by GitHub
parent b6aaf5e332
commit 0a851eadcb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 195 additions and 173 deletions

View file

@ -419,11 +419,9 @@ BOOL DoCloseFile(VOID)
VOID DoOpenFile(LPCTSTR szFileName)
{
static const TCHAR dotlog[] = _T(".LOG");
HANDLE hFile;
LPTSTR pszText = NULL;
DWORD dwTextLen;
TCHAR log[5];
HLOCAL hLocal;
/* Close any files and prompt to save changes */
if (!DoCloseFile())
@ -437,21 +435,22 @@ VOID DoOpenFile(LPCTSTR szFileName)
goto done;
}
if (!ReadText(hFile, (LPWSTR *)&pszText, &dwTextLen, &Globals.encFile, &Globals.iEoln))
/* To make loading file quicker, we use the internal handle of EDIT control */
hLocal = (HLOCAL)SendMessageW(Globals.hEdit, EM_GETHANDLE, 0, 0);
if (!ReadText(hFile, &hLocal, &Globals.encFile, &Globals.iEoln))
{
ShowLastError();
goto done;
}
SetWindowText(Globals.hEdit, pszText);
SendMessageW(Globals.hEdit, EM_SETHANDLE, (WPARAM)hLocal, 0);
/* No need of EM_SETMODIFY and EM_EMPTYUNDOBUFFER here. EM_SETHANDLE does instead. */
SendMessage(Globals.hEdit, EM_SETMODIFY, FALSE, 0);
SendMessage(Globals.hEdit, EM_EMPTYUNDOBUFFER, 0, 0);
SetFocus(Globals.hEdit);
/* If the file starts with .LOG, add a time/date at the end and set cursor after
* See http://support.microsoft.com/?kbid=260563
* See http://web.archive.org/web/20090627165105/http://support.microsoft.com/kb/260563
*/
if (GetWindowText(Globals.hEdit, log, ARRAY_SIZE(log)) && !_tcscmp(log, dotlog))
if (GetWindowText(Globals.hEdit, log, ARRAY_SIZE(log)) && !_tcscmp(log, _T(".LOG")))
{
static const TCHAR lf[] = _T("\r\n");
SendMessage(Globals.hEdit, EM_SETSEL, GetWindowTextLength(Globals.hEdit), -1);
@ -471,8 +470,6 @@ VOID DoOpenFile(LPCTSTR szFileName)
done:
if (hFile != INVALID_HANDLE_VALUE)
CloseHandle(hFile);
if (pszText)
HeapFree(GetProcessHeap(), 0, pszText);
}
VOID DIALOG_FileNew(VOID)
@ -590,7 +587,7 @@ DIALOG_FileSaveAs_Hook(HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam)
hCombo = GetDlgItem(hDlg, ID_EOLN);
if (hCombo)
Globals.iEoln = (int) SendMessage(hCombo, CB_GETCURSEL, 0, 0);
Globals.iEoln = (EOLN)SendMessage(hCombo, CB_GETCURSEL, 0, 0);
}
break;
}

View file

@ -47,9 +47,12 @@ typedef enum
// #define MIN_ENCODING 0
// #define MAX_ENCODING 3
#define EOLN_CRLF 0
#define EOLN_LF 1
#define EOLN_CR 2
/*
 * End-of-line (newline) convention of a text file.
 *
 * The numeric values are significant: they are used directly as array
 * indices when counting newline kinds while loading a file, and a value is
 * produced by casting the CB_GETCURSEL result of the ID_EOLN combo box
 * (presumably the combo box items are listed in this same order -- confirm
 * against the dialog code before renumbering).
 */
typedef enum
{
EOLN_CRLF = 0, /* "\r\n" */
EOLN_LF = 1, /* "\n" */
EOLN_CR = 2 /* "\r" */
} EOLN; /* End of line (NewLine) type */
typedef struct
{
@ -76,7 +79,7 @@ typedef struct
TCHAR szStatusBarLineCol[MAX_PATH];
ENCODING encFile;
int iEoln;
EOLN iEoln;
FINDREPLACE find;
WNDPROC EditProc;
@ -89,8 +92,8 @@ extern NOTEPAD_GLOBALS Globals;
VOID SetFileName(LPCTSTR szFileName);
/* from text.c */
BOOL ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln);
BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln);
BOOL ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln);
BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, EOLN iEoln);
/* from settings.c */
void NOTEPAD_LoadSettingsFromRegistry(void);

View file

@ -4,7 +4,7 @@
* Copyright 1998,99 Marcel Baur <mbaur@g26.ethz.ch>
* Copyright 2002 Sylvain Petreolle <spetreolle@yahoo.fr>
* Copyright 2002 Andriy Palamarchuk
* Copyright 2019 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
* Copyright 2019-2023 Katayama Hirofumi MZ <katayama.hirofumi.mz@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@ -22,31 +22,7 @@
*/
#include "notepad.h"
static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen)
{
LPWSTR pszNewText;
if (dwAppendLen > 0)
{
if (*ppszText)
{
pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR));
}
else
{
pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR));
}
if (!pszNewText)
return FALSE;
memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR));
*ppszText = pszNewText;
*pdwTextLen += dwAppendLen;
}
return TRUE;
}
#include <assert.h>
BOOL IsTextNonZeroASCII(const void *pText, DWORD dwSize)
{
@ -63,71 +39,156 @@ BOOL IsTextNonZeroASCII(const void *pText, DWORD dwSize)
ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize)
{
INT flags = IS_TEXT_UNICODE_STATISTICS;
INT flags = IS_TEXT_UNICODE_STATISTICS | IS_TEXT_UNICODE_REVERSE_STATISTICS;
if (dwSize <= 1)
if (dwSize <= 1 || IsTextNonZeroASCII(pBytes, dwSize))
return ENCODING_ANSI;
if (IsTextNonZeroASCII(pBytes, dwSize))
{
return ENCODING_ANSI;
}
if (IsTextUnicode(pBytes, dwSize, &flags))
{
return ENCODING_UTF16LE;
}
if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags & IS_TEXT_UNICODE_ILLEGAL_CHARS))
{
return ENCODING_UTF16BE;
}
/* is it UTF-8? */
if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0))
{
return ENCODING_UTF8;
}
return ENCODING_ANSI;
}
BOOL
ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln)
static VOID
ReplaceNewLines(LPWSTR pszNew, SIZE_T cchNew, LPCWSTR pszOld, SIZE_T cchOld)
{
DWORD dwSize;
LPBYTE pBytes = NULL;
LPWSTR pszText;
LPWSTR pszAllocText = NULL;
DWORD dwPos, i;
DWORD dwCharCount;
BOOL bSuccess = FALSE;
BYTE b = 0;
ENCODING encFile = ENCODING_ANSI;
int iCodePage = 0;
WCHAR szCrlf[2] = {'\r', '\n'};
DWORD adwEolnCount[3] = {0, 0, 0};
BOOL bPrevCR = FALSE;
SIZE_T ichNew, ichOld;
*ppszText = NULL;
*pdwTextLen = 0;
for (ichOld = ichNew = 0; ichOld < cchOld; ++ichOld)
{
WCHAR ch = pszOld[ichOld];
if (ch == L'\n')
{
if (!bPrevCR)
{
pszNew[ichNew++] = L'\r';
pszNew[ichNew++] = L'\n';
}
}
else if (ch == '\r')
{
pszNew[ichNew++] = L'\r';
pszNew[ichNew++] = L'\n';
}
else
{
pszNew[ichNew++] = ch;
}
bPrevCR = (ch == L'\r');
}
pszNew[ichNew] = UNICODE_NULL;
assert(ichNew == cchNew);
}
/*
 * Prepares loaded text for the EDIT control and detects its newline style.
 *
 * The text is scanned once: every embedded NUL character is overwritten with
 * a space (in place), and the occurrences of "\r\n", lone "\n" and lone "\r"
 * are counted. If any lone "\r" or "\n" exists, a new movable local buffer
 * is allocated, filled by ReplaceNewLines() and swapped in for the old one.
 *
 * phLocal   [in/out] Handle of the locked buffer holding the text. When a
 *                    new buffer is needed, the old handle is unlocked and
 *                    freed, and the new (still locked) handle is stored.
 * ppszText  [in/out] Pointer to the locked text; updated with phLocal.
 * pcchText  [in/out] Text length in characters, excluding the terminator.
 * piEoln    [out]    The most frequent newline kind. Ties are resolved in
 *                    favor of CRLF, then LF. Written only on success.
 *
 * Returns TRUE on success. Returns FALSE if the new buffer cannot be
 * allocated or locked; in that case *phLocal, *ppszText and *pcchText are
 * left unchanged (NULs in the old buffer have already been replaced).
 */
static BOOL
ProcessNewLinesAndNulls(HLOCAL *phLocal, LPWSTR *ppszText, SIZE_T *pcchText, EOLN *piEoln)
{
    SIZE_T ich, cchText = *pcchText, adwEolnCount[3] = { 0, 0, 0 }, cNonCRLFs;
    LPWSTR pszText = *ppszText;
    EOLN iEoln;
    BOOL bPrevCR = FALSE;

    /* Replace '\0' with SPACE. Count newlines. */
    for (ich = 0; ich < cchText; ++ich)
    {
        WCHAR ch = pszText[ich];

        if (ch == UNICODE_NULL)
            pszText[ich] = L' ';

        if (ch == L'\n')
        {
            if (bPrevCR)
            {
                /* The preceding '\r' was counted as a lone CR; it is in
                 * fact the first half of a CRLF pair, so move the count. */
                adwEolnCount[EOLN_CR]--;
                adwEolnCount[EOLN_CRLF]++;
            }
            else
            {
                adwEolnCount[EOLN_LF]++;
            }
        }
        else if (ch == L'\r')
        {
            /* Provisionally a lone CR; corrected above if '\n' follows. */
            adwEolnCount[EOLN_CR]++;
        }

        bPrevCR = (ch == L'\r');
    }

    /*
     * Choose the most frequent newline code. Each candidate is compared
     * against the current best (not only against CRLF), so a file dominated
     * by LF is not misdetected as CR merely because CR outnumbers CRLF.
     */
    iEoln = EOLN_CRLF;
    if (adwEolnCount[EOLN_LF] > adwEolnCount[iEoln])
        iEoln = EOLN_LF;
    if (adwEolnCount[EOLN_CR] > adwEolnCount[iEoln])
        iEoln = EOLN_CR;

    /* Every lone CR or LF grows by one character when expanded to CRLF. */
    cNonCRLFs = adwEolnCount[EOLN_CR] + adwEolnCount[EOLN_LF];
    if (cNonCRLFs != 0)
    {
        /* Allocate a buffer for EM_SETHANDLE */
        SIZE_T cchNew = cchText + cNonCRLFs;
        HLOCAL hLocal = LocalAlloc(LMEM_MOVEABLE, (cchNew + 1) * sizeof(WCHAR));
        LPWSTR pszNew;

        if (!hLocal)
            return FALSE; /* Failure */

        pszNew = LocalLock(hLocal);
        if (!pszNew)
        {
            LocalFree(hLocal);
            return FALSE; /* Failure */
        }

        ReplaceNewLines(pszNew, cchNew, pszText, cchText);

        /* Replace with new data. The new buffer stays locked; the caller
         * is expected to unlock *phLocal when it is done with the text. */
        LocalUnlock(*phLocal);
        LocalFree(*phLocal);
        *phLocal = hLocal;
        *ppszText = pszNew;
        *pcchText = cchNew;
    }

    *piEoln = iEoln;
    return TRUE;
}
BOOL
ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln)
{
PCHAR pBytes = NULL;
LPWSTR pszText, pszNewText = NULL;
DWORD dwSize, dwPos;
SIZE_T i, cchText, cbContent;
BOOL bSuccess = FALSE;
ENCODING encFile = ENCODING_ANSI;
UINT iCodePage;
HANDLE hMapping = INVALID_HANDLE_VALUE;
HLOCAL hNewLocal;
dwSize = GetFileSize(hFile, NULL);
if (dwSize == INVALID_FILE_SIZE)
goto done;
pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2);
hMapping = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
if (hMapping == NULL)
goto done;
pBytes = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, dwSize);
if (!pBytes)
goto done;
if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL))
goto done;
dwPos = 0;
/* Make sure that there is a NUL character at the end, in any encoding */
pBytes[dwSize + 0] = '\0';
pBytes[dwSize + 1] = '\0';
/* Look for Byte Order Marks */
dwPos = 0;
if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
{
encFile = ENCODING_UTF16LE;
@ -151,124 +212,85 @@ ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile,
switch(encFile)
{
case ENCODING_UTF16BE:
for (i = dwPos; i < dwSize-1; i += 2)
{
b = pBytes[i+0];
pBytes[i+0] = pBytes[i+1];
pBytes[i+1] = b;
}
/* fall through */
case ENCODING_UTF16LE:
{
/* Re-allocate the buffer for EM_SETHANDLE */
pszText = (LPWSTR) &pBytes[dwPos];
dwCharCount = (dwSize - dwPos) / sizeof(WCHAR);
cchText = (dwSize - dwPos) / sizeof(WCHAR);
hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), LMEM_MOVEABLE);
pszNewText = LocalLock(hNewLocal);
if (pszNewText == NULL)
goto done;
*phLocal = hNewLocal;
CopyMemory(pszNewText, pszText, cchText * sizeof(WCHAR));
if (encFile == ENCODING_UTF16BE) /* big endian; Swap bytes */
{
BYTE tmp, *pb = (LPBYTE)pszNewText;
for (i = 0; i < cchText * 2; i += 2)
{
tmp = pb[i];
pb[i] = pb[i + 1];
pb[i + 1] = tmp;
}
}
break;
}
case ENCODING_ANSI:
case ENCODING_UTF8:
case ENCODING_UTF8BOM:
if (encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM)
iCodePage = CP_UTF8;
else
iCodePage = CP_ACP;
{
iCodePage = ((encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM) ? CP_UTF8 : CP_ACP);
if ((dwSize - dwPos) > 0)
/* Get ready for ANSI-to-Wide conversion */
cbContent = dwSize - dwPos;
cchText = 0;
if (cbContent > 0)
{
dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0);
if (dwCharCount == 0)
cchText = MultiByteToWideChar(iCodePage, 0, &pBytes[dwPos], (INT)cbContent, NULL, 0);
if (cchText == 0)
goto done;
}
else
{
/* special case for files with no characters (other than BOMs) */
dwCharCount = 0;
}
pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR));
if (!pszAllocText)
/* Re-allocate the buffer for EM_SETHANDLE */
hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), LMEM_MOVEABLE);
pszNewText = LocalLock(hNewLocal);
if (!pszNewText)
goto done;
*phLocal = hNewLocal;
if ((dwSize - dwPos) > 0)
/* Do ANSI-to-Wide conversion */
if (cbContent > 0)
{
if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount))
if (!MultiByteToWideChar(iCodePage, 0,
&pBytes[dwPos], (INT)cbContent, pszNewText, (INT)cchText))
{
goto done;
}
}
pszAllocText[dwCharCount] = '\0';
pszText = pszAllocText;
break;
}
DEFAULT_UNREACHABLE;
}
dwPos = 0;
for (i = 0; i < dwCharCount; i++)
{
switch(pszText[i])
{
case '\r':
if ((i < dwCharCount-1) && (pszText[i+1] == '\n'))
{
i++;
adwEolnCount[EOLN_CRLF]++;
break;
}
/* fall through */
pszNewText[cchText] = UNICODE_NULL;
case '\n':
if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos))
return FALSE;
if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf)))
return FALSE;
dwPos = i + 1;
if (!ProcessNewLinesAndNulls(phLocal, &pszNewText, &cchText, piEoln))
goto done;
if (pszText[i] == '\r')
adwEolnCount[EOLN_CR]++;
else
adwEolnCount[EOLN_LF]++;
break;
case '\0':
pszText[i] = ' ';
break;
}
}
if (!*ppszText && (pszText == pszAllocText))
{
/* special case; don't need to reallocate */
*ppszText = pszAllocText;
*pdwTextLen = dwCharCount;
pszAllocText = NULL;
}
else
{
/* append last remaining text */
if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1))
return FALSE;
}
/* chose which eoln to use */
*piEoln = EOLN_CRLF;
if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln])
*piEoln = EOLN_LF;
if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln])
*piEoln = EOLN_CR;
*pencFile = encFile;
bSuccess = TRUE;
done:
if (pBytes)
HeapFree(GetProcessHeap(), 0, pBytes);
if (pszAllocText)
HeapFree(GetProcessHeap(), 0, pszAllocText);
if (!bSuccess && *ppszText)
{
HeapFree(GetProcessHeap(), 0, *ppszText);
*ppszText = NULL;
*pdwTextLen = 0;
}
UnmapViewOfFile(pBytes);
if (hMapping != INVALID_HANDLE_VALUE)
CloseHandle(hMapping);
if (pszNewText)
LocalUnlock(*phLocal);
return bSuccess;
}
@ -367,7 +389,7 @@ done:
return bSuccess;
}
BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln)
BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, EOLN iEoln)
{
WCHAR wcBom;
LPCWSTR pszLF = L"\n";