From 0a851eadcb790e4a52bac1e05153d0913e63e896 Mon Sep 17 00:00:00 2001 From: Katayama Hirofumi MZ Date: Thu, 2 Feb 2023 10:10:30 +0900 Subject: [PATCH] [NOTEPAD] Speed up notepad loading (#5012) - Use EM_GETHANDLE/EM_SETHANDLE message to get/set the internal buffer handle. - Use LocalReAlloc to re-allocate the buffer. - Use file mapping to speed up loading. - Use also IS_TEXT_UNICODE_REVERSE_STATISTICS for IsTextUnicode. CORE-14641 --- base/applications/notepad/dialog.c | 21 +- base/applications/notepad/main.h | 15 +- base/applications/notepad/text.c | 332 +++++++++++++++-------------- 3 files changed, 195 insertions(+), 173 deletions(-) diff --git a/base/applications/notepad/dialog.c b/base/applications/notepad/dialog.c index 6e45d05798e..06618147121 100644 --- a/base/applications/notepad/dialog.c +++ b/base/applications/notepad/dialog.c @@ -419,11 +419,9 @@ BOOL DoCloseFile(VOID) VOID DoOpenFile(LPCTSTR szFileName) { - static const TCHAR dotlog[] = _T(".LOG"); HANDLE hFile; - LPTSTR pszText = NULL; - DWORD dwTextLen; TCHAR log[5]; + HLOCAL hLocal; /* Close any files and prompt to save changes */ if (!DoCloseFile()) @@ -437,21 +435,22 @@ VOID DoOpenFile(LPCTSTR szFileName) goto done; } - if (!ReadText(hFile, (LPWSTR *)&pszText, &dwTextLen, &Globals.encFile, &Globals.iEoln)) + /* To make loading file quicker, we use the internal handle of EDIT control */ + hLocal = (HLOCAL)SendMessageW(Globals.hEdit, EM_GETHANDLE, 0, 0); + if (!ReadText(hFile, &hLocal, &Globals.encFile, &Globals.iEoln)) { ShowLastError(); goto done; } - SetWindowText(Globals.hEdit, pszText); + SendMessageW(Globals.hEdit, EM_SETHANDLE, (WPARAM)hLocal, 0); + /* No need of EM_SETMODIFY and EM_EMPTYUNDOBUFFER here. EM_SETHANDLE does instead. */ - SendMessage(Globals.hEdit, EM_SETMODIFY, FALSE, 0); - SendMessage(Globals.hEdit, EM_EMPTYUNDOBUFFER, 0, 0); SetFocus(Globals.hEdit); /* If the file starts with .LOG, add a time/date at the end and set cursor after - * See http://support.microsoft.com/?kbid=260563 + * See http://web.archive.org/web/20090627165105/http://support.microsoft.com/kb/260563 */ - if (GetWindowText(Globals.hEdit, log, ARRAY_SIZE(log)) && !_tcscmp(log, dotlog)) + if (GetWindowText(Globals.hEdit, log, ARRAY_SIZE(log)) && !_tcscmp(log, _T(".LOG"))) { static const TCHAR lf[] = _T("\r\n"); SendMessage(Globals.hEdit, EM_SETSEL, GetWindowTextLength(Globals.hEdit), -1); @@ -471,8 +470,6 @@ VOID DoOpenFile(LPCTSTR szFileName) done: if (hFile != INVALID_HANDLE_VALUE) CloseHandle(hFile); - if (pszText) - HeapFree(GetProcessHeap(), 0, pszText); } VOID DIALOG_FileNew(VOID) @@ -590,7 +587,7 @@ DIALOG_FileSaveAs_Hook(HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam) hCombo = GetDlgItem(hDlg, ID_EOLN); if (hCombo) - Globals.iEoln = (int) SendMessage(hCombo, CB_GETCURSEL, 0, 0); + Globals.iEoln = (EOLN)SendMessage(hCombo, CB_GETCURSEL, 0, 0); } break; } diff --git a/base/applications/notepad/main.h b/base/applications/notepad/main.h index ce7fc850877..e2140c2b52b 100644 --- a/base/applications/notepad/main.h +++ b/base/applications/notepad/main.h @@ -47,9 +47,12 @@ typedef enum // #define MIN_ENCODING 0 // #define MAX_ENCODING 3 -#define EOLN_CRLF 0 -#define EOLN_LF 1 -#define EOLN_CR 2 +typedef enum +{ + EOLN_CRLF = 0, /* "\r\n" */ + EOLN_LF = 1, /* "\n" */ + EOLN_CR = 2 /* "\r" */ +} EOLN; /* End of line (NewLine) type */ typedef struct { @@ -76,7 +79,7 @@ typedef struct TCHAR szStatusBarLineCol[MAX_PATH]; ENCODING encFile; - int iEoln; + EOLN iEoln; FINDREPLACE find; WNDPROC EditProc; @@ -89,8 +92,8 @@ extern NOTEPAD_GLOBALS Globals; VOID SetFileName(LPCTSTR szFileName); /* from text.c */ -BOOL ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln); -BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln); +BOOL ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln); +BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, EOLN iEoln); /* from settings.c */ void NOTEPAD_LoadSettingsFromRegistry(void); diff --git a/base/applications/notepad/text.c b/base/applications/notepad/text.c index 6a83a89d64f..3ce3a889bfe 100644 --- a/base/applications/notepad/text.c +++ b/base/applications/notepad/text.c @@ -4,7 +4,7 @@ * Copyright 1998,99 Marcel Baur * Copyright 2002 Sylvain Petreolle * Copyright 2002 Andriy Palamarchuk - * Copyright 2019 Katayama Hirofumi MZ + * Copyright 2019-2023 Katayama Hirofumi MZ * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -22,31 +22,7 @@ */ #include "notepad.h" - -static BOOL Append(LPWSTR *ppszText, DWORD *pdwTextLen, LPCWSTR pszAppendText, DWORD dwAppendLen) -{ - LPWSTR pszNewText; - - if (dwAppendLen > 0) - { - if (*ppszText) - { - pszNewText = (LPWSTR) HeapReAlloc(GetProcessHeap(), 0, *ppszText, (*pdwTextLen + dwAppendLen) * sizeof(WCHAR)); - } - else - { - pszNewText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, dwAppendLen * sizeof(WCHAR)); - } - - if (!pszNewText) - return FALSE; - - memcpy(pszNewText + *pdwTextLen, pszAppendText, dwAppendLen * sizeof(WCHAR)); - *ppszText = pszNewText; - *pdwTextLen += dwAppendLen; - } - return TRUE; -} +#include BOOL IsTextNonZeroASCII(const void *pText, DWORD dwSize) { @@ -63,71 +39,156 @@ BOOL IsTextNonZeroASCII(const void *pText, DWORD dwSize) ENCODING AnalyzeEncoding(const char *pBytes, DWORD dwSize) { - INT flags = IS_TEXT_UNICODE_STATISTICS; + INT flags = IS_TEXT_UNICODE_STATISTICS | IS_TEXT_UNICODE_REVERSE_STATISTICS; - if (dwSize <= 1) + if (dwSize <= 1 || IsTextNonZeroASCII(pBytes, dwSize)) return ENCODING_ANSI; - if (IsTextNonZeroASCII(pBytes, dwSize)) - { - return ENCODING_ANSI; - } - if (IsTextUnicode(pBytes, dwSize, &flags)) - { return ENCODING_UTF16LE; - } if ((flags & IS_TEXT_UNICODE_REVERSE_MASK) && !(flags & IS_TEXT_UNICODE_ILLEGAL_CHARS)) - { return ENCODING_UTF16BE; - } /* is it UTF-8? */ if (MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pBytes, dwSize, NULL, 0)) - { return ENCODING_UTF8; - } return ENCODING_ANSI; } -BOOL -ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, int *piEoln) +static VOID +ReplaceNewLines(LPWSTR pszNew, SIZE_T cchNew, LPCWSTR pszOld, SIZE_T cchOld) { - DWORD dwSize; - LPBYTE pBytes = NULL; - LPWSTR pszText; - LPWSTR pszAllocText = NULL; - DWORD dwPos, i; - DWORD dwCharCount; - BOOL bSuccess = FALSE; - BYTE b = 0; - ENCODING encFile = ENCODING_ANSI; - int iCodePage = 0; - WCHAR szCrlf[2] = {'\r', '\n'}; - DWORD adwEolnCount[3] = {0, 0, 0}; + BOOL bPrevCR = FALSE; + SIZE_T ichNew, ichOld; - *ppszText = NULL; - *pdwTextLen = 0; + for (ichOld = ichNew = 0; ichOld < cchOld; ++ichOld) + { + WCHAR ch = pszOld[ichOld]; + + if (ch == L'\n') + { + if (!bPrevCR) + { + pszNew[ichNew++] = L'\r'; + pszNew[ichNew++] = L'\n'; + } + } + else if (ch == '\r') + { + pszNew[ichNew++] = L'\r'; + pszNew[ichNew++] = L'\n'; + } + else + { + pszNew[ichNew++] = ch; + } + + bPrevCR = (ch == L'\r'); + } + + pszNew[ichNew] = UNICODE_NULL; + assert(ichNew == cchNew); +} + +static BOOL +ProcessNewLinesAndNulls(HLOCAL *phLocal, LPWSTR *ppszText, SIZE_T *pcchText, EOLN *piEoln) +{ + SIZE_T ich, cchText = *pcchText, adwEolnCount[3] = { 0, 0, 0 }, cNonCRLFs; + LPWSTR pszText = *ppszText; + EOLN iEoln; + BOOL bPrevCR = FALSE; + + /* Replace '\0' with SPACE. Count newlines. */ + for (ich = 0; ich < cchText; ++ich) + { + WCHAR ch = pszText[ich]; + if (ch == UNICODE_NULL) + pszText[ich] = L' '; + + if (ch == L'\n') + { + if (bPrevCR) + { + adwEolnCount[EOLN_CR]--; + adwEolnCount[EOLN_CRLF]++; + } + else + { + adwEolnCount[EOLN_LF]++; + } + } + else if (ch == '\r') + { + adwEolnCount[EOLN_CR]++; + } + + bPrevCR = (ch == L'\r'); + } + + /* Choose the newline code */ + if (adwEolnCount[EOLN_CR] > adwEolnCount[EOLN_CRLF]) + iEoln = EOLN_CR; + else if (adwEolnCount[EOLN_LF] > adwEolnCount[EOLN_CRLF]) + iEoln = EOLN_LF; + else + iEoln = EOLN_CRLF; + + cNonCRLFs = adwEolnCount[EOLN_CR] + adwEolnCount[EOLN_LF]; + if (cNonCRLFs != 0) + { + /* Allocate a buffer for EM_SETHANDLE */ + SIZE_T cchNew = cchText + cNonCRLFs; + HLOCAL hLocal = LocalAlloc(LMEM_MOVEABLE, (cchNew + 1) * sizeof(WCHAR)); + LPWSTR pszNew = LocalLock(hLocal); + if (!pszNew) + { + LocalFree(hLocal); + return FALSE; /* Failure */ + } + + ReplaceNewLines(pszNew, cchNew, pszText, cchText); + + /* Replace with new data */ + LocalUnlock(*phLocal); + LocalFree(*phLocal); + *phLocal = hLocal; + *ppszText = pszNew; + *pcchText = cchNew; + } + + *piEoln = iEoln; + return TRUE; +} + +BOOL +ReadText(HANDLE hFile, HLOCAL *phLocal, ENCODING *pencFile, EOLN *piEoln) +{ + PCHAR pBytes = NULL; + LPWSTR pszText, pszNewText = NULL; + DWORD dwSize, dwPos; + SIZE_T i, cchText, cbContent; + BOOL bSuccess = FALSE; + ENCODING encFile = ENCODING_ANSI; + UINT iCodePage; + HANDLE hMapping = INVALID_HANDLE_VALUE; + HLOCAL hNewLocal; dwSize = GetFileSize(hFile, NULL); if (dwSize == INVALID_FILE_SIZE) goto done; - pBytes = HeapAlloc(GetProcessHeap(), 0, dwSize + 2); + hMapping = CreateFileMappingW(hFile, NULL, PAGE_READONLY, 0, 0, NULL); + if (hMapping == NULL) + goto done; + + pBytes = MapViewOfFile(hMapping, FILE_MAP_READ, 0, 0, dwSize); if (!pBytes) goto done; - if (!ReadFile(hFile, pBytes, dwSize, &dwSize, NULL)) - goto done; - dwPos = 0; - - /* Make sure that there is a NUL character at the end, in any encoding */ - pBytes[dwSize + 0] = '\0'; - pBytes[dwSize + 1] = '\0'; - /* Look for Byte Order Marks */ + dwPos = 0; if ((dwSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE)) { encFile = ENCODING_UTF16LE; @@ -151,124 +212,85 @@ ReadText(HANDLE hFile, LPWSTR *ppszText, DWORD *pdwTextLen, ENCODING *pencFile, switch(encFile) { case ENCODING_UTF16BE: - for (i = dwPos; i < dwSize-1; i += 2) - { - b = pBytes[i+0]; - pBytes[i+0] = pBytes[i+1]; - pBytes[i+1] = b; - } - /* fall through */ - case ENCODING_UTF16LE: + { + /* Re-allocate the buffer for EM_SETHANDLE */ pszText = (LPWSTR) &pBytes[dwPos]; - dwCharCount = (dwSize - dwPos) / sizeof(WCHAR); + cchText = (dwSize - dwPos) / sizeof(WCHAR); + hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), LMEM_MOVEABLE); + pszNewText = LocalLock(hNewLocal); + if (pszNewText == NULL) + goto done; + + *phLocal = hNewLocal; + CopyMemory(pszNewText, pszText, cchText * sizeof(WCHAR)); + + if (encFile == ENCODING_UTF16BE) /* big endian; Swap bytes */ + { + BYTE tmp, *pb = (LPBYTE)pszNewText; + for (i = 0; i < cchText * 2; i += 2) + { + tmp = pb[i]; + pb[i] = pb[i + 1]; + pb[i + 1] = tmp; + } + } break; + } case ENCODING_ANSI: case ENCODING_UTF8: case ENCODING_UTF8BOM: - if (encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM) - iCodePage = CP_UTF8; - else - iCodePage = CP_ACP; + { + iCodePage = ((encFile == ENCODING_UTF8 || encFile == ENCODING_UTF8BOM) ? CP_UTF8 : CP_ACP); - if ((dwSize - dwPos) > 0) + /* Get ready for ANSI-to-Wide conversion */ + cbContent = dwSize - dwPos; + cchText = 0; + if (cbContent > 0) { - dwCharCount = MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, NULL, 0); - if (dwCharCount == 0) + cchText = MultiByteToWideChar(iCodePage, 0, &pBytes[dwPos], (INT)cbContent, NULL, 0); + if (cchText == 0) goto done; } - else - { - /* special case for files with no characters (other than BOMs) */ - dwCharCount = 0; - } - pszAllocText = (LPWSTR) HeapAlloc(GetProcessHeap(), 0, (dwCharCount + 1) * sizeof(WCHAR)); - if (!pszAllocText) + /* Re-allocate the buffer for EM_SETHANDLE */ + hNewLocal = LocalReAlloc(*phLocal, (cchText + 1) * sizeof(WCHAR), LMEM_MOVEABLE); + pszNewText = LocalLock(hNewLocal); + if (!pszNewText) goto done; + *phLocal = hNewLocal; - if ((dwSize - dwPos) > 0) + /* Do ANSI-to-Wide conversion */ + if (cbContent > 0) { - if (!MultiByteToWideChar(iCodePage, 0, (LPCSTR)&pBytes[dwPos], dwSize - dwPos, pszAllocText, dwCharCount)) + if (!MultiByteToWideChar(iCodePage, 0, + &pBytes[dwPos], (INT)cbContent, pszNewText, (INT)cchText)) + { goto done; + } } - - pszAllocText[dwCharCount] = '\0'; - pszText = pszAllocText; break; + } + DEFAULT_UNREACHABLE; } - dwPos = 0; - for (i = 0; i < dwCharCount; i++) - { - switch(pszText[i]) - { - case '\r': - if ((i < dwCharCount-1) && (pszText[i+1] == '\n')) - { - i++; - adwEolnCount[EOLN_CRLF]++; - break; - } - /* fall through */ + pszNewText[cchText] = UNICODE_NULL; - case '\n': - if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos)) - return FALSE; - if (!Append(ppszText, pdwTextLen, szCrlf, ARRAY_SIZE(szCrlf))) - return FALSE; - dwPos = i + 1; + if (!ProcessNewLinesAndNulls(phLocal, &pszNewText, &cchText, piEoln)) + goto done; - if (pszText[i] == '\r') - adwEolnCount[EOLN_CR]++; - else - adwEolnCount[EOLN_LF]++; - break; - - case '\0': - pszText[i] = ' '; - break; - } - } - - if (!*ppszText && (pszText == pszAllocText)) - { - /* special case; don't need to reallocate */ - *ppszText = pszAllocText; - *pdwTextLen = dwCharCount; - pszAllocText = NULL; - } - else - { - /* append last remaining text */ - if (!Append(ppszText, pdwTextLen, &pszText[dwPos], i - dwPos + 1)) - return FALSE; - } - - /* chose which eoln to use */ - *piEoln = EOLN_CRLF; - if (adwEolnCount[EOLN_LF] > adwEolnCount[*piEoln]) - *piEoln = EOLN_LF; - if (adwEolnCount[EOLN_CR] > adwEolnCount[*piEoln]) - *piEoln = EOLN_CR; *pencFile = encFile; - bSuccess = TRUE; done: if (pBytes) - HeapFree(GetProcessHeap(), 0, pBytes); - if (pszAllocText) - HeapFree(GetProcessHeap(), 0, pszAllocText); - - if (!bSuccess && *ppszText) - { - HeapFree(GetProcessHeap(), 0, *ppszText); - *ppszText = NULL; - *pdwTextLen = 0; - } + UnmapViewOfFile(pBytes); + if (hMapping != INVALID_HANDLE_VALUE) + CloseHandle(hMapping); + if (pszNewText) + LocalUnlock(*phLocal); return bSuccess; } @@ -367,7 +389,7 @@ done: return bSuccess; } -BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, int iEoln) +BOOL WriteText(HANDLE hFile, LPCWSTR pszText, DWORD dwTextLen, ENCODING encFile, EOLN iEoln) { WCHAR wcBom; LPCWSTR pszLF = L"\n";