[CMDUTILS][FC] Implement text file comparison (#3625)

Implement text file comparison by using file mappings (both Unicode and ANSI). CORE-17500
This commit is contained in:
Katayama Hirofumi MZ 2021-05-04 18:05:57 +09:00 committed by GitHub
parent 5dd93a8a5b
commit d2c47132ad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 913 additions and 211 deletions

View file

@ -4,87 +4,116 @@
* PURPOSE: Comparing files
* COPYRIGHT: Copyright 2021 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
*/
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <windef.h>
#include <winbase.h>
#include <winuser.h>
#include <winnls.h>
#include <conutils.h>
#include "resource.h"
#include "fc.h"
// See also: https://stackoverflow.com/questions/33125766/compare-files-with-a-cmd
typedef enum FCRET { // return code of FC command
FCRET_INVALID = -1,
FCRET_IDENTICAL = 0,
FCRET_DIFFERENT = 1,
FCRET_CANT_FIND = 2
} FCRET;
#ifdef _WIN64
#define MAX_VIEW_SIZE (256 * 1024 * 1024) // 256 MB
#ifdef __REACTOS__
#include <conutils.h>
#else
#define MAX_VIEW_SIZE (64 * 1024 * 1024) // 64 MB
#include <stdio.h>
#define ConInitStdStreams() /* empty */
#define StdOut stdout
#define StdErr stderr
void ConPuts(FILE *fp, LPCWSTR psz)
{
fputws(psz, fp);
}
void ConPrintf(FILE *fp, LPCWSTR psz, ...)
{
va_list va;
va_start(va, psz);
vfwprintf(fp, psz, va);
va_end(va);
}
void ConResPuts(FILE *fp, UINT nID)
{
WCHAR sz[MAX_PATH];
LoadStringW(NULL, nID, sz, _countof(sz));
fputws(sz, fp);
}
void ConResPrintf(FILE *fp, UINT nID, ...)
{
va_list va;
WCHAR sz[MAX_PATH];
va_start(va, nID);
LoadStringW(NULL, nID, sz, _countof(sz));
vfwprintf(fp, sz, va);
va_end(va);
}
#endif
#define FLAG_A (1 << 0)
#define FLAG_B (1 << 1)
#define FLAG_C (1 << 2)
#define FLAG_L (1 << 3)
#define FLAG_LBn (1 << 4)
#define FLAG_N (1 << 5)
#define FLAG_OFFLINE (1 << 6)
#define FLAG_T (1 << 7)
#define FLAG_U (1 << 8)
#define FLAG_W (1 << 9)
#define FLAG_nnnn (1 << 10)
#define FLAG_HELP (1 << 11)
typedef struct FILECOMPARE
{
DWORD dwFlags; // FLAG_...
INT n, nnnn;
LPCWSTR file1, file2;
} FILECOMPARE;
static FCRET NoDifference(VOID)
FCRET NoDifference(VOID)
{
ConResPuts(StdOut, IDS_NO_DIFFERENCE);
return FCRET_IDENTICAL;
}
static FCRET Different(LPCWSTR file1, LPCWSTR file2)
FCRET Different(LPCWSTR file0, LPCWSTR file1)
{
ConResPrintf(StdOut, IDS_DIFFERENT, file1, file2);
ConResPrintf(StdOut, IDS_DIFFERENT, file0, file1);
return FCRET_DIFFERENT;
}
static FCRET LongerThan(LPCWSTR file1, LPCWSTR file2)
FCRET LongerThan(LPCWSTR file0, LPCWSTR file1)
{
ConResPrintf(StdOut, IDS_LONGER_THAN, file1, file2);
ConResPrintf(StdOut, IDS_LONGER_THAN, file0, file1);
return FCRET_DIFFERENT;
}
static FCRET OutOfMemory(VOID)
FCRET OutOfMemory(VOID)
{
ConResPuts(StdErr, IDS_OUT_OF_MEMORY);
return FCRET_INVALID;
}
static FCRET CannotRead(LPCWSTR file)
FCRET CannotRead(LPCWSTR file)
{
ConResPrintf(StdErr, IDS_CANNOT_READ, file);
return FCRET_INVALID;
}
static FCRET InvalidSwitch(VOID)
FCRET InvalidSwitch(VOID)
{
ConResPuts(StdErr, IDS_INVALID_SWITCH);
return FCRET_INVALID;
}
static HANDLE DoOpenFileForInput(LPCWSTR file)
FCRET ResyncFailed(VOID)
{
ConResPuts(StdOut, IDS_RESYNC_FAILED);
return FCRET_DIFFERENT;
}
VOID PrintCaption(LPCWSTR file)
{
ConPrintf(StdOut, L"***** %ls\n", file);
}
VOID PrintEndOfDiff(VOID)
{
ConPuts(StdOut, L"*****\n\n");
}
VOID PrintDots(VOID)
{
ConPuts(StdOut, L"...\n");
}
VOID PrintLineW(const FILECOMPARE *pFC, DWORD lineno, LPCWSTR psz)
{
if (pFC->dwFlags & FLAG_N)
ConPrintf(StdOut, L"%5d: %ls\n", lineno, psz);
else
ConPrintf(StdOut, L"%ls\n", psz);
}
VOID PrintLineA(const FILECOMPARE *pFC, DWORD lineno, LPCSTR psz)
{
if (pFC->dwFlags & FLAG_N)
ConPrintf(StdOut, L"%5d: %hs\n", lineno, psz);
else
ConPrintf(StdOut, L"%hs\n", psz);
}
HANDLE DoOpenFileForInput(LPCWSTR file)
{
HANDLE hFile = CreateFileW(file, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
if (hFile == INVALID_HANDLE_VALUE)
@ -97,54 +126,54 @@ static HANDLE DoOpenFileForInput(LPCWSTR file)
static FCRET BinaryFileCompare(FILECOMPARE *pFC)
{
FCRET ret;
HANDLE hFile1, hFile2, hMapping1 = NULL, hMapping2 = NULL;
LPBYTE pb1 = NULL, pb2 = NULL;
LARGE_INTEGER ib, cb1, cb2, cbCommon;
HANDLE hFile0, hFile1, hMapping0 = NULL, hMapping1 = NULL;
LPBYTE pb0 = NULL, pb1 = NULL;
LARGE_INTEGER ib, cb0, cb1, cbCommon;
DWORD cbView, ibView;
BOOL fDifferent = FALSE;
hFile1 = DoOpenFileForInput(pFC->file1);
if (hFile1 == INVALID_HANDLE_VALUE)
hFile0 = DoOpenFileForInput(pFC->file[0]);
if (hFile0 == INVALID_HANDLE_VALUE)
return FCRET_CANT_FIND;
hFile2 = DoOpenFileForInput(pFC->file2);
if (hFile2 == INVALID_HANDLE_VALUE)
hFile1 = DoOpenFileForInput(pFC->file[1]);
if (hFile1 == INVALID_HANDLE_VALUE)
{
CloseHandle(hFile1);
CloseHandle(hFile0);
return FCRET_CANT_FIND;
}
do
{
if (_wcsicmp(pFC->file1, pFC->file2) == 0)
if (_wcsicmp(pFC->file[0], pFC->file[1]) == 0)
{
ret = NoDifference();
break;
}
if (!GetFileSizeEx(hFile0, &cb0))
{
ret = CannotRead(pFC->file[0]);
break;
}
if (!GetFileSizeEx(hFile1, &cb1))
{
ret = CannotRead(pFC->file1);
ret = CannotRead(pFC->file[1]);
break;
}
if (!GetFileSizeEx(hFile2, &cb2))
{
ret = CannotRead(pFC->file2);
break;
}
cbCommon.QuadPart = min(cb1.QuadPart, cb2.QuadPart);
cbCommon.QuadPart = min(cb0.QuadPart, cb1.QuadPart);
if (cbCommon.QuadPart > 0)
{
hMapping0 = CreateFileMappingW(hFile0, NULL, PAGE_READONLY,
cb0.HighPart, cb0.LowPart, NULL);
if (hMapping0 == NULL)
{
ret = CannotRead(pFC->file[0]);
break;
}
hMapping1 = CreateFileMappingW(hFile1, NULL, PAGE_READONLY,
cb1.HighPart, cb1.LowPart, NULL);
if (hMapping1 == NULL)
{
ret = CannotRead(pFC->file1);
break;
}
hMapping2 = CreateFileMappingW(hFile2, NULL, PAGE_READONLY,
cb2.HighPart, cb2.LowPart, NULL);
if (hMapping2 == NULL)
{
ret = CannotRead(pFC->file2);
ret = CannotRead(pFC->file[1]);
break;
}
@ -152,197 +181,121 @@ static FCRET BinaryFileCompare(FILECOMPARE *pFC)
for (ib.QuadPart = 0; ib.QuadPart < cbCommon.QuadPart; )
{
cbView = (DWORD)min(cbCommon.QuadPart - ib.QuadPart, MAX_VIEW_SIZE);
pb0 = MapViewOfFile(hMapping0, FILE_MAP_READ, ib.HighPart, ib.LowPart, cbView);
pb1 = MapViewOfFile(hMapping1, FILE_MAP_READ, ib.HighPart, ib.LowPart, cbView);
pb2 = MapViewOfFile(hMapping2, FILE_MAP_READ, ib.HighPart, ib.LowPart, cbView);
if (!pb1 || !pb2)
if (!pb0 || !pb1)
{
ret = OutOfMemory();
break;
}
for (ibView = 0; ibView < cbView; ++ib.QuadPart, ++ibView)
{
if (pb1[ibView] == pb2[ibView])
if (pb0[ibView] == pb1[ibView])
continue;
fDifferent = TRUE;
if (cbCommon.QuadPart > MAXDWORD)
{
ConPrintf(StdOut, L"%016I64X: %02X %02X\n", ib.QuadPart,
pb1[ibView], pb2[ibView]);
pb0[ibView], pb1[ibView]);
}
else
{
ConPrintf(StdOut, L"%08lX: %02X %02X\n", ib.LowPart,
pb1[ibView], pb2[ibView]);
pb0[ibView], pb1[ibView]);
}
}
UnmapViewOfFile(pb0);
UnmapViewOfFile(pb1);
UnmapViewOfFile(pb2);
pb1 = pb2 = NULL;
pb0 = pb1 = NULL;
}
if (ret != FCRET_IDENTICAL)
break;
}
if (cb1.QuadPart < cb2.QuadPart)
ret = LongerThan(pFC->file2, pFC->file1);
else if (cb1.QuadPart > cb2.QuadPart)
ret = LongerThan(pFC->file1, pFC->file2);
if (cb0.QuadPart < cb1.QuadPart)
ret = LongerThan(pFC->file[1], pFC->file[0]);
else if (cb0.QuadPart > cb1.QuadPart)
ret = LongerThan(pFC->file[0], pFC->file[1]);
else if (fDifferent)
ret = Different(pFC->file1, pFC->file2);
ret = Different(pFC->file[0], pFC->file[1]);
else
ret = NoDifference();
} while (0);
UnmapViewOfFile(pb0);
UnmapViewOfFile(pb1);
UnmapViewOfFile(pb2);
CloseHandle(hMapping0);
CloseHandle(hMapping1);
CloseHandle(hMapping2);
CloseHandle(hFile0);
CloseHandle(hFile1);
CloseHandle(hFile2);
return ret;
}
static FCRET
UnicodeTextCompare(FILECOMPARE *pFC, HANDLE hMapping1, const LARGE_INTEGER *pcb1,
HANDLE hMapping2, const LARGE_INTEGER *pcb2)
{
FCRET ret;
BOOL fIgnoreCase = !!(pFC->dwFlags & FLAG_C);
DWORD dwCmpFlags = (fIgnoreCase ? NORM_IGNORECASE : 0);
LPCWSTR psz1, psz2;
LARGE_INTEGER cch1 = { .QuadPart = pcb1->QuadPart / sizeof(WCHAR) };
LARGE_INTEGER cch2 = { .QuadPart = pcb1->QuadPart / sizeof(WCHAR) };
do
{
psz1 = MapViewOfFile(hMapping1, FILE_MAP_READ, 0, 0, pcb1->LowPart);
psz2 = MapViewOfFile(hMapping2, FILE_MAP_READ, 0, 0, pcb2->LowPart);
if (!psz1 || !psz2)
{
ret = OutOfMemory();
break;
}
if (cch1.QuadPart < MAXLONG && cch2.QuadPart < MAXLONG)
{
if (CompareStringW(0, dwCmpFlags, psz1, cch1.LowPart,
psz2, cch2.LowPart) == CSTR_EQUAL)
{
ret = NoDifference();
break;
}
}
// TODO: compare each lines
// TODO: large file support
ret = Different(pFC->file1, pFC->file2);
} while (0);
UnmapViewOfFile(psz1);
UnmapViewOfFile(psz2);
return ret;
}
static FCRET
AnsiTextCompare(FILECOMPARE *pFC, HANDLE hMapping1, const LARGE_INTEGER *pcb1,
HANDLE hMapping2, const LARGE_INTEGER *pcb2)
{
FCRET ret;
BOOL fIgnoreCase = !!(pFC->dwFlags & FLAG_C);
DWORD dwCmpFlags = (fIgnoreCase ? NORM_IGNORECASE : 0);
LPSTR psz1, psz2;
do
{
psz1 = MapViewOfFile(hMapping1, FILE_MAP_READ, 0, 0, pcb1->LowPart);
psz2 = MapViewOfFile(hMapping2, FILE_MAP_READ, 0, 0, pcb2->LowPart);
if (!psz1 || !psz2)
{
ret = OutOfMemory();
break;
}
if (pcb1->QuadPart < MAXLONG && pcb2->QuadPart < MAXLONG)
{
if (CompareStringA(0, dwCmpFlags, psz1, pcb1->LowPart,
psz2, pcb2->LowPart) == CSTR_EQUAL)
{
ret = NoDifference();
break;
}
}
// TODO: compare each lines
// TODO: large file support
ret = Different(pFC->file1, pFC->file2);
} while (0);
UnmapViewOfFile(psz1);
UnmapViewOfFile(psz2);
return ret;
}
static FCRET TextFileCompare(FILECOMPARE *pFC)
{
FCRET ret;
HANDLE hFile1, hFile2, hMapping1 = NULL, hMapping2 = NULL;
LARGE_INTEGER cb1, cb2;
HANDLE hFile0, hFile1, hMapping0 = NULL, hMapping1 = NULL;
LARGE_INTEGER cb0, cb1;
BOOL fUnicode = !!(pFC->dwFlags & FLAG_U);
hFile1 = DoOpenFileForInput(pFC->file1);
if (hFile1 == INVALID_HANDLE_VALUE)
hFile0 = DoOpenFileForInput(pFC->file[0]);
if (hFile0 == INVALID_HANDLE_VALUE)
return FCRET_CANT_FIND;
hFile2 = DoOpenFileForInput(pFC->file2);
if (hFile2 == INVALID_HANDLE_VALUE)
hFile1 = DoOpenFileForInput(pFC->file[1]);
if (hFile1 == INVALID_HANDLE_VALUE)
{
CloseHandle(hFile1);
CloseHandle(hFile0);
return FCRET_CANT_FIND;
}
do
{
if (_wcsicmp(pFC->file1, pFC->file2) == 0)
if (_wcsicmp(pFC->file[0], pFC->file[1]) == 0)
{
ret = NoDifference();
break;
}
if (!GetFileSizeEx(hFile0, &cb0))
{
ret = CannotRead(pFC->file[0]);
break;
}
if (!GetFileSizeEx(hFile1, &cb1))
{
ret = CannotRead(pFC->file1);
ret = CannotRead(pFC->file[1]);
break;
}
if (!GetFileSizeEx(hFile2, &cb2))
{
ret = CannotRead(pFC->file2);
break;
}
if (cb1.QuadPart == 0 && cb2.QuadPart == 0)
if (cb0.QuadPart == 0 && cb1.QuadPart == 0)
{
ret = NoDifference();
break;
}
hMapping0 = CreateFileMappingW(hFile0, NULL, PAGE_READONLY,
cb0.HighPart, cb0.LowPart, NULL);
if (hMapping0 == NULL)
{
ret = CannotRead(pFC->file[0]);
break;
}
hMapping1 = CreateFileMappingW(hFile1, NULL, PAGE_READONLY,
cb1.HighPart, cb1.LowPart, NULL);
if (hMapping1 == NULL)
{
ret = CannotRead(pFC->file1);
break;
}
hMapping2 = CreateFileMappingW(hFile2, NULL, PAGE_READONLY,
cb2.HighPart, cb2.LowPart, NULL);
if (hMapping2 == NULL)
{
ret = CannotRead(pFC->file2);
ret = CannotRead(pFC->file[1]);
break;
}
if (fUnicode)
ret = UnicodeTextCompare(pFC, hMapping1, &cb1, hMapping2, &cb2);
ret = TextCompareW(pFC, &hMapping0, &cb0, &hMapping1, &cb1);
else
ret = AnsiTextCompare(pFC, hMapping1, &cb1, hMapping2, &cb2);
ret = TextCompareA(pFC, &hMapping0, &cb0, &hMapping1, &cb1);
} while (0);
CloseHandle(hMapping0);
CloseHandle(hMapping1);
CloseHandle(hMapping2);
CloseHandle(hFile0);
CloseHandle(hFile1);
CloseHandle(hFile2);
return ret;
}
@ -352,17 +305,17 @@ static BOOL IsBinaryExt(LPCWSTR filename)
// See also: https://docs.microsoft.com/en-us/windows-server/administration/windows-commands/fc
static const LPCWSTR s_exts[] = { L"EXE", L"COM", L"SYS", L"OBJ", L"LIB", L"BIN" };
size_t iext;
LPCWSTR pch, ext, pch1 = wcsrchr(filename, L'\\'), pch2 = wcsrchr(filename, L'/');
if (!pch1 && !pch2)
LPCWSTR pch, ext, pch0 = wcsrchr(filename, L'\\'), pch1 = wcsrchr(filename, L'/');
if (!pch0 && !pch1)
pch = filename;
else if (!pch1 && pch2)
pch = pch2;
else if (pch1 && !pch2)
else if (!pch0 && pch1)
pch = pch1;
else if (pch0 && !pch1)
pch = pch0;
else if (pch0 < pch1)
pch = pch1;
else if (pch1 < pch2)
pch = pch2;
else
pch = pch1;
pch = pch0;
ext = wcsrchr(pch, L'.');
if (ext)
@ -382,10 +335,10 @@ static BOOL IsBinaryExt(LPCWSTR filename)
static FCRET FileCompare(FILECOMPARE *pFC)
{
ConResPrintf(StdOut, IDS_COMPARING, pFC->file1, pFC->file2);
ConResPrintf(StdOut, IDS_COMPARING, pFC->file[0], pFC->file[1]);
if (!(pFC->dwFlags & FLAG_L) &&
((pFC->dwFlags & FLAG_B) || IsBinaryExt(pFC->file1) || IsBinaryExt(pFC->file2)))
((pFC->dwFlags & FLAG_B) || IsBinaryExt(pFC->file[0]) || IsBinaryExt(pFC->file[1])))
{
return BinaryFileCompare(pFC);
}
@ -394,25 +347,29 @@ static FCRET FileCompare(FILECOMPARE *pFC)
static FCRET WildcardFileCompare(FILECOMPARE *pFC)
{
FCRET ret;
if (pFC->dwFlags & FLAG_HELP)
{
ConResPuts(StdOut, IDS_USAGE);
return FCRET_INVALID;
}
if (!pFC->file1 || !pFC->file2)
if (!pFC->file[0] || !pFC->file[1])
{
ConResPuts(StdErr, IDS_NEEDS_FILES);
return FCRET_INVALID;
}
if (HasWildcard(pFC->file1) || HasWildcard(pFC->file2))
if (HasWildcard(pFC->file[0]) || HasWildcard(pFC->file[1]))
{
// TODO: wildcard
ConResPuts(StdErr, IDS_CANT_USE_WILDCARD);
}
return FileCompare(pFC);
ret = FileCompare(pFC);
ConPuts(StdOut, L"\n");
return ret;
}
int wmain(int argc, WCHAR **argv)
@ -428,10 +385,10 @@ int wmain(int argc, WCHAR **argv)
{
if (argv[i][0] != L'/')
{
if (!fc.file1)
fc.file1 = argv[i];
else if (!fc.file2)
fc.file2 = argv[i];
if (!fc.file[0])
fc.file[0] = argv[i];
else if (!fc.file[1])
fc.file[1] = argv[i];
else
return InvalidSwitch();
continue;
@ -479,6 +436,9 @@ int wmain(int argc, WCHAR **argv)
case L'T':
fc.dwFlags |= FLAG_T;
break;
case L'U':
fc.dwFlags |= FLAG_U;
break;
case L'W':
fc.dwFlags |= FLAG_W;
break;
@ -498,3 +458,14 @@ int wmain(int argc, WCHAR **argv)
}
return WildcardFileCompare(&fc);
}
#ifndef __REACTOS__
int main(int argc, char **argv)
{
INT my_argc;
LPWSTR *my_argv = CommandLineToArgvW(GetCommandLineW(), &my_argc);
INT ret = wmain(my_argc, my_argv);
LocalFree(my_argv);
return ret;
}
#endif