[CMDUTILS][FC] Implement text file comparison (#3625)

Implement text file comparison by using file mappings (both Unicode and ANSI). CORE-17500
This commit is contained in:
Katayama Hirofumi MZ 2021-05-04 18:05:57 +09:00 committed by GitHub
parent 5dd93a8a5b
commit d2c47132ad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 913 additions and 211 deletions

View file

@ -1,7 +1,7 @@
include_directories(${REACTOS_SOURCE_DIR}/sdk/lib/conutils)
add_executable(fc fc.c fc.rc)
add_executable(fc fc.c texta.c textw.c fc.rc)
set_module_type(fc win32cui UNICODE)
target_link_libraries(fc conutils ${PSEH_LIB})
add_importlibs(fc msvcrt user32 kernel32)
target_link_libraries(fc conutils wine ${PSEH_LIB})
add_importlibs(fc msvcrt user32 kernel32 ntdll)
add_cd_file(TARGET fc DESTINATION reactos/system32 FOR all)

View file

@ -4,87 +4,116 @@
* PURPOSE: Comparing files
* COPYRIGHT: Copyright 2021 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
*/
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <windef.h>
#include <winbase.h>
#include <winuser.h>
#include <winnls.h>
#include <conutils.h>
#include "resource.h"
#include "fc.h"
// See also: https://stackoverflow.com/questions/33125766/compare-files-with-a-cmd
typedef enum FCRET { // return code of FC command
FCRET_INVALID = -1,
FCRET_IDENTICAL = 0,
FCRET_DIFFERENT = 1,
FCRET_CANT_FIND = 2
} FCRET;
#ifdef _WIN64
#define MAX_VIEW_SIZE (256 * 1024 * 1024) // 256 MB
#ifdef __REACTOS__
#include <conutils.h>
#else
#define MAX_VIEW_SIZE (64 * 1024 * 1024) // 64 MB
#include <stdio.h>
#define ConInitStdStreams() /* empty */
#define StdOut stdout
#define StdErr stderr
// Fallback used when not building for ReactOS (the #else branch of the
// __REACTOS__ check): writes the wide string psz to the stdio stream fp.
void ConPuts(FILE *fp, LPCWSTR psz)
{
fputws(psz, fp);
}
// Fallback used when not building for ReactOS: printf-style output of the
// wide format string psz and its variadic arguments to the stdio stream fp.
void ConPrintf(FILE *fp, LPCWSTR psz, ...)
{
va_list va;
va_start(va, psz);
vfwprintf(fp, psz, va);
va_end(va);
}
// Fallback used when not building for ReactOS: loads the string resource nID
// from the current module and writes it to the stdio stream fp.
// Nothing is written when the resource cannot be loaded.
// NOTE(review): the buffer holds MAX_PATH characters, so longer resource
// strings are truncated by LoadStringW.
void ConResPuts(FILE *fp, UINT nID)
{
    WCHAR sz[MAX_PATH];

    // LoadStringW returns 0 when the resource does not exist; the buffer
    // must not be printed in that case because it was never filled in.
    if (LoadStringW(NULL, nID, sz, _countof(sz)) <= 0)
        return;

    fputws(sz, fp);
}
// Fallback used when not building for ReactOS: loads the string resource nID
// from the current module and uses it as a wide printf-style format for the
// variadic arguments, writing the result to the stdio stream fp.
// Nothing is written when the resource cannot be loaded.
// NOTE(review): the buffer holds MAX_PATH characters, so longer resource
// strings are truncated by LoadStringW.
void ConResPrintf(FILE *fp, UINT nID, ...)
{
    va_list va;
    WCHAR sz[MAX_PATH];

    // An unloaded resource would leave sz uninitialised; using that as a
    // format string is undefined behaviour, so bail out instead.
    if (LoadStringW(NULL, nID, sz, _countof(sz)) <= 0)
        return;

    va_start(va, nID);
    vfwprintf(fp, sz, va);
    va_end(va);
}
#endif
#define FLAG_A (1 << 0)
#define FLAG_B (1 << 1)
#define FLAG_C (1 << 2)
#define FLAG_L (1 << 3)
#define FLAG_LBn (1 << 4)
#define FLAG_N (1 << 5)
#define FLAG_OFFLINE (1 << 6)
#define FLAG_T (1 << 7)
#define FLAG_U (1 << 8)
#define FLAG_W (1 << 9)
#define FLAG_nnnn (1 << 10)
#define FLAG_HELP (1 << 11)
typedef struct FILECOMPARE
{
DWORD dwFlags; // FLAG_...
INT n, nnnn;
LPCWSTR file1, file2;
} FILECOMPARE;
static FCRET NoDifference(VOID)
FCRET NoDifference(VOID)
{
ConResPuts(StdOut, IDS_NO_DIFFERENCE);
return FCRET_IDENTICAL;
}
static FCRET Different(LPCWSTR file1, LPCWSTR file2)
FCRET Different(LPCWSTR file0, LPCWSTR file1)
{
ConResPrintf(StdOut, IDS_DIFFERENT, file1, file2);
ConResPrintf(StdOut, IDS_DIFFERENT, file0, file1);
return FCRET_DIFFERENT;
}
static FCRET LongerThan(LPCWSTR file1, LPCWSTR file2)
FCRET LongerThan(LPCWSTR file0, LPCWSTR file1)
{
ConResPrintf(StdOut, IDS_LONGER_THAN, file1, file2);
ConResPrintf(StdOut, IDS_LONGER_THAN, file0, file1);
return FCRET_DIFFERENT;
}
static FCRET OutOfMemory(VOID)
FCRET OutOfMemory(VOID)
{
ConResPuts(StdErr, IDS_OUT_OF_MEMORY);
return FCRET_INVALID;
}
static FCRET CannotRead(LPCWSTR file)
FCRET CannotRead(LPCWSTR file)
{
ConResPrintf(StdErr, IDS_CANNOT_READ, file);
return FCRET_INVALID;
}
static FCRET InvalidSwitch(VOID)
FCRET InvalidSwitch(VOID)
{
ConResPuts(StdErr, IDS_INVALID_SWITCH);
return FCRET_INVALID;
}
static HANDLE DoOpenFileForInput(LPCWSTR file)
// Prints the IDS_RESYNC_FAILED resource string to standard output and
// returns FCRET_DIFFERENT so the caller can use it directly as its result.
FCRET ResyncFailed(VOID)
{
ConResPuts(StdOut, IDS_RESYNC_FAILED);
return FCRET_DIFFERENT;
}
// Prints the header line of one side of a difference block:
// five asterisks followed by the file name.
VOID PrintCaption(LPCWSTR file)
{
ConPrintf(StdOut, L"***** %ls\n", file);
}
// Prints the terminator of a difference block: five asterisks followed by
// an empty line.
VOID PrintEndOfDiff(VOID)
{
ConPuts(StdOut, L"*****\n\n");
}
// Prints a line consisting of "..." to standard output.
VOID PrintDots(VOID)
{
ConPuts(StdOut, L"...\n");
}
// Writes one wide-character text line to standard output.
// When FLAG_N is set in pFC->dwFlags the line is prefixed with its line
// number (right-aligned in five columns) and a colon.
VOID PrintLineW(const FILECOMPARE *pFC, DWORD lineno, LPCWSTR psz)
{
    if (!(pFC->dwFlags & FLAG_N))
    {
        ConPrintf(StdOut, L"%ls\n", psz);
        return;
    }

    ConPrintf(StdOut, L"%5d: %ls\n", lineno, psz);
}
// Writes one narrow-character (ANSI) text line to standard output.
// When FLAG_N is set in pFC->dwFlags the line is prefixed with its line
// number (right-aligned in five columns) and a colon.
VOID PrintLineA(const FILECOMPARE *pFC, DWORD lineno, LPCSTR psz)
{
    if (!(pFC->dwFlags & FLAG_N))
    {
        ConPrintf(StdOut, L"%hs\n", psz);
        return;
    }

    ConPrintf(StdOut, L"%5d: %hs\n", lineno, psz);
}
HANDLE DoOpenFileForInput(LPCWSTR file)
{
HANDLE hFile = CreateFileW(file, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
if (hFile == INVALID_HANDLE_VALUE)
@ -97,54 +126,54 @@ static HANDLE DoOpenFileForInput(LPCWSTR file)
static FCRET BinaryFileCompare(FILECOMPARE *pFC)
{
FCRET ret;
HANDLE hFile1, hFile2, hMapping1 = NULL, hMapping2 = NULL;
LPBYTE pb1 = NULL, pb2 = NULL;
LARGE_INTEGER ib, cb1, cb2, cbCommon;
HANDLE hFile0, hFile1, hMapping0 = NULL, hMapping1 = NULL;
LPBYTE pb0 = NULL, pb1 = NULL;
LARGE_INTEGER ib, cb0, cb1, cbCommon;
DWORD cbView, ibView;
BOOL fDifferent = FALSE;
hFile1 = DoOpenFileForInput(pFC->file1);
if (hFile1 == INVALID_HANDLE_VALUE)
hFile0 = DoOpenFileForInput(pFC->file[0]);
if (hFile0 == INVALID_HANDLE_VALUE)
return FCRET_CANT_FIND;
hFile2 = DoOpenFileForInput(pFC->file2);
if (hFile2 == INVALID_HANDLE_VALUE)
hFile1 = DoOpenFileForInput(pFC->file[1]);
if (hFile1 == INVALID_HANDLE_VALUE)
{
CloseHandle(hFile1);
CloseHandle(hFile0);
return FCRET_CANT_FIND;
}
do
{
if (_wcsicmp(pFC->file1, pFC->file2) == 0)
if (_wcsicmp(pFC->file[0], pFC->file[1]) == 0)
{
ret = NoDifference();
break;
}
if (!GetFileSizeEx(hFile0, &cb0))
{
ret = CannotRead(pFC->file[0]);
break;
}
if (!GetFileSizeEx(hFile1, &cb1))
{
ret = CannotRead(pFC->file1);
ret = CannotRead(pFC->file[1]);
break;
}
if (!GetFileSizeEx(hFile2, &cb2))
{
ret = CannotRead(pFC->file2);
break;
}
cbCommon.QuadPart = min(cb1.QuadPart, cb2.QuadPart);
cbCommon.QuadPart = min(cb0.QuadPart, cb1.QuadPart);
if (cbCommon.QuadPart > 0)
{
hMapping0 = CreateFileMappingW(hFile0, NULL, PAGE_READONLY,
cb0.HighPart, cb0.LowPart, NULL);
if (hMapping0 == NULL)
{
ret = CannotRead(pFC->file[0]);
break;
}
hMapping1 = CreateFileMappingW(hFile1, NULL, PAGE_READONLY,
cb1.HighPart, cb1.LowPart, NULL);
if (hMapping1 == NULL)
{
ret = CannotRead(pFC->file1);
break;
}
hMapping2 = CreateFileMappingW(hFile2, NULL, PAGE_READONLY,
cb2.HighPart, cb2.LowPart, NULL);
if (hMapping2 == NULL)
{
ret = CannotRead(pFC->file2);
ret = CannotRead(pFC->file[1]);
break;
}
@ -152,197 +181,121 @@ static FCRET BinaryFileCompare(FILECOMPARE *pFC)
for (ib.QuadPart = 0; ib.QuadPart < cbCommon.QuadPart; )
{
cbView = (DWORD)min(cbCommon.QuadPart - ib.QuadPart, MAX_VIEW_SIZE);
pb0 = MapViewOfFile(hMapping0, FILE_MAP_READ, ib.HighPart, ib.LowPart, cbView);
pb1 = MapViewOfFile(hMapping1, FILE_MAP_READ, ib.HighPart, ib.LowPart, cbView);
pb2 = MapViewOfFile(hMapping2, FILE_MAP_READ, ib.HighPart, ib.LowPart, cbView);
if (!pb1 || !pb2)
if (!pb0 || !pb1)
{
ret = OutOfMemory();
break;
}
for (ibView = 0; ibView < cbView; ++ib.QuadPart, ++ibView)
{
if (pb1[ibView] == pb2[ibView])
if (pb0[ibView] == pb1[ibView])
continue;
fDifferent = TRUE;
if (cbCommon.QuadPart > MAXDWORD)
{
ConPrintf(StdOut, L"%016I64X: %02X %02X\n", ib.QuadPart,
pb1[ibView], pb2[ibView]);
pb0[ibView], pb1[ibView]);
}
else
{
ConPrintf(StdOut, L"%08lX: %02X %02X\n", ib.LowPart,
pb1[ibView], pb2[ibView]);
pb0[ibView], pb1[ibView]);
}
}
UnmapViewOfFile(pb0);
UnmapViewOfFile(pb1);
UnmapViewOfFile(pb2);
pb1 = pb2 = NULL;
pb0 = pb1 = NULL;
}
if (ret != FCRET_IDENTICAL)
break;
}
if (cb1.QuadPart < cb2.QuadPart)
ret = LongerThan(pFC->file2, pFC->file1);
else if (cb1.QuadPart > cb2.QuadPart)
ret = LongerThan(pFC->file1, pFC->file2);
if (cb0.QuadPart < cb1.QuadPart)
ret = LongerThan(pFC->file[1], pFC->file[0]);
else if (cb0.QuadPart > cb1.QuadPart)
ret = LongerThan(pFC->file[0], pFC->file[1]);
else if (fDifferent)
ret = Different(pFC->file1, pFC->file2);
ret = Different(pFC->file[0], pFC->file[1]);
else
ret = NoDifference();
} while (0);
UnmapViewOfFile(pb0);
UnmapViewOfFile(pb1);
UnmapViewOfFile(pb2);
CloseHandle(hMapping0);
CloseHandle(hMapping1);
CloseHandle(hMapping2);
CloseHandle(hFile0);
CloseHandle(hFile1);
CloseHandle(hFile2);
return ret;
}
static FCRET
UnicodeTextCompare(FILECOMPARE *pFC, HANDLE hMapping1, const LARGE_INTEGER *pcb1,
HANDLE hMapping2, const LARGE_INTEGER *pcb2)
{
FCRET ret;
BOOL fIgnoreCase = !!(pFC->dwFlags & FLAG_C);
DWORD dwCmpFlags = (fIgnoreCase ? NORM_IGNORECASE : 0);
LPCWSTR psz1, psz2;
LARGE_INTEGER cch1 = { .QuadPart = pcb1->QuadPart / sizeof(WCHAR) };
LARGE_INTEGER cch2 = { .QuadPart = pcb1->QuadPart / sizeof(WCHAR) };
do
{
psz1 = MapViewOfFile(hMapping1, FILE_MAP_READ, 0, 0, pcb1->LowPart);
psz2 = MapViewOfFile(hMapping2, FILE_MAP_READ, 0, 0, pcb2->LowPart);
if (!psz1 || !psz2)
{
ret = OutOfMemory();
break;
}
if (cch1.QuadPart < MAXLONG && cch2.QuadPart < MAXLONG)
{
if (CompareStringW(0, dwCmpFlags, psz1, cch1.LowPart,
psz2, cch2.LowPart) == CSTR_EQUAL)
{
ret = NoDifference();
break;
}
}
// TODO: compare each lines
// TODO: large file support
ret = Different(pFC->file1, pFC->file2);
} while (0);
UnmapViewOfFile(psz1);
UnmapViewOfFile(psz2);
return ret;
}
static FCRET
AnsiTextCompare(FILECOMPARE *pFC, HANDLE hMapping1, const LARGE_INTEGER *pcb1,
HANDLE hMapping2, const LARGE_INTEGER *pcb2)
{
FCRET ret;
BOOL fIgnoreCase = !!(pFC->dwFlags & FLAG_C);
DWORD dwCmpFlags = (fIgnoreCase ? NORM_IGNORECASE : 0);
LPSTR psz1, psz2;
do
{
psz1 = MapViewOfFile(hMapping1, FILE_MAP_READ, 0, 0, pcb1->LowPart);
psz2 = MapViewOfFile(hMapping2, FILE_MAP_READ, 0, 0, pcb2->LowPart);
if (!psz1 || !psz2)
{
ret = OutOfMemory();
break;
}
if (pcb1->QuadPart < MAXLONG && pcb2->QuadPart < MAXLONG)
{
if (CompareStringA(0, dwCmpFlags, psz1, pcb1->LowPart,
psz2, pcb2->LowPart) == CSTR_EQUAL)
{
ret = NoDifference();
break;
}
}
// TODO: compare each lines
// TODO: large file support
ret = Different(pFC->file1, pFC->file2);
} while (0);
UnmapViewOfFile(psz1);
UnmapViewOfFile(psz2);
return ret;
}
static FCRET TextFileCompare(FILECOMPARE *pFC)
{
FCRET ret;
HANDLE hFile1, hFile2, hMapping1 = NULL, hMapping2 = NULL;
LARGE_INTEGER cb1, cb2;
HANDLE hFile0, hFile1, hMapping0 = NULL, hMapping1 = NULL;
LARGE_INTEGER cb0, cb1;
BOOL fUnicode = !!(pFC->dwFlags & FLAG_U);
hFile1 = DoOpenFileForInput(pFC->file1);
if (hFile1 == INVALID_HANDLE_VALUE)
hFile0 = DoOpenFileForInput(pFC->file[0]);
if (hFile0 == INVALID_HANDLE_VALUE)
return FCRET_CANT_FIND;
hFile2 = DoOpenFileForInput(pFC->file2);
if (hFile2 == INVALID_HANDLE_VALUE)
hFile1 = DoOpenFileForInput(pFC->file[1]);
if (hFile1 == INVALID_HANDLE_VALUE)
{
CloseHandle(hFile1);
CloseHandle(hFile0);
return FCRET_CANT_FIND;
}
do
{
if (_wcsicmp(pFC->file1, pFC->file2) == 0)
if (_wcsicmp(pFC->file[0], pFC->file[1]) == 0)
{
ret = NoDifference();
break;
}
if (!GetFileSizeEx(hFile0, &cb0))
{
ret = CannotRead(pFC->file[0]);
break;
}
if (!GetFileSizeEx(hFile1, &cb1))
{
ret = CannotRead(pFC->file1);
ret = CannotRead(pFC->file[1]);
break;
}
if (!GetFileSizeEx(hFile2, &cb2))
{
ret = CannotRead(pFC->file2);
break;
}
if (cb1.QuadPart == 0 && cb2.QuadPart == 0)
if (cb0.QuadPart == 0 && cb1.QuadPart == 0)
{
ret = NoDifference();
break;
}
hMapping0 = CreateFileMappingW(hFile0, NULL, PAGE_READONLY,
cb0.HighPart, cb0.LowPart, NULL);
if (hMapping0 == NULL)
{
ret = CannotRead(pFC->file[0]);
break;
}
hMapping1 = CreateFileMappingW(hFile1, NULL, PAGE_READONLY,
cb1.HighPart, cb1.LowPart, NULL);
if (hMapping1 == NULL)
{
ret = CannotRead(pFC->file1);
break;
}
hMapping2 = CreateFileMappingW(hFile2, NULL, PAGE_READONLY,
cb2.HighPart, cb2.LowPart, NULL);
if (hMapping2 == NULL)
{
ret = CannotRead(pFC->file2);
ret = CannotRead(pFC->file[1]);
break;
}
if (fUnicode)
ret = UnicodeTextCompare(pFC, hMapping1, &cb1, hMapping2, &cb2);
ret = TextCompareW(pFC, &hMapping0, &cb0, &hMapping1, &cb1);
else
ret = AnsiTextCompare(pFC, hMapping1, &cb1, hMapping2, &cb2);
ret = TextCompareA(pFC, &hMapping0, &cb0, &hMapping1, &cb1);
} while (0);
CloseHandle(hMapping0);
CloseHandle(hMapping1);
CloseHandle(hMapping2);
CloseHandle(hFile0);
CloseHandle(hFile1);
CloseHandle(hFile2);
return ret;
}
@ -352,17 +305,17 @@ static BOOL IsBinaryExt(LPCWSTR filename)
// See also: https://docs.microsoft.com/en-us/windows-server/administration/windows-commands/fc
static const LPCWSTR s_exts[] = { L"EXE", L"COM", L"SYS", L"OBJ", L"LIB", L"BIN" };
size_t iext;
LPCWSTR pch, ext, pch1 = wcsrchr(filename, L'\\'), pch2 = wcsrchr(filename, L'/');
if (!pch1 && !pch2)
LPCWSTR pch, ext, pch0 = wcsrchr(filename, L'\\'), pch1 = wcsrchr(filename, L'/');
if (!pch0 && !pch1)
pch = filename;
else if (!pch1 && pch2)
pch = pch2;
else if (pch1 && !pch2)
else if (!pch0 && pch1)
pch = pch1;
else if (pch0 && !pch1)
pch = pch0;
else if (pch0 < pch1)
pch = pch1;
else if (pch1 < pch2)
pch = pch2;
else
pch = pch1;
pch = pch0;
ext = wcsrchr(pch, L'.');
if (ext)
@ -382,10 +335,10 @@ static BOOL IsBinaryExt(LPCWSTR filename)
static FCRET FileCompare(FILECOMPARE *pFC)
{
ConResPrintf(StdOut, IDS_COMPARING, pFC->file1, pFC->file2);
ConResPrintf(StdOut, IDS_COMPARING, pFC->file[0], pFC->file[1]);
if (!(pFC->dwFlags & FLAG_L) &&
((pFC->dwFlags & FLAG_B) || IsBinaryExt(pFC->file1) || IsBinaryExt(pFC->file2)))
((pFC->dwFlags & FLAG_B) || IsBinaryExt(pFC->file[0]) || IsBinaryExt(pFC->file[1])))
{
return BinaryFileCompare(pFC);
}
@ -394,25 +347,29 @@ static FCRET FileCompare(FILECOMPARE *pFC)
static FCRET WildcardFileCompare(FILECOMPARE *pFC)
{
FCRET ret;
if (pFC->dwFlags & FLAG_HELP)
{
ConResPuts(StdOut, IDS_USAGE);
return FCRET_INVALID;
}
if (!pFC->file1 || !pFC->file2)
if (!pFC->file[0] || !pFC->file[1])
{
ConResPuts(StdErr, IDS_NEEDS_FILES);
return FCRET_INVALID;
}
if (HasWildcard(pFC->file1) || HasWildcard(pFC->file2))
if (HasWildcard(pFC->file[0]) || HasWildcard(pFC->file[1]))
{
// TODO: wildcard
ConResPuts(StdErr, IDS_CANT_USE_WILDCARD);
}
return FileCompare(pFC);
ret = FileCompare(pFC);
ConPuts(StdOut, L"\n");
return ret;
}
int wmain(int argc, WCHAR **argv)
@ -428,10 +385,10 @@ int wmain(int argc, WCHAR **argv)
{
if (argv[i][0] != L'/')
{
if (!fc.file1)
fc.file1 = argv[i];
else if (!fc.file2)
fc.file2 = argv[i];
if (!fc.file[0])
fc.file[0] = argv[i];
else if (!fc.file[1])
fc.file[1] = argv[i];
else
return InvalidSwitch();
continue;
@ -479,6 +436,9 @@ int wmain(int argc, WCHAR **argv)
case L'T':
fc.dwFlags |= FLAG_T;
break;
case L'U':
fc.dwFlags |= FLAG_U;
break;
case L'W':
fc.dwFlags |= FLAG_W;
break;
@ -498,3 +458,14 @@ int wmain(int argc, WCHAR **argv)
}
return WildcardFileCompare(&fc);
}
#ifndef __REACTOS__
int main(int argc, char **argv)
{
INT my_argc;
LPWSTR *my_argv = CommandLineToArgvW(GetCommandLineW(), &my_argc);
INT ret = wmain(my_argc, my_argv);
LocalFree(my_argv);
return ret;
}
#endif

View file

@ -0,0 +1,97 @@
/*
* PROJECT: ReactOS FC Command
* LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
* PURPOSE: Comparing files
* COPYRIGHT: Copyright 2021 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
*/
#pragma once
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#ifdef __REACTOS__
#include <windef.h>
#include <winbase.h>
#include <winuser.h>
#include <winnls.h>
#else
#include <windows.h>
#endif
#include <wine/list.h>
#include "resource.h"
// See also: https://stackoverflow.com/questions/33125766/compare-files-with-a-cmd
// Result codes shared by the comparison routines. The values 0..2 are also
// used as the return code of the FC command; FCRET_NO_MORE_DATA is an
// internal extension.
typedef enum FCRET // return code of FC command
{
FCRET_INVALID = -1, // error (e.g. returned by OutOfMemory/CannotRead/InvalidSwitch)
FCRET_IDENTICAL = 0, // no difference found
FCRET_DIFFERENT = 1, // the files differ
FCRET_CANT_FIND = 2, // a file could not be opened
FCRET_NO_MORE_DATA = 3 // (extension) the whole file has been parsed
} FCRET;
// One parsed line of a Unicode (wide-character) text file, linked into
// FILECOMPARE::list.
typedef struct NODE_W
{
struct list entry; // list linkage
LPWSTR pszLine; // heap-allocated line text (released by the text module with free())
LPWSTR pszComp; // white-space-compressed copy of the line; filled in when /W is used
DWORD lineno; // line number assigned while parsing
DWORD hash; // hash of the text used for comparison
} NODE_W;
// One parsed line of an ANSI (narrow-character) text file, linked into
// FILECOMPARE::list.
typedef struct NODE_A
{
struct list entry; // list linkage
LPSTR pszLine; // heap-allocated line text (released by the text module with free())
LPSTR pszComp; // white-space-compressed copy of the line; filled in when /W is used
DWORD lineno; // line number assigned while parsing
DWORD hash; // hash of the text used for comparison
} NODE_A;
// Bit flags stored in FILECOMPARE::dwFlags, one per command-line switch.
#define FLAG_A (1 << 0) // abbreviation
#define FLAG_B (1 << 1) // binary
#define FLAG_C (1 << 2) // ignore cases
#define FLAG_L (1 << 3) // ASCII mode
#define FLAG_LBn (1 << 4) // line buffers
#define FLAG_N (1 << 5) // show line numbers
#define FLAG_OFFLINE (1 << 6) // /OFFLINE: per Microsoft's fc documentation, do not skip files with the offline attribute
#define FLAG_T (1 << 7) // prevent fc from converting tabs to spaces
#define FLAG_U (1 << 8) // Unicode
#define FLAG_W (1 << 9) // compress white space
#define FLAG_nnnn (1 << 10) // /nnnn: number of consecutive matching lines required to resynchronize
#define FLAG_HELP (1 << 11) // show usage
// State of one file comparison: the parsed switches, the two file names
// and, for text comparison, the per-file lists of parsed lines.
typedef struct FILECOMPARE
{
DWORD dwFlags; // FLAG_...
INT n; // # of line buffers (/LBn); the text comparison uses it as the look-ahead limit, in lines, when resynchronizing
INT nnnn; // # of consecutive matching lines required before the files count as resynchronized (/nnnn)
LPCWSTR file[2]; // names of the two files being compared
struct list list[2]; // parsed lines of file[0] and file[1] (NODE_W or NODE_A entries)
} FILECOMPARE;
// text.h
FCRET TextCompareW(FILECOMPARE *pFC,
HANDLE *phMapping0, const LARGE_INTEGER *pcb0,
HANDLE *phMapping1, const LARGE_INTEGER *pcb1);
FCRET TextCompareA(FILECOMPARE *pFC,
HANDLE *phMapping0, const LARGE_INTEGER *pcb0,
HANDLE *phMapping1, const LARGE_INTEGER *pcb1);
// fc.c
VOID PrintLineW(const FILECOMPARE *pFC, DWORD lineno, LPCWSTR psz);
VOID PrintLineA(const FILECOMPARE *pFC, DWORD lineno, LPCSTR psz);
VOID PrintCaption(LPCWSTR file);
VOID PrintEndOfDiff(VOID);
VOID PrintDots(VOID);
FCRET NoDifference(VOID);
FCRET Different(LPCWSTR file0, LPCWSTR file1);
FCRET LongerThan(LPCWSTR file0, LPCWSTR file1);
FCRET OutOfMemory(VOID);
FCRET CannotRead(LPCWSTR file);
FCRET InvalidSwitch(VOID);
FCRET ResyncFailed(VOID);
HANDLE DoOpenFileForInput(LPCWSTR file);
#ifdef _WIN64
#define MAX_VIEW_SIZE (256 * 1024 * 1024) // 256 MB
#else
#define MAX_VIEW_SIZE (64 * 1024 * 1024) // 64 MB
#endif

View file

@ -0,0 +1,620 @@
/*
* PROJECT: ReactOS FC Command
* LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
* PURPOSE: Comparing text files
* COPYRIGHT: Copyright 2021 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
*/
#include "fc.h"
#include <stdio.h>
#ifdef __REACTOS__
#include <wine/debug.h>
WINE_DEFAULT_DEBUG_CHANNEL(fc);
#else
#define ERR /*empty*/
#define WARN /*empty*/
#define TRACE /*empty*/
#endif
#define IS_SPACE(ch) ((ch) == TEXT(' ') || (ch) == TEXT('\t'))
#ifdef UNICODE
#define NODE NODE_W
#define PrintLine PrintLineW
#define TextCompare TextCompareW
#else
#define NODE NODE_A
#define PrintLine PrintLineA
#define TextCompare TextCompareA
#endif
// Returns a newly malloc'ed, NUL-terminated copy of the first cch characters
// of pch, or NULL when the allocation fails. The caller owns the result and
// releases it with free().
// pch may be NULL when cch is 0; the result is then an empty string.
static LPTSTR AllocLine(LPCTSTR pch, DWORD cch)
{
    LPTSTR pszNew = malloc(((size_t)cch + 1) * sizeof(TCHAR));
    if (!pszNew)
        return NULL;

    // Callers use AllocLine(NULL, 0) to get an empty string; memcpy must not
    // be handed a null pointer even for a zero-length copy.
    if (cch > 0)
        memcpy(pszNew, pch, cch * sizeof(TCHAR));

    pszNew[cch] = 0;
    return pszNew;
}
// Wraps the heap string psz in a new zero-initialised NODE carrying the
// given line number. Takes ownership of psz: when the node cannot be
// allocated psz is freed. Returns NULL if psz is NULL or on allocation
// failure.
static NODE *AllocNode(LPTSTR psz, DWORD lineno)
{
    NODE *pNew = NULL;

    if (psz != NULL)
    {
        pNew = calloc(1, sizeof(NODE));
        if (pNew != NULL)
        {
            pNew->pszLine = psz;
            pNew->lineno = lineno;
        }
        else
        {
            free(psz);
        }
    }

    return pNew;
}
// Releases a node together with both of its strings. NULL is accepted and
// ignored. The node is not unlinked from any list here.
static __inline VOID DeleteNode(NODE *node)
{
    if (node == NULL)
        return;

    free(node->pszLine);
    free(node->pszComp);
    free(node);
}
static VOID DeleteList(struct list *list)
{
struct list *ptr;
NODE *node;
while ((ptr = list_head(list)) != NULL)
{
list_remove(ptr);
node = LIST_ENTRY(ptr, NODE, entry);
DeleteNode(node);
}
}
// Returns a pointer to the first character of pch that is neither a space
// nor a tab (possibly the terminating NUL).
static __inline LPCTSTR SkipSpace(LPCTSTR pch)
{
    for (; IS_SPACE(*pch); ++pch)
    {
        /* nothing: just advance past blanks */
    }
    return pch;
}
// Returns a pointer to the last character of the NUL-terminated string pch
// that is neither a space nor a tab, or NULL when the string is empty or
// consists only of such blanks.
static __inline LPCTSTR FindLastNonSpace(LPCTSTR pch)
{
    LPCTSTR pchFound = NULL;

    for (; *pch != 0; ++pch)
    {
        if (IS_SPACE(*pch))
            continue;
        pchFound = pch;
    }

    return pchFound;
}
// Collapses, in place, every run of blanks (spaces/tabs) in psz down to the
// first character of that run. All other characters are kept unchanged.
static VOID DeleteDuplicateSpaces(LPTSTR psz)
{
    LPTSTR src = psz, dst = psz;

    while (*src)
    {
        TCHAR ch = *src++;

        *dst++ = ch;
        if (IS_SPACE(ch))
        {
            // drop the remainder of this run of blanks
            while (IS_SPACE(*src))
                ++src;
        }
    }

    *dst = 0;
}
// Builds the white-space-compressed form of a line: leading and trailing
// blanks are removed and each inner run of blanks is reduced to its first
// character. Returns a new heap string (empty when the line holds only
// blanks) or NULL on allocation failure.
static LPTSTR CompressSpace(LPCTSTR line)
{
    LPCTSTR pchStart = SkipSpace(line);
    LPCTSTR pchEnd = FindLastNonSpace(pchStart);
    LPTSTR pszResult;

    if (!pchEnd)
        return AllocLine(NULL, 0);

    pszResult = AllocLine(pchStart, (DWORD)(pchEnd - pchStart) + 1);
    if (pszResult)
        DeleteDuplicateSpaces(pszResult);

    return pszResult;
}
#define TAB_WIDTH 8
// Computes the length, in characters, that line will have once every tab is
// expanded to the next multiple of TAB_WIDTH columns.
static INT ExpandTabLength(LPCTSTR line)
{
    INT col = 0;

    while (*line)
    {
        col += (*line == TEXT('\t')) ? (TAB_WIDTH - (col % TAB_WIDTH)) : 1;
        ++line;
    }

    return col;
}
// Returns a new heap copy of line in which every tab is replaced by spaces
// up to the next multiple of TAB_WIDTH columns, or NULL on allocation
// failure. The caller frees the result.
static LPTSTR ExpandTab(LPCTSTR line)
{
    INT cchOut = ExpandTabLength(line);
    INT col = 0;
    LPTSTR pszOut = malloc((cchOut + 1) * sizeof(TCHAR));

    if (!pszOut)
        return NULL;

    for (; *line; ++line)
    {
        if (*line != TEXT('\t'))
        {
            pszOut[col++] = *line;
            continue;
        }

        // a tab always yields at least one space, then pads to the tab stop
        do
        {
            pszOut[col++] = TEXT(' ');
        } while (col % TAB_WIDTH != 0);
    }

    pszOut[col] = 0;
    return pszOut;
}
#define HASH_EOF 0xFFFFFFFF
#define HASH_MASK 0x7FFFFFFF
// Computes the line hash used as a quick inequality test: starting from a
// fixed seed, each character (upper-cased via towupper when bIgnoreCase is
// set) is added and the sum shifted left by two bits. The result is masked
// with HASH_MASK, so it can never equal HASH_EOF.
static DWORD GetHash(LPCTSTR psz, BOOL bIgnoreCase)
{
    DWORD hash = 0xDEADFACE;

    for (; *psz; ++psz)
    {
        if (bIgnoreCase)
            hash += towupper(*psz);
        else
            hash += *psz;
        hash <<= 2;
    }

    return hash & HASH_MASK;
}
// Creates the end-of-file sentinel node: both strings are empty, the hash
// is HASH_EOF and the line number is the one passed in. Returns NULL on
// allocation failure.
static NODE *AllocEOFNode(DWORD lineno)
{
    NODE *pEof = AllocNode(AllocLine(NULL, 0), lineno);

    if (!pEof)
        return NULL;

    pEof->hash = HASH_EOF;
    pEof->pszComp = AllocLine(NULL, 0);
    if (!pEof->pszComp)
    {
        DeleteNode(pEof);
        return NULL;
    }

    return pEof;
}
// Tells whether node marks the end of a file: either there is no node at
// all (NULL) or it carries the HASH_EOF sentinel hash.
static __inline BOOL IsEOFNode(NODE *node)
{
    if (node == NULL)
        return TRUE;

    return (node->hash == HASH_EOF);
}
// Prepares a freshly parsed node for comparison according to the switches
// in pFC->dwFlags:
//   - unless /T is given, tabs in pszLine are expanded to spaces;
//   - with /W, pszComp receives the white-space-compressed line;
//   - the hash is computed over the string CompareNode will compare
//     (pszComp with /W, pszLine otherwise), case-folded with /C.
// Returns FALSE on allocation failure; the node then still owns whatever
// strings it holds and can be released with DeleteNode().
static BOOL ConvertNode(const FILECOMPARE *pFC, NODE *node)
{
    const BOOL fIgnoreCase = !!(pFC->dwFlags & FLAG_C);
    LPCTSTR pszKey;

    if (!(pFC->dwFlags & FLAG_T))
    {
        LPTSTR tmp = ExpandTab(node->pszLine);
        if (!tmp)
            return FALSE;
        free(node->pszLine);
        node->pszLine = tmp;
    }

    if (pFC->dwFlags & FLAG_W)
    {
        node->pszComp = CompressSpace(node->pszLine);
        if (!node->pszComp)
            return FALSE;
        pszKey = node->pszComp;
    }
    else
    {
        pszKey = node->pszLine;
    }

    // Always compute the hash. Previously it stayed 0 for every line when
    // /T was given without /W, which disabled the hash pre-filter.
    node->hash = GetHash(pszKey, fIgnoreCase);
    return TRUE;
}
static FCRET CompareNode(const FILECOMPARE *pFC, const NODE *node0, const NODE *node1)
{
DWORD dwCmpFlags;
LPTSTR psz0, psz1;
INT ret;
if (node0->hash != node1->hash)
return FCRET_DIFFERENT;
psz0 = (pFC->dwFlags & FLAG_W) ? node0->pszComp : node0->pszLine;
psz1 = (pFC->dwFlags & FLAG_W) ? node1->pszComp : node1->pszLine;
dwCmpFlags = ((pFC->dwFlags & FLAG_C) ? NORM_IGNORECASE : 0);
ret = CompareString(LOCALE_USER_DEFAULT, dwCmpFlags, psz0, -1, psz1, -1);
return (ret == CSTR_EQUAL) ? FCRET_IDENTICAL : FCRET_DIFFERENT;
}
// Searches pch[ich..cch) for the next line terminator ('\n' or NUL).
// On success stores its index in *pich and returns TRUE; otherwise stores
// cch in *pich and returns FALSE.
static BOOL FindNextLine(LPCTSTR pch, DWORD ich, DWORD cch, LPDWORD pich)
{
    DWORD i;

    for (i = ich; i < cch; ++i)
    {
        TCHAR ch = pch[i];

        if (ch == TEXT('\n') || ch == TEXT('\0'))
        {
            *pich = i;
            return TRUE;
        }
    }

    *pich = cch;
    return FALSE;
}
// Maps the next view of a file and appends one node per text line to list.
//
//   pFC        comparison settings (passed on to ConvertNode)
//   phMapping  file-mapping handle; closed and set to NULL once the whole
//              file has been consumed
//   pib        in/out: byte offset of the next unread data
//   pcb        total size of the file in bytes
//   list       list receiving the parsed nodes
//
// Returns FCRET_IDENTICAL when more data remains to be parsed,
// FCRET_NO_MORE_DATA when the file is exhausted (an EOF node is appended
// when the last view has just been parsed), or the result of OutOfMemory()
// when mapping or allocation fails.
//
// NOTE(review): lineno restarts at 1 on every call, so line numbers are only
// meaningful for the first view. MapViewOfFile also requires the offset to be
// a multiple of the allocation granularity, which *pib is not guaranteed to
// be after the first view - confirm multi-view behaviour before relying on it.
static FCRET
ParseLines(const FILECOMPARE *pFC, HANDLE *phMapping,
           LARGE_INTEGER *pib, const LARGE_INTEGER *pcb, struct list *list)
{
    DWORD lineno = 1, ich, cch, ichNext = 0, cbView, cchNode;
    LPTSTR psz, pszLine;
    BOOL fLast, bCR;
    NODE *node;

    if (*phMapping == NULL)
        return FCRET_NO_MORE_DATA;

    if (pib->QuadPart >= pcb->QuadPart)
    {
        CloseHandle(*phMapping);
        *phMapping = NULL;
        return FCRET_NO_MORE_DATA;
    }

    cbView = (DWORD)min(pcb->QuadPart - pib->QuadPart, MAX_VIEW_SIZE);
    psz = MapViewOfFile(*phMapping, FILE_MAP_READ, pib->HighPart, pib->LowPart, cbView);
    if (!psz)
    {
        return OutOfMemory();
    }

    ich = 0;
    cch = cbView / sizeof(TCHAR);
    fLast = (pib->QuadPart + cbView >= pcb->QuadPart);

    // Take a line when a terminator is found, or when the view ends without
    // one and either this is the last view (unterminated final line) or the
    // whole view is a single line (forced split).
    while (ich < cch &&
           (FindNextLine(psz, ich, cch, &ichNext) ||
            (ichNext == cch && (fLast || ich == 0))))
    {
        bCR = (ichNext > 0) && (psz[ichNext - 1] == TEXT('\r'));
        cchNode = ichNext - ich - bCR; // drop the '\r' of a "\r\n" pair
        TRACE("ich:%ld, cch:%ld, ichNext:%ld, cchNode:%ld\n", ich, cch, ichNext, cchNode);
        pszLine = AllocLine(&psz[ich], cchNode);
        node = AllocNode(pszLine, lineno++);
        if (!node || !ConvertNode(pFC, node))
        {
            DeleteNode(node);
            UnmapViewOfFile(psz);
            return OutOfMemory();
        }
        list_add_tail(list, &node->entry);
        ich = ichNext + 1;
    }

    UnmapViewOfFile(psz);

    // Advance by what was actually consumed. In the last view the loop above
    // always runs to the end of the data, so the file is finished (this also
    // covers a trailing partial character). Otherwise resume at the first
    // unconsumed character, measured in TCHARs so the ANSI build is not
    // advanced by twice the amount.
    if (fLast)
        pib->QuadPart = pcb->QuadPart;
    else
        pib->QuadPart += (LONGLONG)min(ich, cch) * sizeof(TCHAR);

    if (pib->QuadPart < pcb->QuadPart)
        return FCRET_IDENTICAL;

    // append EOF node
    node = AllocEOFNode(lineno);
    if (!node)
        return OutOfMemory();
    list_add_tail(list, &node->entry);
    return FCRET_NO_MORE_DATA;
}
// Prints one side of a difference block for file i (0 or 1).
//
// The caption line is printed first. The range starts at begin (moved back
// by one entry, when both begin and end are given and a previous entry
// exists, so that the last matching line is shown as context) and stops at
// end or at the EOF node, whichever comes first.
//
// Without /A every line of the range is printed. With /A only the first and
// last lines are printed; a single line in between is printed as is, and
// more than one is replaced by "...".
static VOID
ShowDiff(FILECOMPARE *pFC, INT i, struct list *begin, struct list *end)
{
    NODE* node;
    struct list *list = &pFC->list[i];
    struct list *first = NULL, *last = NULL;

    PrintCaption(pFC->file[i]);

    if (begin && end && list_prev(list, begin))
        begin = list_prev(list, begin);

    while (begin != end)
    {
        node = LIST_ENTRY(begin, NODE, entry);
        if (IsEOFNode(node))
            break;
        if (!first)
            first = begin;
        last = begin;
        if (!(pFC->dwFlags & FLAG_A))
            PrintLine(pFC, node->lineno, node->pszLine);
        begin = list_next(list, begin);
    }

    if ((pFC->dwFlags & FLAG_A) && first)
    {
        node = LIST_ENTRY(first, NODE, entry);
        PrintLine(pFC, node->lineno, node->pszLine);

        // A one-line range is complete at this point. Going on would print
        // the same line a second time and walk past the end of the range.
        if (first != last)
        {
            struct list *second = list_next(list, first);

            if (second != last)
            {
                if (list_next(list, second) == last)
                {
                    node = LIST_ENTRY(second, NODE, entry);
                    PrintLine(pFC, node->lineno, node->pszLine);
                }
                else
                {
                    PrintDots();
                }
            }

            node = LIST_ENTRY(last, NODE, entry);
            PrintLine(pFC, node->lineno, node->pszLine);
        }
    }
}
// Advances both list cursors in lock-step while the lines they point at
// compare as identical. On return *pptr0/*pptr1 point at the first
// differing pair, or are NULL where a list ran out.
static VOID
SkipIdentical(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
{
    struct list *cur0 = *pptr0, *cur1 = *pptr1;

    for (; cur0 && cur1;
         cur0 = list_next(&pFC->list[0], cur0),
         cur1 = list_next(&pFC->list[1], cur1))
    {
        NODE *left = LIST_ENTRY(cur0, NODE, entry);
        NODE *right = LIST_ENTRY(cur1, NODE, entry);

        if (CompareNode(pFC, left, right) != FCRET_IDENTICAL)
            break;
    }

    *pptr0 = cur0;
    *pptr1 = cur1;
}
// Like SkipIdentical, but bounded: stops as soon as either cursor reaches
// its line-number limit (lineno0 / lineno1), the lines differ, a list runs
// out, or the number of matched pairs reaches nnnn. The cursors are written
// back and the number of matched pairs is returned.
static DWORD
SkipIdenticalN(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
               DWORD nnnn, DWORD lineno0, DWORD lineno1)
{
    struct list *cur0 = *pptr0, *cur1 = *pptr1;
    DWORD matched = 0;

    for (;;)
    {
        NODE *left, *right;

        if (!cur0 || !cur1)
            break;

        left = LIST_ENTRY(cur0, NODE, entry);
        right = LIST_ENTRY(cur1, NODE, entry);
        if (left->lineno >= lineno0 || right->lineno >= lineno1)
            break;
        if (CompareNode(pFC, left, right) != FCRET_IDENTICAL)
            break;

        cur0 = list_next(&pFC->list[0], cur0);
        cur1 = list_next(&pFC->list[1], cur1);

        // the limit is checked only after a successful match
        if (++matched >= nnnn)
            break;
    }

    *pptr0 = cur0;
    *pptr1 = cur1;
    return matched;
}
// Starting from a candidate resync position, walks both lists forward until
// a run of at least pFC->nnnn consecutive identical line pairs is found.
//
// Returns FCRET_DIFFERENT, without updating *pptr0/*pptr1, as soon as either
// cursor reaches its line-number limit (lineno0 / lineno1).
// Otherwise returns FCRET_IDENTICAL with *pptr0/*pptr1 set to the start of
// the matching run, or to wherever the walk ended (possibly NULL) when a
// list ran out first.
static FCRET
ScanDiff(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
DWORD lineno0, DWORD lineno1)
{
struct list *ptr0 = *pptr0, *ptr1 = *pptr1, *tmp0, *tmp1;
NODE *node0, *node1;
INT count;
while (ptr0 && ptr1)
{
node0 = LIST_ENTRY(ptr0, NODE, entry);
node1 = LIST_ENTRY(ptr1, NODE, entry);
if (node0->lineno >= lineno0)
return FCRET_DIFFERENT;
if (node1->lineno >= lineno1)
return FCRET_DIFFERENT;
// probe with temporary cursors so ptr0/ptr1 stay at the start of the run
tmp0 = ptr0;
tmp1 = ptr1;
count = SkipIdenticalN(pFC, &tmp0, &tmp1, pFC->nnnn, lineno0, lineno1);
if (count >= pFC->nnnn)
break;
if (count > 0)
{
// a run shorter than nnnn counts as part of the difference: skip it
ptr0 = tmp0;
ptr1 = tmp1;
}
else
{
// no match here: advance both sides by one line
ptr0 = list_next(&pFC->list[0], ptr0);
ptr1 = list_next(&pFC->list[1], ptr1);
}
}
*pptr0 = ptr0;
*pptr1 = ptr1;
return FCRET_IDENTICAL;
}
// Tries to find the point where the two files match again after a
// difference starting at *pptr0 / *pptr1.
//
// Every pair of lines after the current ones, within pFC->n lines of the
// starting line on each side, is compared; among the identical pairs the one
// with the smallest penalty is kept (first one found wins ties). If such a
// pair exists, *pptr0/*pptr1 are moved to it, ScanDiff() is run from there
// and its result is returned (the pointers are updated again only when
// ScanDiff leaves both non-NULL).
//
// If no pair matches, *pptr0/*pptr1 are advanced to the nodes whose line
// number equals the look-ahead limit (or NULL when the list ends first) and
// FCRET_DIFFERENT is returned.
static FCRET
Resync(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
{
FCRET ret;
struct list *ptr0, *ptr1, *save0 = NULL, *save1 = NULL;
NODE *node0, *node1;
struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
DWORD lineno0, lineno1;
INT penalty, i0, i1, min_penalty = MAXLONG;
node0 = LIST_ENTRY(*pptr0, NODE, entry);
node1 = LIST_ENTRY(*pptr1, NODE, entry);
// first line numbers that are out of the look-ahead window
lineno0 = node0->lineno + pFC->n;
lineno1 = node1->lineno + pFC->n;
// "If the files that you are comparing have more than pFC->n consecutive
// differing lines, FC cancels the comparison."
// "If the number of matching lines in the files is less than pFC->nnnn,
// FC displays the matching lines as differences."
for (ptr1 = list_next(list1, *pptr1), i1 = 0; ptr1; ptr1 = list_next(list1, ptr1), ++i1)
{
node1 = LIST_ENTRY(ptr1, NODE, entry);
if (node1->lineno >= lineno1)
break;
for (ptr0 = list_next(list0, *pptr0), i0 = 0; ptr0; ptr0 = list_next(list0, ptr0), ++i0)
{
node0 = LIST_ENTRY(ptr0, NODE, entry);
if (node0->lineno >= lineno0)
break;
if (CompareNode(pFC, node0, node1) == FCRET_IDENTICAL)
{
// min(i0, i1) + |i1 - i0| equals max(i0, i1): the larger of the two skips
penalty = min(i0, i1) + abs(i1 - i0);
if (min_penalty > penalty)
{
min_penalty = penalty;
save0 = ptr0;
save1 = ptr1;
}
}
}
}
if (save0 && save1)
{
*pptr0 = save0;
*pptr1 = save1;
ret = ScanDiff(pFC, &save0, &save1, lineno0, lineno1);
if (save0 && save1)
{
*pptr0 = save0;
*pptr1 = save1;
}
return ret;
}
// no matching pair inside the window: move to the end of the window
for (ptr0 = *pptr0; ptr0; ptr0 = list_next(list0, ptr0))
{
node0 = LIST_ENTRY(ptr0, NODE, entry);
if (node0->lineno == lineno0)
break;
}
for (ptr1 = *pptr1; ptr1; ptr1 = list_next(list1, ptr1))
{
node1 = LIST_ENTRY(ptr1, NODE, entry);
if (node1->lineno == lineno1)
break;
}
*pptr0 = ptr0;
*pptr1 = ptr1;
return FCRET_DIFFERENT;
}
// Produces the final result once the comparison loop has stopped.
// If both cursors still point at a node, the remaining lines of both files
// are shown as one last difference block and FCRET_DIFFERENT is returned.
// Otherwise the result is FCRET_DIFFERENT when a difference was seen
// earlier, or NoDifference() (which also prints its message) when not.
static FCRET
Finalize(FILECOMPARE* pFC, struct list *ptr0, struct list* ptr1, BOOL fDifferent)
{
    if (ptr0 && ptr1)
    {
        ShowDiff(pFC, 0, ptr0, NULL);
        ShowDiff(pFC, 1, ptr1, NULL);
        PrintEndOfDiff();
        return FCRET_DIFFERENT;
    }

    return fDifferent ? FCRET_DIFFERENT : NoDifference();
}
// Line-by-line comparison of two text files given as file mappings.
// (TextCompare expands to TextCompareW or TextCompareA depending on UNICODE.)
//
//   pFC          comparison settings; pFC->list[0]/[1] are (re)initialised
//                here and emptied again before returning
//   phMapping0/1 file-mapping handles, passed to ParseLines (which may close
//                them and set them to NULL)
//   pcb0/1       sizes of the two files in bytes
//
// Lines are parsed into the two lists, identical leading lines are skipped,
// and for each difference Resync() is used to find where the files match
// again; each difference block is printed via ShowDiff(). Returns the result
// of Finalize(), FCRET_INVALID-style errors from ParseLines/Resync, or the
// result of ResyncFailed() when resynchronisation is not possible.
//
// NOTE(review): every path out of the inner for(;;) is a goto, so the
// do/while condition is never evaluated and ParseLines is called only once
// per file.
FCRET TextCompare(FILECOMPARE *pFC, HANDLE *phMapping0, const LARGE_INTEGER *pcb0,
HANDLE *phMapping1, const LARGE_INTEGER *pcb1)
{
FCRET ret, ret0, ret1;
struct list *ptr0, *ptr1, *save0, *save1, *next0, *next1;
NODE* node0, * node1;
BOOL fDifferent = FALSE;
LARGE_INTEGER ib0 = { .QuadPart = 0 }, ib1 = { .QuadPart = 0 };
struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
list_init(list0);
list_init(list1);
do
{
ret0 = ParseLines(pFC, phMapping0, &ib0, pcb0, list0);
if (ret0 == FCRET_INVALID)
{
ret = ret0;
goto cleanup;
}
ret1 = ParseLines(pFC, phMapping1, &ib1, pcb1, list1);
if (ret1 == FCRET_INVALID)
{
ret = ret1;
goto cleanup;
}
ptr0 = list_head(list0);
ptr1 = list_head(list1);
for (;;)
{
if (!ptr0 || !ptr1)
goto quit;
// skip identical (sync'ed)
SkipIdentical(pFC, &ptr0, &ptr1);
// anything left on either side means the files are not identical
if (ptr0 || ptr1)
fDifferent = TRUE;
node0 = LIST_ENTRY(ptr0, NODE, entry);
node1 = LIST_ENTRY(ptr1, NODE, entry);
// IsEOFNode also treats a NULL node as end of file
if (IsEOFNode(node0) || IsEOFNode(node1))
goto quit;
// try to resync
save0 = ptr0;
save1 = ptr1;
ret = Resync(pFC, &ptr0, &ptr1);
if (ret == FCRET_INVALID)
goto cleanup;
if (ret == FCRET_DIFFERENT)
{
// resync failed
ret = ResyncFailed();
// show the difference
ShowDiff(pFC, 0, save0, ptr0);
ShowDiff(pFC, 1, save1, ptr1);
PrintEndOfDiff();
goto cleanup;
}
// show the difference
fDifferent = TRUE;
// include the first re-matched line in the printed block when there is one
next0 = ptr0 ? list_next(list0, ptr0) : ptr0;
next1 = ptr1 ? list_next(list1, ptr1) : ptr1;
ShowDiff(pFC, 0, save0, (next0 ? next0 : ptr0));
ShowDiff(pFC, 1, save1, (next1 ? next1 : ptr1));
PrintEndOfDiff();
// now resync'ed
}
} while (ret0 != FCRET_NO_MORE_DATA || ret1 != FCRET_NO_MORE_DATA);
quit:
ret = Finalize(pFC, ptr0, ptr1, fDifferent);
cleanup:
DeleteList(list0);
DeleteList(list1);
return ret;
}

View file

@ -0,0 +1,6 @@
#undef UNICODE
#undef _UNICODE
#ifndef _MBCS
#define _MBCS
#endif
#include "text.h"

View file

@ -0,0 +1,8 @@
#ifndef UNICODE
#define UNICODE
#endif
#ifndef _UNICODE
#define _UNICODE
#endif
#undef _MBCS
#include "text.h"