reactos/base/applications/cmdutils/more/more.c

/*
 * COPYRIGHT:       See COPYING in the top level directory
 * PROJECT:         ReactOS More Command
 * FILE:            base/applications/cmdutils/more/more.c
 * PURPOSE:         Displays text stream from STDIN or from an arbitrary number
 *                  of files to STDOUT, with screen capabilities (more than CAT,
 *                  but less than LESS ^^).
 * PROGRAMMERS:     Paolo Pantaleo
 *                  Timothy Schepens
 *                  Hermes Belusca-Maito (hermes.belusca@sfr.fr)
 */
/*
 * MORE.C - external command.
 *
 * clone from 4nt more command
 *
 * 26 Sep 1999 - Paolo Pantaleo <paolopan@freemail.it>
 *     started
 *
 * Oct 2003 - Timothy Schepens <tischepe at fastmail dot fm>
 *     use window size instead of buffer size.
 */

#include <stdio.h>

#include <windef.h>
#include <winbase.h>
#include <winnls.h>
#include <winuser.h>

#include <conutils.h>

#include "resource.h"

/* PagePrompt statistics for the current file */
DWORD dwFileSize; // In bytes
DWORD dwSumReadBytes, dwSumReadChars;
// The average number of bytes per character is equal to
// dwSumReadBytes / dwSumReadChars. Note that dwSumReadChars
// will never be == 0 when ConWritePaging (and possibly PagePrompt)
// is called.

/* Handles for file and console */
HANDLE hFile = INVALID_HANDLE_VALUE;
HANDLE hStdIn, hStdOut;
HANDLE hKeyboard;


static BOOL
__stdcall
PagePrompt(PCON_PAGER Pager, DWORD Done, DWORD Total)
{
    HANDLE hInput = ConStreamGetOSHandle(StdIn);
    DWORD dwMode;
    KEY_EVENT_RECORD KeyEvent;

    /*
     * Just use the simple prompt if the file being displayed is the STDIN,
     * otherwise use the prompt with progress percentage.
     *
     * The progress percentage is evaluated as follows.
     * So far we have read a total of 'dwSumReadBytes' bytes from the file.
     * Amongst those is the latest read chunk of 'dwReadBytes' bytes, to which
     * correspond a number of 'dwReadChars' characters with which we have called
     * ConWritePaging who called PagePrompt. We then have: Total == dwReadChars.
     * During this ConWritePaging call the PagePrompt was called after 'Done'
     * number of characters over 'Total'.
     * It should be noted that for 'dwSumReadBytes' number of bytes read it
     * *roughly* corresponds 'dwSumReadChars' number of characters. This is
     * because there may be some failures happening during the conversion of
     * the bytes read to the character string for a given encoding.
     * Therefore the number of characters displayed on screen is equal to:
     *   dwSumReadChars - Total + Done ,
     * but the best corresponding approximed number of bytes would be:
     *   dwSumReadBytes - (Total - Done) * (dwSumReadBytes / dwSumReadChars) ,
     * where the ratio is the average number of bytes per character.
     * The percentage is then computed relative to the total file size.
     */
    if (hFile == hStdIn)
    {
        ConResPuts(Pager->Screen->Stream, IDS_CONTINUE);
    }
    else
    {
        ConResPrintf(Pager->Screen->Stream, IDS_CONTINUE_PROGRESS,
                     // (dwSumReadChars - Total + Done) * 100 / dwFileSize
                     (dwSumReadBytes - (Total - Done) *
                        (dwSumReadBytes / dwSumReadChars)) * 100 / dwFileSize
                     );
    }

    // TODO: Implement prompt read line!

    // FIXME: Does not support TTY yet!

    /* RemoveBreakHandler */
    SetConsoleCtrlHandler(NULL, TRUE);
    /* ConInDisable */
    GetConsoleMode(hInput, &dwMode);
    dwMode &= ~ENABLE_PROCESSED_INPUT;
    SetConsoleMode(hInput, dwMode);

    do
    {
        // FIXME: Does not support TTY yet!

        // ConInKey(&KeyEvent);
        INPUT_RECORD ir;
        DWORD dwRead;
        do
        {
            ReadConsoleInput(hInput, &ir, 1, &dwRead);
        }
        while ((ir.EventType != KEY_EVENT) || (!ir.Event.KeyEvent.bKeyDown));

        /* Got our key, return to caller */
        KeyEvent = ir.Event.KeyEvent;
    }
    while ((KeyEvent.wVirtualKeyCode == VK_SHIFT) ||
           (KeyEvent.wVirtualKeyCode == VK_MENU) ||
           (KeyEvent.wVirtualKeyCode == VK_CONTROL));

    /* AddBreakHandler */
    SetConsoleCtrlHandler(NULL, FALSE);
    /* ConInEnable */
    GetConsoleMode(hInput, &dwMode);
    dwMode |= ENABLE_PROCESSED_INPUT;
    SetConsoleMode(hInput, dwMode);

    /*
     * Erase the full line where the cursor is, and move
     * the cursor back to the beginning of the line.
     */
    ConClearLine(Pager->Screen->Stream);

    if ((KeyEvent.wVirtualKeyCode == VK_ESCAPE) ||
        ((KeyEvent.wVirtualKeyCode == L'C') &&
         (KeyEvent.dwControlKeyState & (LEFT_CTRL_PRESSED | RIGHT_CTRL_PRESSED))))
    {
        /* We break, output a newline */
        WCHAR ch = L'\n';
        ConStreamWrite(Pager->Screen->Stream, &ch, 1);
        return FALSE;
    }

    return TRUE;
}

/*
 * See base/applications/cmdutils/clip/clip.c!IsDataUnicode()
 * and base/applications/notepad/text.c!ReadText() for more details.
 * Also some good code example can be found at:
 * https://github.com/AutoIt/text-encoding-detect
 */
typedef enum
{
    ENCODING_ANSI    =  0,
    ENCODING_UTF16LE =  1,
    ENCODING_UTF16BE =  2,
    ENCODING_UTF8    =  3
} ENCODING;

static BOOL
IsDataUnicode(
    IN PVOID Buffer,
    IN DWORD BufferSize,
    OUT ENCODING* Encoding OPTIONAL,
    OUT PDWORD SkipBytes OPTIONAL)
{
    PBYTE pBytes = Buffer;
    ENCODING encFile = ENCODING_ANSI;
    DWORD dwPos = 0;

    /*
     * See http://archives.miloush.net/michkap/archive/2007/04/22/2239345.html
     * for more details about the algorithm and the pitfalls behind it.
     * Of course it would be actually great to make a nice function that
     * would work, once and for all, and put it into a library.
     */

    /* Look for Byte Order Marks */
    if ((BufferSize >= 2) && (pBytes[0] == 0xFF) && (pBytes[1] == 0xFE))
    {
        encFile = ENCODING_UTF16LE;
        dwPos = 2;
    }
    else if ((BufferSize >= 2) && (pBytes[0] == 0xFE) && (pBytes[1] == 0xFF))
    {
        encFile = ENCODING_UTF16BE;
        dwPos = 2;
    }
    else if ((BufferSize >= 3) && (pBytes[0] == 0xEF) && (pBytes[1] == 0xBB) && (pBytes[2] == 0xBF))
    {
        encFile = ENCODING_UTF8;
        dwPos = 3;
    }
    else
    {
        /*
         * Try using statistical analysis. Do not rely on the return value of
         * IsTextUnicode as we can get FALSE even if the text is in UTF-16 BE
         * (i.e. we have some of the IS_TEXT_UNICODE_REVERSE_MASK bits set).
         * Instead, set all the tests we want to perform, then just check
         * the passed tests and try to deduce the string properties.
         */

/*
 * This mask contains the 3 highest bits from IS_TEXT_UNICODE_NOT_ASCII_MASK
 * and the 1st highest bit from IS_TEXT_UNICODE_NOT_UNICODE_MASK.
 */
#define IS_TEXT_UNKNOWN_FLAGS_MASK  ((7 << 13) | (1 << 11))

        /* Flag out the unknown flags here, the passed tests will not have them either */
        INT Tests = (IS_TEXT_UNICODE_NOT_ASCII_MASK   |
                     IS_TEXT_UNICODE_NOT_UNICODE_MASK |
                     IS_TEXT_UNICODE_REVERSE_MASK | IS_TEXT_UNICODE_UNICODE_MASK)
                        & ~IS_TEXT_UNKNOWN_FLAGS_MASK;
        INT Results;

        IsTextUnicode(Buffer, BufferSize, &Tests);
        Results = Tests;

        /*
         * As the IS_TEXT_UNICODE_NULL_BYTES or IS_TEXT_UNICODE_ILLEGAL_CHARS
         * flags are expected to be potentially present in the result without
         * modifying our expectations, filter them out now.
         */
        Results &= ~(IS_TEXT_UNICODE_NULL_BYTES | IS_TEXT_UNICODE_ILLEGAL_CHARS);

        /*
         * NOTE: The flags IS_TEXT_UNICODE_ASCII16 and
         * IS_TEXT_UNICODE_REVERSE_ASCII16 are not reliable.
         *
         * NOTE2: Check for potential "bush hid the facts" effect by also
         * checking the original results (in 'Tests') for the absence of
         * the IS_TEXT_UNICODE_NULL_BYTES flag, as we may presumably expect
         * that in UTF-16 text there will be at some point some NULL bytes.
         * If not, fall back to ANSI. This shows the limitations of using the
         * IsTextUnicode API to perform such tests, and the usage of a more
         * improved encoding detection algorithm would be really welcome.
         */
        if (!(Results & IS_TEXT_UNICODE_NOT_UNICODE_MASK) &&
            !(Results & IS_TEXT_UNICODE_REVERSE_MASK)     &&
             (Results & IS_TEXT_UNICODE_UNICODE_MASK)     &&
             (Tests   & IS_TEXT_UNICODE_NULL_BYTES))
        {
            encFile = ENCODING_UTF16LE;
            dwPos = (Results & IS_TEXT_UNICODE_SIGNATURE) ? 2 : 0;
        }
        else
        if (!(Results & IS_TEXT_UNICODE_NOT_UNICODE_MASK) &&
            !(Results & IS_TEXT_UNICODE_UNICODE_MASK)     &&
             (Results & IS_TEXT_UNICODE_REVERSE_MASK)     &&
             (Tests   & IS_TEXT_UNICODE_NULL_BYTES))
        {
            encFile = ENCODING_UTF16BE;
            dwPos = (Results & IS_TEXT_UNICODE_REVERSE_SIGNATURE) ? 2 : 0;
        }
        else
        {
            /*
             * Either 'Results' has neither of those masks set, as it can be
             * the case for UTF-8 text (or ANSI), or it has both as can be the
             * case when analysing pure binary data chunk. This is therefore
             * invalid and we fall back to ANSI encoding.
             * FIXME: In case of failure, assume ANSI (as long as we do not have
             * correct tests for UTF8, otherwise we should do them, and at the
             * very end, assume ANSI).
             */
            encFile = ENCODING_ANSI; // ENCODING_UTF8;
            dwPos = 0;
        }
    }

    if (Encoding)
        *Encoding = encFile;
    if (SkipBytes)
        *SkipBytes = dwPos;

    return (encFile != ENCODING_ANSI);
}

/*
 * Adapted from base/shell/cmd/misc.c!FileGetString(), but with correct
 * text encoding support. Also please note that similar code should be
 * also used in the CMD.EXE 'TYPE' command.
 * Contrary to CMD's FileGetString() we do not stop at new-lines.
 *
 * Read text data from a file and convert it from a given encoding to UTF-16.
 *
 *   IN OUT PVOID pCacheBuffer and IN DWORD CacheBufferLength :
 *     Implementation detail so that the function uses an external user-provided
 *     buffer to store the data temporarily read from the file. The function
 *     could have used an internal buffer instead. The length is in number of bytes.
 *
 *   IN OUT PWSTR* pBuffer and IN OUT PDWORD pnBufferLength :
 *     Reallocated buffer containing the string data converted to UTF-16.
 *     In input, contains a pointer to the original buffer and its length.
 *     In output, contains a pointer to the reallocated buffer and its length.
 *     The length is in number of characters.
 *
 *     At first call to this function, pBuffer can be set to NULL, in which case
 *     when the function returns the pointer will point to a valid buffer.
 *     After the last call to this function, free the pBuffer pointer with:
 *     HeapFree(GetProcessHeap(), 0, *pBuffer);
 *
 *     If Encoding is set to ENCODING_UTF16LE or ENCODING_UTF16BE, since we are
 *     compiled in UNICODE, no extra conversion is performed and therefore
 *     pBuffer is unused (remains unallocated) and one can directly use the
 *     contents of pCacheBuffer as it is expected to contain valid UTF-16 text.
 *
 *   OUT PDWORD pdwReadBytes : Number of bytes read from the file (optional).
 *   OUT PDWORD pdwReadChars : Corresponding number of characters read (optional).
 */
static BOOL
FileGetString(
    IN HANDLE hFile,
    IN ENCODING Encoding,
    IN OUT PVOID pCacheBuffer,
    IN DWORD CacheBufferLength,
    IN OUT PWCHAR* pBuffer,
    IN OUT PDWORD pnBufferLength,
    OUT PDWORD pdwReadBytes OPTIONAL,
    OUT PDWORD pdwReadChars OPTIONAL)
{
    BOOL Success;
    UINT CodePage = (UINT)-1;
    DWORD dwReadBytes;
    INT len;

    // ASSERT(pCacheBuffer && (CacheBufferLength > 0));
    // ASSERT(CacheBufferLength % 2 == 0); // Cache buffer length MUST BE even!
    // ASSERT(pBuffer && pnBufferLength);

    /* Always reset the retrieved number of bytes/characters */
    if (pdwReadBytes) *pdwReadBytes = 0;
    if (pdwReadChars) *pdwReadChars = 0;

    Success = ReadFile(hFile, pCacheBuffer, CacheBufferLength, &dwReadBytes, NULL);
    if (!Success || dwReadBytes == 0)
        return FALSE;

    if (pdwReadBytes) *pdwReadBytes = dwReadBytes;

    if ((Encoding == ENCODING_ANSI) || (Encoding == ENCODING_UTF8))
    {
        /* Conversion is needed */

        if (Encoding == ENCODING_ANSI)
            CodePage = GetConsoleCP(); // CP_ACP; // FIXME: Cache GetConsoleCP() value.
        else // if (Encoding == ENCODING_UTF8)
            CodePage = CP_UTF8;

        /* Retrieve the needed buffer size */
        len = MultiByteToWideChar(CodePage, 0, pCacheBuffer, dwReadBytes,
                                  NULL, 0);
        if (len == 0)
        {
            /* Failure, bail out */
            return FALSE;
        }

        /* Initialize the conversion buffer if needed... */
        if (*pBuffer == NULL)
        {
            *pnBufferLength = len;
            *pBuffer = HeapAlloc(GetProcessHeap(), 0, *pnBufferLength * sizeof(WCHAR));
            if (*pBuffer == NULL)
            {
                // *pBuffer = NULL;
                *pnBufferLength = 0;
                // WARN("DEBUG: Cannot allocate memory for *pBuffer!\n");
                // ConErrFormatMessage(GetLastError());
                return FALSE;
            }
        }
        /* ... or reallocate only if the new length is greater than the old one */
        else if (len > *pnBufferLength)
        {
            PWSTR OldBuffer = *pBuffer;

            *pnBufferLength = len;
            *pBuffer = HeapReAlloc(GetProcessHeap(), 0, *pBuffer, *pnBufferLength * sizeof(WCHAR));
            if (*pBuffer == NULL)
            {
                /* Do not leak old buffer */
                HeapFree(GetProcessHeap(), 0, OldBuffer);
                // *pBuffer = NULL;
                *pnBufferLength = 0;
                // WARN("DEBUG: Cannot reallocate memory for *pBuffer!\n");
                // ConErrFormatMessage(GetLastError());
                return FALSE;
            }
        }

        /* Now perform the conversion proper */
        len = MultiByteToWideChar(CodePage, 0, pCacheBuffer, dwReadBytes,
                                  *pBuffer, len);
        dwReadBytes = len;
    }
    else
    {
        /*
         * No conversion needed, just convert from big to little endian if needed.
         * pBuffer and pnBufferLength are left untouched and pCacheBuffer can be
         * directly used.
         */
        PWCHAR pWChars = pCacheBuffer;
        DWORD i;

        dwReadBytes /= sizeof(WCHAR);

        if (Encoding == ENCODING_UTF16BE)
        {
            for (i = 0; i < dwReadBytes; i++)
            {
                /* Equivalent to RtlUshortByteSwap: reverse high/low bytes */
                pWChars[i] = MAKEWORD(HIBYTE(pWChars[i]), LOBYTE(pWChars[i]));
            }
        }
        // else if (Encoding == ENCODING_UTF16LE), we are good, nothing to do.
    }

    /* Return the number of characters (dwReadBytes is converted) */
    if (pdwReadChars) *pdwReadChars = dwReadBytes;

    return TRUE;
}

// INT CommandMore(LPTSTR cmd, LPTSTR param)
int wmain(int argc, WCHAR* argv[])
{
    // FIXME this stuff!
    CON_SCREEN Screen = {StdOut};
    CON_PAGER Pager = {&Screen, 0};

    int i;

    BOOL bRet, bContinue;

    ENCODING Encoding;
    DWORD SkipBytes = 0;

#define FileCacheBufferSize 4096
    PVOID FileCacheBuffer = NULL;
    PWCHAR StringBuffer = NULL;
    DWORD StringBufferLength = 0;
    DWORD dwReadBytes, dwReadChars;

    TCHAR szFullPath[MAX_PATH];

    hStdIn = GetStdHandle(STD_INPUT_HANDLE);
    hStdOut = GetStdHandle(STD_OUTPUT_HANDLE);

    /* Initialize the Console Standard Streams */
    ConStreamInit(StdIn , GetStdHandle(STD_INPUT_HANDLE) , UTF8Text, INVALID_CP);
    ConStreamInit(StdOut, GetStdHandle(STD_OUTPUT_HANDLE), UTF8Text, INVALID_CP);
    ConStreamInit(StdErr, GetStdHandle(STD_ERROR_HANDLE) , UTF8Text, INVALID_CP);

    /*
     * Bad usage (too much options) or we use the /? switch.
     * Display help for the MORE command.
     */
    if (argc > 1 && wcscmp(argv[1], L"/?") == 0)
    {
        ConResPuts(StdOut, IDS_USAGE);
        return 0;
    }

    // FIXME: Parse all the remaining parameters.
    // Then the file list can be found at the very end.
    // FIXME2: Use the PARSER api that can be found in EVENTCREATE.

    // NOTE: We might try to duplicate the ConOut for read access... ?
    hKeyboard = CreateFileW(L"CONIN$", GENERIC_READ|GENERIC_WRITE,
                            FILE_SHARE_READ|FILE_SHARE_WRITE, NULL,
                            OPEN_EXISTING, 0, NULL);
    FlushConsoleInputBuffer(hKeyboard);
    ConStreamSetOSHandle(StdIn, hKeyboard);


    FileCacheBuffer = HeapAlloc(GetProcessHeap(), 0, FileCacheBufferSize);
    if (!FileCacheBuffer)
    {
        ConPuts(StdErr, L"Error: no memory\n");
        CloseHandle(hKeyboard);
        return 1;
    }

    /* Special case where we run 'MORE' without any argument: we use STDIN */
    if (argc <= 1)
    {
        /*
         * Assign STDIN handle to hFile so that the page prompt function will
         * know the data comes from STDIN, and will take different actions.
         */
        hFile = hStdIn;

        /* Update the statistics for PagePrompt */
        dwFileSize = 0;
        dwSumReadBytes = dwSumReadChars = 0;

        /* We suppose we read text from the file */

        /* For STDIN we always suppose we are in ANSI mode */
        // SetFilePointer(hFile, 0, NULL, FILE_BEGIN);
        Encoding = ENCODING_ANSI; // ENCODING_UTF8;

        bContinue = ConPutsPaging(&Pager, PagePrompt, TRUE, L"");
        if (!bContinue)
            goto Quit;

        do
        {
            bRet = FileGetString(hFile, Encoding,
                                 FileCacheBuffer, FileCacheBufferSize,
                                 &StringBuffer, &StringBufferLength,
                                 &dwReadBytes, &dwReadChars);
            if (!bRet || dwReadBytes == 0 || dwReadChars == 0)
            {
                /* We failed at reading the file, bail out */
                break;
            }

            /* Update the statistics for PagePrompt */
            dwSumReadBytes += dwReadBytes;
            dwSumReadChars += dwReadChars;

            bContinue = ConWritePaging(&Pager, PagePrompt, FALSE,
                                       StringBuffer, dwReadChars);
            /* If we Ctrl-C/Ctrl-Break, stop everything */
            if (!bContinue)
                goto Quit;
        }
        while (bRet && dwReadBytes > 0);
        goto Quit;
    }

    /* We have files: read them and output them to STDOUT */
    for (i = 1; i < argc; i++)
    {
        GetFullPathNameW(argv[i], ARRAYSIZE(szFullPath), szFullPath, NULL);
        hFile = CreateFileW(szFullPath,
                            GENERIC_READ,
                            FILE_SHARE_READ,
                            NULL,
                            OPEN_EXISTING,
                            0, // FILE_ATTRIBUTE_NORMAL,
                            NULL);
        if (hFile == INVALID_HANDLE_VALUE)
        {
            ConResPrintf(StdErr, IDS_FILE_ACCESS, szFullPath);
            continue;
        }

        /* We currently do not support files too big */
        dwFileSize = GetFileSize(hFile, NULL);
        if (dwFileSize == INVALID_FILE_SIZE)
        {
            ConPuts(StdErr, L"ERROR: Invalid file size!\n");
            CloseHandle(hFile);
            continue;
        }

        /* We suppose we read text from the file */

        /* Check whether the file is UNICODE and retrieve its encoding */
        SetFilePointer(hFile, 0, NULL, FILE_BEGIN);
        bRet = ReadFile(hFile, FileCacheBuffer, FileCacheBufferSize, &dwReadBytes, NULL);
        IsDataUnicode(FileCacheBuffer, dwReadBytes, &Encoding, &SkipBytes);
        SetFilePointer(hFile, SkipBytes, NULL, FILE_BEGIN);

        /* Update the statistics for PagePrompt */
        dwSumReadBytes = dwSumReadChars = 0;

        bContinue = ConPutsPaging(&Pager, PagePrompt, TRUE, L"");
        if (!bContinue)
        {
            CloseHandle(hFile);
            goto Quit;
        }

        do
        {
            bRet = FileGetString(hFile, Encoding,
                                 FileCacheBuffer, FileCacheBufferSize,
                                 &StringBuffer, &StringBufferLength,
                                 &dwReadBytes, &dwReadChars);
            if (!bRet || dwReadBytes == 0 || dwReadChars == 0)
            {
                /*
                 * We failed at reading the file, bail out and
                 * continue with the other files.
                 */
                break;
            }

            /* Update the statistics for PagePrompt */
            dwSumReadBytes += dwReadBytes;
            dwSumReadChars += dwReadChars;

            if ((Encoding == ENCODING_UTF16LE) || (Encoding == ENCODING_UTF16BE))
            {
                bContinue = ConWritePaging(&Pager, PagePrompt, FALSE,
                                           FileCacheBuffer, dwReadChars);
            }
            else
            {
                bContinue = ConWritePaging(&Pager, PagePrompt, FALSE,
                                           StringBuffer, dwReadChars);
            }
            /* If we Ctrl-C/Ctrl-Break, stop everything */
            if (!bContinue)
            {
                CloseHandle(hFile);
                goto Quit;
            }
        }
        while (bRet && dwReadBytes > 0);

        CloseHandle(hFile);
    }

Quit:
    if (StringBuffer) HeapFree(GetProcessHeap(), 0, StringBuffer);
    HeapFree(GetProcessHeap(), 0, FileCacheBuffer);
    CloseHandle(hKeyboard);
    return 0;
}

/* EOF */