mirror of
https://github.com/reactos/reactos.git
synced 2025-04-22 05:00:27 +00:00
400 lines
14 KiB
C++
400 lines
14 KiB
C++
//
|
|
// ftell.cpp
|
|
//
|
|
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
//
|
|
// Defines the ftell() family of functions, which computes the current position
|
|
// of the file pointer of a stream.
|
|
//
|
|
#include <corecrt_internal_stdio.h>
|
|
#include <corecrt_internal_ptd_propagation.h>
|
|
|
|
|
|
// Reports whether the given lowio text mode is one of the two modes (utf8 or
// utf16le) that this file treats as holding wide characters in the buffer.
static bool __cdecl buffer_contains_wide_characters(__crt_lowio_text_mode const text_mode) throw()
{
    if (text_mode == __crt_lowio_text_mode::utf8)
    {
        return true;
    }

    return text_mode == __crt_lowio_text_mode::utf16le;
}
|
|
|
|
// Returns the size, in bytes, of one buffered character for the given text
// mode:  sizeof(wchar_t) for the wide modes, sizeof(char) otherwise.
static size_t __cdecl buffer_character_size(__crt_lowio_text_mode const text_mode) throw()
{
    if (buffer_contains_wide_characters(text_mode))
    {
        return sizeof(wchar_t);
    }

    return sizeof(char);
}
|
|
|
|
// Counts the number of newlines (LFs) in the byte range [buffer_first,
// buffer_last), interpreting the range as a sequence of elements of type
// Character.
//
// When sizeof(Character) != 1, the caller is expected to pass a range that
// consists of a whole number of Character elements; this is asserted in debug
// builds.  In builds where the assertion is compiled out, any trailing partial
// element is ignored rather than read, and an empty or inverted range yields
// zero, so the scan can never run past buffer_last.
//
// Returns the number of elements that compare equal to '\n'.
template <typename Character>
static __int64 __cdecl count_newlines_of_type(
    _In_reads_(buffer_last - buffer_first) char const* const buffer_first,
    _In_reads_(0)                          char const* const buffer_last
    ) throw()
{
    // This invariant is maintained by the lowio library.  When Character==wchar_t,
    // all writes to the buffer are in wide characters.
    _ASSERTE((buffer_last - buffer_first) % sizeof(Character) == 0);

    if (buffer_last <= buffer_first)
    {
        return 0;
    }

    // Bound the scan by the number of _whole_ elements in the range.  Comparing
    // typed pointers for inequality would never terminate if the byte length
    // were not a multiple of sizeof(Character):
    size_t const element_count = static_cast<size_t>(buffer_last - buffer_first) / sizeof(Character);

    Character const* const typed_first = reinterpret_cast<Character const*>(buffer_first);

    __int64 newline_count = 0;
    for (size_t i = 0; i != element_count; ++i)
    {
        if (typed_first[i] == '\n')
        {
            ++newline_count;
        }
    }

    return newline_count;
}
|
|
|
|
// Returns the number of bytes occupied by newline (LF) characters in the range
// [buffer_first, buffer_last).  For the wide text modes the range is scanned as
// wchar_t elements and the count is scaled by sizeof(wchar_t); otherwise the
// range is scanned as chars and the count is returned as-is.
static __int64 __cdecl count_newline_bytes(
    _In_reads_(buffer_last - buffer_first) char const* const     buffer_first,
    _In_reads_(0)                          char const* const     buffer_last,
    _In_                                   __crt_lowio_text_mode const text_mode
    ) throw()
{
    if (!buffer_contains_wide_characters(text_mode))
    {
        return count_newlines_of_type<char>(buffer_first, buffer_last);
    }

    __int64 const wide_newline_count = count_newlines_of_type<wchar_t>(buffer_first, buffer_last);
    return wide_newline_count * sizeof(wchar_t);
}
|
|
|
|
// This function handles the case where the file is open in UTF-8 text mode and
|
|
// the translated UTF-16 form of the text has a different number of characters
|
|
// than the original UTF-8 text (remember: when reading a file in UTF-8 mode, the
|
|
// lowio library converts the UTF-8 to UTF-16).
|
|
static __int64 __cdecl common_ftell_translated_utf8_nolock(
|
|
__crt_stdio_stream const stream,
|
|
__int64 const lowio_position,
|
|
__crt_cached_ptd_host& ptd
|
|
) throw()
|
|
{
|
|
int const fh = _fileno(stream.public_stream());
|
|
|
|
// If the buffer has been exhausted, then the current lowio position is also
|
|
// the current stdio position:
|
|
if (stream->_cnt == 0)
|
|
{
|
|
return lowio_position;
|
|
}
|
|
|
|
__int64 const current_buffer_position = (stream->_ptr - stream->_base) / static_cast<__int64>(sizeof(wchar_t));
|
|
|
|
// Otherwise, we have to re-read the buffer, in binary mode, so that we can
|
|
// analyze the original UTF-8 text to compute the actual position in the
|
|
// file. To do this, we seek the lowio pointer back to the beginning of
|
|
// the stdio buffer, re-read the buffer, then seek the lowio pointer back
|
|
// to its original location:
|
|
__int64 const base_buffer_position = _lseeki64_internal(fh, _startpos(fh), SEEK_SET, ptd);
|
|
if (base_buffer_position != _startpos(fh))
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
DWORD bytes_read;
|
|
char raw_buffer[_INTERNAL_BUFSIZ];
|
|
if (!ReadFile(reinterpret_cast<HANDLE>(_osfhnd(fh)), raw_buffer, _INTERNAL_BUFSIZ, &bytes_read, nullptr))
|
|
return -1;
|
|
|
|
// Seek back to where we were, to ensure the stdio stream is left in a
|
|
// consistent state (and "unmodified" from before the call):
|
|
if (_lseeki64_internal(fh, lowio_position, SEEK_SET, ptd) < 0)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
// This should not normally happen: we should always read enough bytes:
|
|
if (current_buffer_position > static_cast<__int64>(bytes_read))
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
// Scan the raw, untranslated buffer to find the current position, updating
|
|
// the file pointer to account for newline translation in the buffer:
|
|
char const* const raw_first = raw_buffer;
|
|
#pragma warning(disable:__WARNING_UNUSED_POINTER_ASSIGNMENT) // 28930
|
|
char const* const raw_last = raw_buffer + bytes_read;
|
|
|
|
char const* raw_it = raw_first;
|
|
for (__int64 i = 0; i != current_buffer_position && raw_it < raw_last; ++i, ++raw_it)
|
|
{
|
|
if (*raw_it == CR)
|
|
{
|
|
if (raw_it < raw_last - 1 && *(raw_it + 1) == LF)
|
|
++raw_it;
|
|
}
|
|
else
|
|
{
|
|
raw_it += _utf8_no_of_trailbytes(static_cast<const unsigned char>(*raw_it));
|
|
}
|
|
}
|
|
|
|
return base_buffer_position + (raw_it - raw_first);
|
|
}
|
|
|
|
|
|
|
|
// This function handles the extra adjustments that need to be made to the file
|
|
// position returned by ftell when a stream is opened in read mode.
|
|
static __int64 __cdecl common_ftell_read_mode_nolock(
|
|
__crt_stdio_stream const stream,
|
|
__int64 const lowio_position,
|
|
__int64 const buffer_offset,
|
|
__crt_cached_ptd_host& ptd
|
|
) throw()
|
|
{
|
|
int const fh = _fileno(stream.public_stream());
|
|
|
|
// We will need to adjust the file position of UTF-8 files to account for
|
|
// UTF-8 to UTF-16 translation:
|
|
__crt_lowio_text_mode const text_mode = _textmode(fh);
|
|
|
|
__int64 const translation_factor = text_mode == __crt_lowio_text_mode::utf8
|
|
? static_cast<__int64>(sizeof(wchar_t))
|
|
: static_cast<__int64>(sizeof(char));
|
|
|
|
// If the buffer has been exhausted, then the current lowio position is also
|
|
// the current stdio position:
|
|
if (stream->_cnt == 0)
|
|
{
|
|
return lowio_position;
|
|
}
|
|
|
|
// The lowio position points one-past-the-end of the current stdio buffer.
|
|
// We need to find the position of the beginning of the buffer. To start,
|
|
// we compute the number of bytes in the buffer. Note that we cannot just
|
|
// use the buffer size, because the buffer will not be full if EOF is
|
|
// readhed before the buffer is full.
|
|
__int64 bytes_read = stream->_cnt + static_cast<__int64>(stream->_ptr - stream->_base);
|
|
|
|
// If this is a binary mode stream, we can simply subtract this from the
|
|
// lowio position, and combine it with the buffer offset to get the stdio
|
|
// position:
|
|
if ((_osfile(fh) & FTEXT) == 0)
|
|
{
|
|
return lowio_position
|
|
- (bytes_read / translation_factor)
|
|
+ (buffer_offset / translation_factor);
|
|
}
|
|
|
|
// If this is a text mode stream, we need to adjust the number of bytes that
|
|
// were read into the buffer to account for newline translation.
|
|
//
|
|
// If we are _not_ at EOF, the number of untranslated characters read is the
|
|
// buffer size. However, if we are not at EOF, the buffer may not be full,
|
|
// so we need to scan the buffer to count newline characters. (Note: we
|
|
// only count newline characters if the stream is at EOF, because doing so
|
|
// is more expensive than seeking to the end and seeking back).
|
|
|
|
// Seek to the end of the file. If the current position is the end of the
|
|
// file, then scan the buffer for newlines and adjust bytes_read:
|
|
if (_lseeki64_internal(fh, 0, SEEK_END, ptd) == lowio_position)
|
|
{
|
|
bytes_read += count_newline_bytes(stream->_base, stream->_base + bytes_read, text_mode);
|
|
|
|
// If the last byte was a ^Z, that character will not be present in the
|
|
// buffer (it is omitted by lowio):
|
|
if (stream.ctrl_z())
|
|
{
|
|
bytes_read += buffer_character_size(text_mode);
|
|
}
|
|
}
|
|
// Otherwise, the current position is not at the end of the file; we need to
|
|
// seek back to the original position and compute the size of the buffer:
|
|
else
|
|
{
|
|
if (_lseeki64_internal(fh, lowio_position, SEEK_SET, ptd) == -1)
|
|
return -1;
|
|
|
|
// If the number of bytes read is smaller than the small buffer and was
|
|
// not user-provided, the buffer size was set to _SMALL_BUFSIZ during
|
|
// the last call to __acrt_stdio_refill_and_read_{narrow,wide}_nolock:
|
|
if (bytes_read <= _SMALL_BUFSIZ &&
|
|
stream.has_crt_buffer() &&
|
|
!stream.has_setvbuf_buffer())
|
|
{
|
|
bytes_read = _SMALL_BUFSIZ;
|
|
}
|
|
// Otherwise, the buffer size is what is stated in the stream object:
|
|
else
|
|
{
|
|
bytes_read = stream->_bufsiz;
|
|
}
|
|
|
|
// If the first byte in the untranslated buffer was a '\n', we assume it
|
|
// was preceded by a '\r', which was discarded by the previous read
|
|
// operation:
|
|
if (_osfile(fh) & FCRLF)
|
|
{
|
|
bytes_read += buffer_character_size(text_mode);
|
|
}
|
|
}
|
|
|
|
return lowio_position
|
|
- (bytes_read / translation_factor)
|
|
+ (buffer_offset / translation_factor);
|
|
}
|
|
|
|
|
|
|
|
template <typename Integer>
|
|
static Integer __cdecl common_ftell_nolock(__crt_stdio_stream, __crt_cached_ptd_host& ptd) throw();
|
|
|
|
template <>
|
|
__int64 __cdecl common_ftell_nolock(__crt_stdio_stream const stream, __crt_cached_ptd_host& ptd) throw()
|
|
{
|
|
_UCRT_VALIDATE_RETURN(ptd, stream.public_stream(), EINVAL, -1);
|
|
|
|
int const fh = _fileno(stream.public_stream());
|
|
|
|
if (stream->_cnt < 0)
|
|
{
|
|
stream->_cnt = 0;
|
|
}
|
|
|
|
// Get the current lowio file position. If stdio is buffering the stream,
|
|
// this position will point one past the end of the current stdio buffer.
|
|
__int64 const lowio_position = _lseeki64_internal(fh, 0, SEEK_CUR, ptd);
|
|
if (lowio_position < 0)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
// If the stream is unbuffered or no buffering is designated, we can simply
|
|
// compute the stdio position via the remaining stdio stream count:
|
|
if (!stream.has_big_buffer())
|
|
{
|
|
return lowio_position - stream->_cnt;
|
|
}
|
|
|
|
// The above lseek validates the handle, so it's okay to get the text mode:
|
|
__crt_lowio_text_mode const text_mode = _textmode(fh);
|
|
|
|
// This is the current offset into the stdio buffer; we will adjust this to
|
|
// account for translation and updates as this function progresses:
|
|
__int64 buffer_offset = stream->_ptr - stream->_base;
|
|
|
|
// If the file is in read or write mode, we need special handling for UTF-8
|
|
// and text mode files, to account for newline translation and UTF-8 to
|
|
// UTF-16 conversion:
|
|
if (stream.has_any_of(_IOWRITE | _IOREAD))
|
|
{
|
|
if (text_mode == __crt_lowio_text_mode::utf8 && _utf8translations(fh))
|
|
{
|
|
return common_ftell_translated_utf8_nolock(stream, lowio_position, ptd);
|
|
}
|
|
|
|
// For text mode files, adjust the buffer offset to account for newline
|
|
// translation:
|
|
if (_osfile(fh) & FTEXT)
|
|
{
|
|
buffer_offset += count_newline_bytes(stream->_base, stream->_ptr, text_mode);
|
|
}
|
|
}
|
|
// Otherwise, if the file is not in read/write mode, ftell cannot proceed:
|
|
else if (!stream.has_all_of(_IOUPDATE))
|
|
{
|
|
ptd.get_errno().set(EINVAL);
|
|
return -1;
|
|
}
|
|
|
|
// If the current lowio position is at the beginning of the file, the stdio
|
|
// position is whatever the offset is:
|
|
if (lowio_position == 0)
|
|
{
|
|
return buffer_offset;
|
|
}
|
|
|
|
if (stream.has_all_of(_IOREAD))
|
|
{
|
|
return common_ftell_read_mode_nolock(stream, lowio_position, buffer_offset, ptd);
|
|
}
|
|
|
|
if (text_mode == __crt_lowio_text_mode::utf8)
|
|
{
|
|
buffer_offset /= sizeof(wchar_t);
|
|
}
|
|
|
|
return lowio_position + buffer_offset;
|
|
}
|
|
|
|
template <>
|
|
long __cdecl common_ftell_nolock(__crt_stdio_stream const stream, __crt_cached_ptd_host& ptd) throw()
|
|
{
|
|
__int64 const result = common_ftell_nolock<__int64>(stream, ptd);
|
|
if (result > LONG_MAX)
|
|
{
|
|
ptd.get_errno().set(EINVAL);
|
|
return -1;
|
|
}
|
|
|
|
return static_cast<long>(result);
|
|
}
|
|
|
|
|
|
|
|
// Queries the position of the file pointer of a stream. This function computes
|
|
// the position of the pointer, accounting for stdio buffering. This is not the
|
|
// same as fseek(stream, 0, SEEK_SET), because fseek will remove an ungetc and
|
|
// may flush buffers.
|
|
//
|
|
// Returns the present file position on success; returns -1 and sets errno on
|
|
// failure.
|
|
template <typename Integer>
|
|
static Integer __cdecl common_ftell(__crt_stdio_stream const stream, __crt_cached_ptd_host& ptd) throw()
|
|
{
|
|
_UCRT_VALIDATE_RETURN(ptd, stream.valid(), EINVAL, -1);
|
|
|
|
Integer return_value = 0;
|
|
|
|
_lock_file(stream.public_stream());
|
|
__try
|
|
{
|
|
return_value = common_ftell_nolock<Integer>(stream, ptd);
|
|
}
|
|
__finally
|
|
{
|
|
_unlock_file(stream.public_stream());
|
|
}
|
|
__endtry
|
|
|
|
return return_value;
|
|
}
|
|
|
|
|
|
|
|
|
|
// Public entry point:  returns the stream position as a long, taking the stream
// lock for the duration of the query.
extern "C" long __cdecl ftell(FILE* const public_stream)
{
    __crt_cached_ptd_host    ptd;
    __crt_stdio_stream const stream(public_stream);

    return common_ftell<long>(stream, ptd);
}
|
|
|
|
// Public entry point:  returns the stream position as a long without taking the
// stream lock.
extern "C" long __cdecl _ftell_nolock(FILE* const public_stream)
{
    __crt_cached_ptd_host    ptd;
    __crt_stdio_stream const stream(public_stream);

    return common_ftell_nolock<long>(stream, ptd);
}
|
|
|
|
// Public entry point:  returns the stream position as a 64-bit integer, taking
// the stream lock for the duration of the query.
extern "C" __int64 __cdecl _ftelli64(FILE* const public_stream)
{
    __crt_cached_ptd_host    ptd;
    __crt_stdio_stream const stream(public_stream);

    return common_ftell<__int64>(stream, ptd);
}
|
|
|
|
// Public entry point:  returns the stream position as a 64-bit integer without
// taking the stream lock.
extern "C" __int64 __cdecl _ftelli64_nolock(FILE* const public_stream)
{
    __crt_cached_ptd_host    ptd;
    __crt_stdio_stream const stream(public_stream);

    return common_ftell_nolock<__int64>(stream, ptd);
}
|
|
|
|
// Internal entry point:  same as the unlocked 64-bit query, but uses the
// per-thread data host supplied by the caller instead of creating its own.
extern "C" __int64 __cdecl _ftelli64_nolock_internal(FILE* const public_stream, __crt_cached_ptd_host& ptd)
{
    __crt_stdio_stream const stream(public_stream);

    return common_ftell_nolock<__int64>(stream, ptd);
}
|