- Use a helper function for copying small chunks of memory (1, 2, 4 and 8 bytes): profiling ntvdm showed that reads of 1 and 2 bytes were by far the most frequent, and that calling RtlCopyMemory/RtlMoveMemory for such intensive 1- or 2-byte copies was inefficient. We also don't use the intrinsics/builtins directly: since the compiler cannot know in advance the size of the memory to be copied, it cannot perform the required optimizations. It was checked that GCC's builtin memcpy or memmove, when compiling the program in release + full optimization mode, just emitted a call to _memcpy, and that naively using the MSVC movsX intrinsics does not "move" memory correctly because it does not take possible overlaps into account. Therefore, for the small sizes of 1, 2, 4 and 8 bytes we use copy assignments, whereas for every other size (3, 5, 6, 7 and 9+ bytes) we use the regular method of calling RtlMoveMemory. We gain approximately 10% in speed with this optimization.
- Also, I use >> and & for dividing by 4 and 2 instead of the regular / and % operations, because MSVC does not optimize the latter by default (it does, however, if you explicitly enable optimizations).

svn path=/trunk/; revision=64792
This commit is contained in:
Hermès Bélusca-Maïto 2014-10-17 22:08:51 +00:00
parent 44a73d10bc
commit f9ec8698fc
2 changed files with 88 additions and 28 deletions

View file

@ -60,6 +60,75 @@ LPCWSTR ExceptionName[] =
/* PRIVATE FUNCTIONS **********************************************************/
/*
 * EmulatorMoveMemory
 *
 * Copies Length bytes from Source to Destination.
 *
 * A length of 0 does nothing. Lengths equal to the size of a UCHAR, USHORT,
 * ULONG or ULONGLONG are handled with a single typed assignment. Every other
 * length is forwarded to the general-purpose move routine
 * (__builtin_memmove when built with GCC, RtlMoveMemory otherwise).
 *
 * Destination - buffer that receives the bytes; declared UNALIGNED.
 * Source      - buffer the bytes are read from; declared UNALIGNED and const.
 * Length      - number of bytes to copy.
 */
static inline VOID
EmulatorMoveMemory(OUT VOID UNALIGNED *Destination,
                   IN const VOID UNALIGNED *Source,
                   IN SIZE_T Length)
{
#if 1
    /*
     * We use a switch here to detect small moves of memory, as these
     * constitute the bulk of our moves.
     * Using RtlMoveMemory for all these small moves would be slow otherwise.
     *
     * The casts below deliberately keep the UNALIGNED and const qualifiers
     * of the parameters: neither pointer is declared as suitably aligned for
     * a 2-, 4- or 8-byte access, and Source must not be written through.
     */
    switch (Length)
    {
        case 0:
            return;

        case sizeof(UCHAR):
            *(UCHAR UNALIGNED *)Destination = *(const UCHAR UNALIGNED *)Source;
            return;

        case sizeof(USHORT):
            *(USHORT UNALIGNED *)Destination = *(const USHORT UNALIGNED *)Source;
            return;

        case sizeof(ULONG):
            *(ULONG UNALIGNED *)Destination = *(const ULONG UNALIGNED *)Source;
            return;

        case sizeof(ULONGLONG):
            *(ULONGLONG UNALIGNED *)Destination = *(const ULONGLONG UNALIGNED *)Source;
            return;

        default:
            /* Any other size (3, 5, 6, 7, 9 and above) takes the generic path */
#if defined(__GNUC__)
            __builtin_memmove(Destination, Source, Length);
#else
            RtlMoveMemory(Destination, Source, Length);
#endif
            return;
    }
#else
    /*
     * Disabled alternative built on the MSVC __movsX intrinsics.
     * It splits the move into dword, word and byte runs. It stays compiled
     * out because the movsX intrinsics do not take possibly overlapping
     * buffers into account.
     */
    PUCHAR Dest = (PUCHAR)Destination;
    PUCHAR Src  = (PUCHAR)Source;

    SIZE_T Count, NewSize = Length;

    /* Move dword */
    Count   = NewSize >> 2; // NewSize / sizeof(ULONG);
    NewSize = NewSize  & 3; // NewSize % sizeof(ULONG);
    __movsd(Dest, Src, Count);
    Dest += Count << 2; // Count * sizeof(ULONG);
    Src  += Count << 2;

    /* Move word */
    Count   = NewSize >> 1; // NewSize / sizeof(USHORT);
    NewSize = NewSize  & 1; // NewSize % sizeof(USHORT);
    __movsw(Dest, Src, Count);
    Dest += Count << 1; // Count * sizeof(USHORT);
    Src  += Count << 1;

    /* Move byte */
    Count   = NewSize;      // NewSize / sizeof(UCHAR);
    // NewSize = NewSize;   // NewSize % sizeof(UCHAR);
    __movsb(Dest, Src, Count);
#endif
}
VOID WINAPI EmulatorReadMemory(PFAST486_STATE State, ULONG Address, PVOID Buffer, ULONG Size)
{
UNREFERENCED_PARAMETER(State);
@ -91,7 +160,7 @@ VOID WINAPI EmulatorReadMemory(PFAST486_STATE State, ULONG Address, PVOID Buffer
}
/* Read the data from the virtual address space and store it in the buffer */
RtlCopyMemory(Buffer, REAL_TO_PHYS(Address), Size);
EmulatorMoveMemory(Buffer, REAL_TO_PHYS(Address), Size);
}
VOID WINAPI EmulatorWriteMemory(PFAST486_STATE State, ULONG Address, PVOID Buffer, ULONG Size)
@ -112,7 +181,7 @@ VOID WINAPI EmulatorWriteMemory(PFAST486_STATE State, ULONG Address, PVOID Buffe
if ((Address + Size) >= ROM_AREA_START && (Address < ROM_AREA_END)) return;
/* Read the data from the buffer and store it in the virtual address space */
RtlCopyMemory(REAL_TO_PHYS(Address), Buffer, Size);
EmulatorMoveMemory(REAL_TO_PHYS(Address), Buffer, Size);
/*
* Check if we modified the VGA memory.

View file

@ -100,8 +100,7 @@ IOReadStrB(ULONG Port,
}
else
{
while (Count--)
*Buffer++ = IOReadB(Port);
while (Count--) *Buffer++ = IOReadB(Port);
}
}
@ -196,8 +195,7 @@ IOReadStrW(ULONG Port,
}
else
{
while (Count--)
*Buffer++ = IOReadW(Port);
while (Count--) *Buffer++ = IOReadW(Port);
}
}
@ -278,8 +276,7 @@ IOReadStrD(ULONG Port,
}
else
{
while (Count--)
*Buffer++ = IOReadD(Port);
while (Count--) *Buffer++ = IOReadD(Port);
}
}
@ -379,7 +376,7 @@ EmulatorReadIo(PFAST486_STATE State,
}
else
{
PBYTE Address = (PBYTE)Buffer;
PUCHAR Address = (PUCHAR)Buffer;
while (DataCount--)
{
@ -388,8 +385,8 @@ EmulatorReadIo(PFAST486_STATE State,
UCHAR NewDataSize = DataSize;
/* Read dword */
Count = NewDataSize / sizeof(ULONG);
NewDataSize = NewDataSize % sizeof(ULONG);
Count = NewDataSize >> 2; // NewDataSize / sizeof(ULONG);
NewDataSize = NewDataSize & 3; // NewDataSize % sizeof(ULONG);
while (Count--)
{
*(PULONG)Address = IOReadD(CurrentPort);
@ -398,8 +395,8 @@ EmulatorReadIo(PFAST486_STATE State,
}
/* Read word */
Count = NewDataSize / sizeof(USHORT);
NewDataSize = NewDataSize % sizeof(USHORT);
Count = NewDataSize >> 1; // NewDataSize / sizeof(USHORT);
NewDataSize = NewDataSize & 1; // NewDataSize % sizeof(USHORT);
while (Count--)
{
*(PUSHORT)Address = IOReadW(CurrentPort);
@ -408,17 +405,14 @@ EmulatorReadIo(PFAST486_STATE State,
}
/* Read byte */
Count = NewDataSize / sizeof(UCHAR);
NewDataSize = NewDataSize % sizeof(UCHAR);
Count = NewDataSize; // NewDataSize / sizeof(UCHAR);
// NewDataSize = NewDataSize % sizeof(UCHAR);
while (Count--)
{
*(PUCHAR)Address = IOReadB(CurrentPort);
CurrentPort += sizeof(UCHAR);
Address += sizeof(UCHAR);
}
ASSERT(Count == 0);
ASSERT(NewDataSize == 0);
}
}
}
@ -457,7 +451,7 @@ EmulatorWriteIo(PFAST486_STATE State,
}
else
{
PBYTE Address = (PBYTE)Buffer;
PUCHAR Address = (PUCHAR)Buffer;
while (DataCount--)
{
@ -466,8 +460,8 @@ EmulatorWriteIo(PFAST486_STATE State,
UCHAR NewDataSize = DataSize;
/* Write dword */
Count = NewDataSize / sizeof(ULONG);
NewDataSize = NewDataSize % sizeof(ULONG);
Count = NewDataSize >> 2; // NewDataSize / sizeof(ULONG);
NewDataSize = NewDataSize & 3; // NewDataSize % sizeof(ULONG);
while (Count--)
{
IOWriteD(CurrentPort, *(PULONG)Address);
@ -476,8 +470,8 @@ EmulatorWriteIo(PFAST486_STATE State,
}
/* Write word */
Count = NewDataSize / sizeof(USHORT);
NewDataSize = NewDataSize % sizeof(USHORT);
Count = NewDataSize >> 1; // NewDataSize / sizeof(USHORT);
NewDataSize = NewDataSize & 1; // NewDataSize % sizeof(USHORT);
while (Count--)
{
IOWriteW(CurrentPort, *(PUSHORT)Address);
@ -486,17 +480,14 @@ EmulatorWriteIo(PFAST486_STATE State,
}
/* Write byte */
Count = NewDataSize / sizeof(UCHAR);
NewDataSize = NewDataSize % sizeof(UCHAR);
Count = NewDataSize; // NewDataSize / sizeof(UCHAR);
// NewDataSize = NewDataSize % sizeof(UCHAR);
while (Count--)
{
IOWriteB(CurrentPort, *(PUCHAR)Address);
CurrentPort += sizeof(UCHAR);
Address += sizeof(UCHAR);
}
ASSERT(Count == 0);
ASSERT(NewDataSize == 0);
}
}
}