[UMKM_APITEST] Add test for extended state saving

This commit is contained in:
Timo Kreuzer 2025-02-04 12:19:22 +02:00
parent d8bfa93f21
commit 84c205bd06
4 changed files with 555 additions and 0 deletions

View file

@ -3,15 +3,18 @@ include_directories(${REACTOS_SOURCE_DIR}/ntoskrnl/include)
list(APPEND SOURCE
SystemCall.c
XStateSave.c
precomp.h)
if(ARCH STREQUAL "i386")
add_asm_files(umkm_apitest_asm
i386/SystemCall_asm.s
i386/XState_asm.s
)
elseif(ARCH STREQUAL "amd64")
add_asm_files(umkm_apitest_asm
amd64/SystemCall_asm.s
i386/XState_asm.s
)
endif()

View file

@ -0,0 +1,355 @@
/*
* PROJECT: ReactOS API Tests
* LICENSE: MIT (https://spdx.org/licenses/MIT)
* PURPOSE: Tests for extended state
* COPYRIGHT: Copyright 2025 Timo Kreuzer <timo.kreuzer@reactos.org>
*/
#include "precomp.h"
#include <windows.h>
static ULONG s_ProcessorNumber;
static
VOID
AdjustAffinity(VOID)
{
KAFFINITY OldAffinity, NewAffinity;
/* Set affinity to the current test processor */
NewAffinity = (KAFFINITY)1 << s_ProcessorNumber;
OldAffinity = SetThreadAffinityMask(GetCurrentThread(), NewAffinity);
ok(OldAffinity != 0, "SetThreadAffinityMask(0x%Ix) failed\n", NewAffinity);
}
typedef struct _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(16) _M128U64
{
unsigned __int64 u64[2];
} M128U64;
typedef struct _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(32) _M256U64
{
unsigned __int64 u64[4];
} M256U64;
typedef struct _DECLSPEC_INTRIN_TYPE _CRT_ALIGN(64) _M512U64
{
unsigned __int64 u64[8];
} M512U64;
#ifdef _M_IX86
#define REG_COUNT_SSE 8
#define REG_COUNT_AVX 8
#define REG_COUNT_AVX512 8
#else
#define REG_COUNT_SSE 16
#define REG_COUNT_AVX 16
#define REG_COUNT_AVX512 32
#endif
void __fastcall set_SSE_state(const M128U64 data[REG_COUNT_SSE]);
void __fastcall get_SSE_state(M128U64 data[REG_COUNT_SSE]);
void __fastcall set_AVX_state(const M256U64 data[REG_COUNT_AVX]);
void __fastcall get_AVX_state(M256U64 data[REG_COUNT_AVX]);
void __fastcall set_AVX512_state(const M512U64 data[REG_COUNT_AVX512]);
void __fastcall get_AVX512_state(M512U64 data[REG_COUNT_AVX512]);
BOOL ok_eq_m128i_(M128U64 a, M128U64 b, const char* variable, unsigned int line)
{
BOOL equal = !memcmp(&a, &b, sizeof(M128U64));
ok_(__FILE__, line)(equal, "Variable %s Expected %I64x %I64x, got %I64x %I64x\n",
variable, b.u64[1], b.u64[0], a.u64[1], a.u64[0]);
return TRUE;
}
#define ok_eq_m128i(a, b) ok_eq_m128i_(a, b, #a, __LINE__)
BOOL ok_eq_m256i_(M256U64 a, M256U64 b, const char* variable, unsigned int line)
{
BOOL equal = !memcmp(&a, &b, sizeof(M256U64));
ok_(__FILE__, line)(equal, "Variable %s Expected %I64x %I64x %I64x %I64x, got %I64x %I64x %I64x %I64x\n",
variable, b.u64[3], b.u64[2], b.u64[1], b.u64[0],
a.u64[3], a.u64[2], a.u64[1], a.u64[0]);
return TRUE;
}
#define ok_eq_m256i(a, b) ok_eq_m256i_(a, b, #a, __LINE__)
BOOL ok_eq_m512i_(M512U64 a, M512U64 b, const char* variable, unsigned int line)
{
BOOL equal = !memcmp(&a, &b, sizeof(M512U64));
ok_(__FILE__, line)(equal, "Variable %s Expected %I64x %I64x %I64x %I64x %I64x %I64x %I64x %I64x, got %I64x %I64x %I64x %I64x %I64x %I64x %I64x %I64x\n",
variable,
b.u64[7], b.u64[6], b.u64[5], b.u64[4],
b.u64[3], b.u64[2], b.u64[1], b.u64[0],
a.u64[7], a.u64[6], a.u64[5], a.u64[4],
a.u64[3], a.u64[2], a.u64[1], a.u64[0]);
return TRUE;
}
#define ok_eq_m512i(a, b) ok_eq_m512i_(a, b, #a, __LINE__)
ULONG g_randomSeed = 0x12345678;
ULONG64 GenRandom64(VOID)
{
ULONG Low32 = RtlRandom(&g_randomSeed);
ULONG High32 = RtlRandom(&g_randomSeed);
return ((ULONG64)High32 << 32) | Low32;
}
VOID RandomFill128(M128U64* Data128, ULONG Count)
{
for (ULONG i = 0; i < Count; i++)
{
Data128[i].u64[0] = GenRandom64();
Data128[i].u64[1] = GenRandom64();
}
}
VOID RandomFill256(M256U64* Data256, ULONG Count)
{
for (ULONG i = 0; i < Count; i++)
{
Data256[i].u64[0] = GenRandom64();
Data256[i].u64[1] = GenRandom64();
Data256[i].u64[2] = GenRandom64();
Data256[i].u64[3] = GenRandom64();
}
}
VOID RandomFill512(M512U64* Data512, ULONG Count)
{
for (ULONG i = 0; i < Count; i++)
{
Data512[i].u64[0] = GenRandom64();
Data512[i].u64[1] = GenRandom64();
Data512[i].u64[2] = GenRandom64();
Data512[i].u64[3] = GenRandom64();
Data512[i].u64[4] = GenRandom64();
Data512[i].u64[5] = GenRandom64();
Data512[i].u64[6] = GenRandom64();
Data512[i].u64[7] = GenRandom64();
}
}
static DWORD WINAPI Thread_SSE(LPVOID Parameter)
{
AdjustAffinity();
// Get the current (fresh) state
M128U64 SseState[REG_COUNT_SSE] = { 0 };
get_SSE_state(SseState);
// Make sure it's all zero
static const M128U64 Zero128 = { 0 };
for (ULONG i = 0; i < ARRAYSIZE(SseState); i++)
{
ok_eq_m128i(SseState[i], Zero128);
}
// Set a new "random" state
RandomFill128(SseState, ARRAYSIZE(SseState));
set_SSE_state(SseState);
return 0;
}
void Test_SSE(void)
{
if (!IsProcessorFeaturePresent(PF_XMMI_INSTRUCTIONS_AVAILABLE))
{
skip("SSE not supported\n");
return;
}
// Fill the array with random numbers
M128U64 InSseState[REG_COUNT_SSE];
RandomFill128(InSseState, ARRAYSIZE(InSseState));
_SEH2_TRY
{
// Set the state
set_SSE_state(InSseState);
}
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)
{
ok(FALSE, "Failed to set SSE state\n");
return;
}
_SEH2_END;
// Run a different thread that uses SSE
HANDLE hThread = CreateThread(NULL, 0, Thread_SSE, NULL, 0, NULL);
ok(hThread != NULL, "CreateThread failed\n");
if (hThread == NULL)
{
skip("CreateThread failed\n");
return;
}
WaitForSingleObject(hThread, INFINITE);
CloseHandle(hThread);
// Get the state
M128U64 OutSseState[REG_COUNT_SSE] = { 0 };
get_SSE_state(OutSseState);
// Validte the state of the non-volatile registers
for (ULONG i = 6; i < REG_COUNT_SSE; i++)
{
ok_eq_m128i(OutSseState[i], InSseState[i]);
}
}
static DWORD WINAPI Thread_AVX(LPVOID Parameter)
{
AdjustAffinity();
// Get the current (fresh) state
M256U64 AvxState[REG_COUNT_AVX];
get_AVX_state(AvxState);
// Make sure it's all zero
static const M256U64 Zero256 = { 0 };
for (ULONG i = 0; i < ARRAYSIZE(AvxState); i++)
{
ok_eq_m256i(AvxState[i], Zero256);
}
// Set a new "random" state
RandomFill256(AvxState, ARRAYSIZE(AvxState));
set_AVX_state(AvxState);
return 0;
}
void Test_AVX(void)
{
if (!IsProcessorFeaturePresent(PF_AVX_INSTRUCTIONS_AVAILABLE))
{
skip("AVX not supported\n");
return;
}
// Fill the array with random numbers
M256U64 InAvxState[REG_COUNT_AVX];
RandomFill256(InAvxState, ARRAYSIZE(InAvxState));
_SEH2_TRY
{
// Set the state
set_AVX_state(InAvxState);
}
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)
{
ok(FALSE, "Failed to set AVX state\n");
return;
}
_SEH2_END;
// Run a different thread that uses AVX
HANDLE hThread = CreateThread(NULL, 0, Thread_AVX, NULL, 0, NULL);
ok(hThread != NULL, "CreateThread failed\n");
if (hThread == NULL)
{
skip("CreateThread failed\n");
return;
}
WaitForSingleObject(hThread, INFINITE);
CloseHandle(hThread);
// Get the state
M256U64 OutAvxState[REG_COUNT_AVX] = { 0 };
get_AVX_state(OutAvxState);
// Validte the state of the non-volatile registers
for (ULONG i = 6; i < ARRAYSIZE(OutAvxState); i++)
{
ok_eq_m256i(OutAvxState[i], InAvxState[i]);
}
}
static DWORD WINAPI Thread_AVX512(LPVOID Parameter)
{
AdjustAffinity();
// Get the current (fresh) state
M512U64 Avx512State[REG_COUNT_AVX512];
get_AVX512_state(Avx512State);
// Make sure it's all zero
static const M512U64 Zero512 = { 0 };
for (ULONG i = 0; i < ARRAYSIZE(Avx512State); i++)
{
ok_eq_m512i(Avx512State[i], Zero512);
}
// Set a new "random" state
RandomFill512(Avx512State, ARRAYSIZE(Avx512State));
set_AVX512_state(Avx512State);
return 0;
}
void Test_AVX512(void)
{
if (!IsProcessorFeaturePresent(PF_AVX512F_INSTRUCTIONS_AVAILABLE))
{
skip("AVX512 not supported\n");
return;
}
// Fill the array with random numbers
M512U64 InAvx512State[REG_COUNT_AVX512];
RandomFill512(InAvx512State, ARRAYSIZE(InAvx512State));
_SEH2_TRY
{
// Set the state
set_AVX512_state(InAvx512State);
}
_SEH2_EXCEPT(EXCEPTION_EXECUTE_HANDLER)
{
ok(FALSE, "Failed to set AVX512 state\n");
return;
}
_SEH2_END;
// Run a different thread that uses AVX512
HANDLE hThread = CreateThread(NULL, 0, Thread_AVX512, NULL, 0, NULL);
ok(hThread != NULL, "CreateThread failed\n");
if (hThread == NULL)
{
skip("CreateThread failed\n");
return;
}
WaitForSingleObject(hThread, INFINITE);
CloseHandle(hThread);
// Get the state
M512U64 OutAvx512State[REG_COUNT_AVX512] = { 0 };
get_AVX512_state(OutAvx512State);
// Validte the state of the non-volatile registers
for (ULONG i = 6; i < ARRAYSIZE(OutAvx512State); i++)
{
ok_eq_m512i(OutAvx512State[i], InAvx512State[i]);
}
}
START_TEST(XStateSave)
{
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
for (s_ProcessorNumber = 0;
s_ProcessorNumber < sysinfo.dwNumberOfProcessors;
s_ProcessorNumber++)
{
AdjustAffinity();
Test_SSE();
Test_AVX();
Test_AVX512();
}
}

View file

@ -0,0 +1,195 @@
/*
* PROJECT: ReactOS API Tests
* LICENSE: MIT (https://spdx.org/licenses/MIT)
* PURPOSE: Assembly helpers for extended state tests
* COPYRIGHT: Copyright 2025 Timo Kreuzer <timo.kreuzer@reactos.org>
*/
#include <asm.inc>
#ifdef _M_IX86
#define rcx ecx
#define get_SSE_state @get_SSE_state@4
#define set_SSE_state @set_SSE_state@4
#define get_AVX_state @get_AVX_state@4
#define set_AVX_state @set_AVX_state@4
#define get_AVX512_state @get_AVX512_state@4
#define set_AVX512_state @set_AVX512_state@4
.code
#else
.code64
#endif
// void __fastcall get_SSE_state(__m128i data[16]);
PUBLIC get_SSE_state
get_SSE_state:
movaps [rcx + 0 * 16], xmm0
movaps [rcx + 1 * 16], xmm1
movaps [rcx + 2 * 16], xmm2
movaps [rcx + 3 * 16], xmm3
movaps [rcx + 4 * 16], xmm4
movaps [rcx + 5 * 16], xmm5
movaps [rcx + 6 * 16], xmm6
movaps [rcx + 7 * 16], xmm7
#ifndef _M_IX86
movaps [rcx + 8 * 16], xmm8
movaps [rcx + 9 * 16], xmm9
movaps [rcx + 10 * 16], xmm10
movaps [rcx + 11 * 16], xmm11
movaps [rcx + 12 * 16], xmm12
movaps [rcx + 13 * 16], xmm13
movaps [rcx + 14 * 16], xmm14
movaps [rcx + 15 * 16], xmm15
#endif
ret
// void __fastcall set_SSE_state(__m128i data[16]);
PUBLIC set_SSE_state
set_SSE_state:
movaps xmm0, [rcx + 0 * 16]
movaps xmm1, [rcx + 1 * 16]
movaps xmm2, [rcx + 2 * 16]
movaps xmm3, [rcx + 3 * 16]
movaps xmm4, [rcx + 4 * 16]
movaps xmm5, [rcx + 5 * 16]
movaps xmm6, [rcx + 6 * 16]
movaps xmm7, [rcx + 7 * 16]
#ifndef _M_IX86
movaps xmm8, [rcx + 8 * 16]
movaps xmm9, [rcx + 9 * 16]
movaps xmm10, [rcx + 10 * 16]
movaps xmm11, [rcx + 11 * 16]
movaps xmm12, [rcx + 12 * 16]
movaps xmm13, [rcx + 13 * 16]
movaps xmm14, [rcx + 14 * 16]
movaps xmm15, [rcx + 15 * 16]
#endif
ret
// void __fastcall get_AVX_state(__m256i data[16]);
PUBLIC get_AVX_state
get_AVX_state:
vmovaps [rcx + 0 * 32], ymm0
vmovaps [rcx + 1 * 32], ymm1
vmovaps [rcx + 2 * 32], ymm2
vmovaps [rcx + 3 * 32], ymm3
vmovaps [rcx + 4 * 32], ymm4
vmovaps [rcx + 5 * 32], ymm5
vmovaps [rcx + 6 * 32], ymm6
vmovaps [rcx + 7 * 32], ymm7
#ifndef _M_IX86
vmovaps [rcx + 8 * 32], ymm8
vmovaps [rcx + 9 * 32], ymm9
vmovaps [rcx + 10 * 32], ymm10
vmovaps [rcx + 11 * 32], ymm11
vmovaps [rcx + 12 * 32], ymm12
vmovaps [rcx + 13 * 32], ymm13
vmovaps [rcx + 14 * 32], ymm14
vmovaps [rcx + 15 * 32], ymm15
#endif
ret
// void __fastcall set_AVX_state(__m256i data[16]);
PUBLIC set_AVX_state
set_AVX_state:
vmovaps ymm0, [rcx + 0 * 32]
vmovaps ymm1, [rcx + 1 * 32]
vmovaps ymm2, [rcx + 2 * 32]
vmovaps ymm3, [rcx + 3 * 32]
vmovaps ymm4, [rcx + 4 * 32]
vmovaps ymm5, [rcx + 5 * 32]
vmovaps ymm6, [rcx + 6 * 32]
vmovaps ymm7, [rcx + 7 * 32]
#ifndef _M_IX86
vmovaps ymm8, [rcx + 8 * 32]
vmovaps ymm9, [rcx + 9 * 32]
vmovaps ymm10, [rcx + 10 * 32]
vmovaps ymm11, [rcx + 11 * 32]
vmovaps ymm12, [rcx + 12 * 32]
vmovaps ymm13, [rcx + 13 * 32]
vmovaps ymm14, [rcx + 14 * 32]
vmovaps ymm15, [rcx + 15 * 32]
#endif
ret
// void __fastcall get_AVX512_state(M512U64 data[16]);
PUBLIC get_AVX512_state
get_AVX512_state:
vmovaps [rcx + 0 * 64], zmm0
vmovaps [rcx + 1 * 64], zmm1
vmovaps [rcx + 2 * 64], zmm2
vmovaps [rcx + 3 * 64], zmm3
vmovaps [rcx + 4 * 64], zmm4
vmovaps [rcx + 5 * 64], zmm5
vmovaps [rcx + 6 * 64], zmm6
vmovaps [rcx + 7 * 64], zmm7
#ifndef _M_IX86
vmovaps [rcx + 8 * 64], zmm8
vmovaps [rcx + 9 * 64], zmm9
vmovaps [rcx + 10 * 64], zmm10
vmovaps [rcx + 11 * 64], zmm11
vmovaps [rcx + 12 * 64], zmm12
vmovaps [rcx + 13 * 64], zmm13
vmovaps [rcx + 14 * 64], zmm14
vmovaps [rcx + 15 * 64], zmm15
vmovaps [rcx + 16 * 64], zmm16
vmovaps [rcx + 17 * 64], zmm17
vmovaps [rcx + 18 * 64], zmm18
vmovaps [rcx + 19 * 64], zmm19
vmovaps [rcx + 20 * 64], zmm20
vmovaps [rcx + 21 * 64], zmm21
vmovaps [rcx + 22 * 64], zmm22
vmovaps [rcx + 23 * 64], zmm23
vmovaps [rcx + 24 * 64], zmm24
vmovaps [rcx + 25 * 64], zmm25
vmovaps [rcx + 26 * 64], zmm26
vmovaps [rcx + 27 * 64], zmm27
vmovaps [rcx + 28 * 64], zmm28
vmovaps [rcx + 29 * 64], zmm29
vmovaps [rcx + 30 * 64], zmm30
vmovaps [rcx + 31 * 64], zmm31
#endif
ret
// void __fastcall set_AVX512_state(const M512U64 data[16]);
PUBLIC set_AVX512_state
set_AVX512_state:
vmovaps zmm0, [rcx + 0 * 64]
vmovaps zmm1, [rcx + 1 * 64]
vmovaps zmm2, [rcx + 2 * 64]
vmovaps zmm3, [rcx + 3 * 64]
vmovaps zmm4, [rcx + 4 * 64]
vmovaps zmm5, [rcx + 5 * 64]
vmovaps zmm6, [rcx + 6 * 64]
vmovaps zmm7, [rcx + 7 * 64]
#ifndef _M_IX86
vmovaps zmm8, [rcx + 8 * 64]
vmovaps zmm9, [rcx + 9 * 64]
vmovaps zmm10, [rcx + 10 * 64]
vmovaps zmm11, [rcx + 11 * 64]
vmovaps zmm12, [rcx + 12 * 64]
vmovaps zmm13, [rcx + 13 * 64]
vmovaps zmm14, [rcx + 14 * 64]
vmovaps zmm15, [rcx + 15 * 64]
vmovaps zmm16, [rcx + 16 * 64]
vmovaps zmm17, [rcx + 17 * 64]
vmovaps zmm18, [rcx + 18 * 64]
vmovaps zmm19, [rcx + 19 * 64]
vmovaps zmm20, [rcx + 20 * 64]
vmovaps zmm21, [rcx + 21 * 64]
vmovaps zmm22, [rcx + 22 * 64]
vmovaps zmm23, [rcx + 23 * 64]
vmovaps zmm24, [rcx + 24 * 64]
vmovaps zmm25, [rcx + 25 * 64]
vmovaps zmm26, [rcx + 26 * 64]
vmovaps zmm27, [rcx + 27 * 64]
vmovaps zmm28, [rcx + 28 * 64]
vmovaps zmm29, [rcx + 29 * 64]
vmovaps zmm30, [rcx + 30 * 64]
vmovaps zmm31, [rcx + 31 * 64]
#endif
ret
END

View file

@ -4,10 +4,12 @@
#include <apitest.h>
extern void func_SystemCall(void);
extern void func_XStateSave(void);
const struct test winetest_testlist[] =
{
{ "SystemCall", func_SystemCall },
{ "XStateSave", func_XStateSave },
{ 0, 0 }
};