From 72fbbdf854455fcc50ca7eb7c151ad32a29be186 Mon Sep 17 00:00:00 2001 From: Timo Kreuzer Date: Tue, 14 Nov 2023 16:38:09 +0200 Subject: [PATCH] [NTOS:KE/x64] Implement KiInitializeXStateConfiguration --- ntoskrnl/include/internal/amd64/ke.h | 7 + ntoskrnl/ke/amd64/kiinit.c | 21 +- ntoskrnl/ke/amd64/xstate.c | 309 +++++++++++++++++++++++++++ ntoskrnl/ntos.cmake | 3 +- 4 files changed, 333 insertions(+), 7 deletions(-) create mode 100644 ntoskrnl/ke/amd64/xstate.c diff --git a/ntoskrnl/include/internal/amd64/ke.h b/ntoskrnl/include/internal/amd64/ke.h index 2bd4d5c8f3d..f3112871d62 100644 --- a/ntoskrnl/include/internal/amd64/ke.h +++ b/ntoskrnl/include/internal/amd64/ke.h @@ -88,6 +88,8 @@ extern "C" { #ifndef __ASM__ +extern SIZE_T KeXStateLength; + #include "intrin_i.h" typedef struct _KIDT_INIT @@ -494,6 +496,11 @@ KiProcessorFreezeHandler( _In_ PKTRAP_FRAME TrapFrame, _In_ PKEXCEPTION_FRAME ExceptionFrame); +VOID +NTAPI +KiInitializeXStateConfiguration( + _In_ ULONG Processor); + #ifdef __cplusplus } // extern "C" #endif diff --git a/ntoskrnl/ke/amd64/kiinit.c b/ntoskrnl/ke/amd64/kiinit.c index 125cf852c86..050ec6d2a49 100644 --- a/ntoskrnl/ke/amd64/kiinit.c +++ b/ntoskrnl/ke/amd64/kiinit.c @@ -207,6 +207,13 @@ KiInitializeCpu(PKIPCR Pcr) /* Disable x87 fpu exceptions */ __writecr0(__readcr0() & ~CR0_NE); + /* Check if XSAVE is supported */ + if (FeatureBits & KF_XSTATE) + { + /* Enable CR4.OSXSAVE[Bit 18] */ + __writecr4(__readcr4() | CR4_XSAVE); + } + /* LDT is unused */ __lldt(0); @@ -504,11 +511,7 @@ KiSystemStartup(IN PLOADER_PARAMETER_BLOCK LoaderBlock) /* Set the PRCB for this Processor */ KiProcessorBlock[Cpu] = &Pcr->Prcb; - /* Align stack to 16 bytes */ - LoaderBlock->KernelStack &= ~(16 - 1); - - /* Save the initial thread and stack */ - InitialStack = LoaderBlock->KernelStack; // Checkme + /* Save the initial thread */ InitialThread = (PKTHREAD)LoaderBlock->Thread; /* Set us as the current process */ @@ -562,7 +565,13 @@ KiSystemStartup(IN 
PLOADER_PARAMETER_BLOCK LoaderBlock) /* Machine specific kernel initialization */ if (Cpu == 0) KiInitializeKernelMachineDependent(&Pcr->Prcb, LoaderBlock); + /* Initialize extended state management */ + KiInitializeXStateConfiguration(Cpu); + + /* Calculate the initial stack pointer */ + InitialStack = ALIGN_DOWN_BY(LoaderBlock->KernelStack - KeXStateLength, 64); + /* Switch to new kernel stack and start kernel bootstrapping */ - KiSwitchToBootStack(InitialStack & ~3); + KiSwitchToBootStack(InitialStack); } diff --git a/ntoskrnl/ke/amd64/xstate.c b/ntoskrnl/ke/amd64/xstate.c new file mode 100644 index 00000000000..d78dd8e801d --- /dev/null +++ b/ntoskrnl/ke/amd64/xstate.c @@ -0,0 +1,309 @@ +/* + * PROJECT: ReactOS Kernel + * LICENSE: MIT (https://spdx.org/licenses/MIT) + * PURPOSE: Extended processor state management + * COPYRIGHT: Copyright 2025 Timo Kreuzer + */ + +#include +#include +#include +#define NDEBUG +#include + +// These are not officially documented +#define XSTATE_PKRU 9 +#define XSTATE_HDC 13 +#define XSTATE_UINTR 14 +#define XSTATE_LBR 15 +#define XSTATE_MASK_PKRU (1LL << (XSTATE_PKRU)) +#define XSTATE_MASK_HDC (1LL << (XSTATE_HDC)) +#define XSTATE_MASK_UINTR (1LL << (XSTATE_UINTR)) +#define XSTATE_MASK_LBR (1LL << (XSTATE_LBR)) + +#define XSTATE_MASK_SUPERVISOR \ + (XSTATE_MASK_IPT | \ + XSTATE_MASK_PASID | \ + XSTATE_MASK_CET_U | \ + XSTATE_MASK_CET_S | \ + XSTATE_MASK_HDC | \ + XSTATE_MASK_UINTR | \ + XSTATE_MASK_LBR) + +/*! 
+ * \brief Determines the extended state configuration for the current processor
+ *
+ * The output structure is zeroed and then filled from CPUID leaf
+ * CPUID_EXTENDED_STATE: the main leaf provides the supported user components,
+ * sub-leaf 1 the control flags and the supported supervisor components, and
+ * sub-leaves 2 to MAXIMUM_XSTATE_FEATURES - 1 the size, offset and alignment
+ * of each component. On return, Size is the extent of the legacy state, the
+ * header and the enabled user components, while AllFeatureSize also accounts
+ * for the enabled supervisor components.
+ *
+ * \param XStateConfig - Pointer to a XSTATE_CONFIGURATION structure that receives the configuration
+ *
+ * \see https://windows-internals.com/cet-on-windows/#3-xstate-configuration
+ */
+CODE_SEG("INIT")
+static
+VOID
+KiGetXStateConfiguration(
+    _Out_ PXSTATE_CONFIGURATION XStateConfig)
+{
+    ULONG64 SupportedUserMask;
+    ULONG64 SupportedSupervisorMask;
+    ULONG64 SupportedComponentMask;
+    ULONG NextUserOffset, NextSupervisorOffset, NextOffset;
+
+    RtlZeroMemory(XStateConfig, sizeof(*XStateConfig));
+
+    /* Read CPUID_EXTENDED_STATE main leaf (0x0D, 0x00) */
+    CPUID_EXTENDED_STATE_MAIN_LEAF_REGS ExtStateMain;
+    __cpuidex(ExtStateMain.AsInt32,
+              CPUID_EXTENDED_STATE,
+              CPUID_EXTENDED_STATE_MAIN_LEAF);
+
+    /* Get the supported XCR0 bits (EDX is the upper, EAX the lower half) */
+    SupportedUserMask = (ULONG64)ExtStateMain.Edx << 32 |
+                        (ULONG64)ExtStateMain.Eax.Uint32;
+
+    /* FIXME: Temporary workaround until we have dynamic kernel stack size.
+       Until then the large features are never enabled. */
+    SupportedUserMask &= ~XSTATE_MASK_LARGE_FEATURES;
+
+    /* Mask the allowed components */
+    SupportedUserMask &= XSTATE_MASK_ALLOWED;
+
+    /* Read CPUID_EXTENDED_STATE sub-leaf (0x0D, 0x01) */
+    CPUID_EXTENDED_STATE_SUB_LEAF_REGS ExtStateSub;
+    __cpuidex(ExtStateSub.AsInt32,
+              CPUID_EXTENDED_STATE,
+              CPUID_EXTENDED_STATE_SUB_LEAF);
+
+    /* Save control flags */
+    XStateConfig->OptimizedSave = ExtStateSub.Eax.Bits.XSAVEOPT;
+    XStateConfig->CompactionEnabled = ExtStateSub.Eax.Bits.XSAVEC;
+    XStateConfig->ExtendedFeatureDisable = ExtStateSub.Eax.Bits.Xfd;
+
+    /* Determine supported supervisor features. Without XSAVES the mask stays
+       empty, otherwise EDX:ECX is limited to the allowed supervisor
+       components. */
+    SupportedSupervisorMask = 0;
+    if (ExtStateSub.Eax.Bits.XSAVES)
+    {
+        SupportedSupervisorMask = (ULONG64)ExtStateSub.Edx << 32 |
+                                  (ULONG64)ExtStateSub.Ecx.Uint32;
+        SupportedSupervisorMask &= XSTATE_MASK_ALLOWED & XSTATE_MASK_SUPERVISOR;
+    }
+
+    /* Calculate full mask */
+    SupportedComponentMask = SupportedUserMask | SupportedSupervisorMask;
+
+    /* Basic features (always enabled): XSTATE_LEGACY_FLOATING_POINT covers
+       everything in front of XmmRegisters, XSTATE_LEGACY_SSE is the
+       XmmRegisters field itself */
+    XStateConfig->Features[XSTATE_LEGACY_FLOATING_POINT].Offset = 0;
+    XStateConfig->Features[XSTATE_LEGACY_FLOATING_POINT].Size = FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters);
+    XStateConfig->AllFeatures[XSTATE_LEGACY_FLOATING_POINT] = FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters);
+    XStateConfig->Features[XSTATE_LEGACY_SSE].Offset = FIELD_OFFSET(XSAVE_FORMAT, XmmRegisters);
+    XStateConfig->Features[XSTATE_LEGACY_SSE].Size = FIELD_SIZE(XSAVE_FORMAT, XmmRegisters);
+    XStateConfig->AllFeatures[XSTATE_LEGACY_SSE] = FIELD_SIZE(XSAVE_FORMAT, XmmRegisters);
+
+    /* Other components start after legacy state + header */
+    NextUserOffset = NextSupervisorOffset = sizeof(XSAVE_AREA);
+
+    /* Loop all components from 2 up (0 and 1 are the basic features above) */
+    for (ULONG Component = 2; Component < MAXIMUM_XSTATE_FEATURES; Component++)
+    {
+        ULONG64 ComponentBit = (1ULL << Component);
+
+        /* Query component features */
+        CPUID_EXTENDED_STATE_SIZE_OFFSET_REGS ExtStateComponent;
+        __cpuidex(ExtStateComponent.AsInt32,
+                  CPUID_EXTENDED_STATE,
+                  Component);
+
+        /* Save size for all features (recorded even for components that
+           do not get enabled) */
+        XStateConfig->AllFeatures[Component] = ExtStateComponent.Size;
+
+        /* If the size is 0, this component isn't valid */
+        if (ExtStateComponent.Size == 0) continue;
+
+        /* Check for components that are not OS supported */
+        if ((ComponentBit & SupportedComponentMask) == 0)
+        {
+            /* This emulates weird (broken) Windows behavior: components
+               outside of XSTATE_MASK_SUPERVISOR still get the offset and
+               size reported by CPUID, although they are not enabled */
+            if ((ComponentBit & XSTATE_MASK_SUPERVISOR) == 0)
+            {
+                XStateConfig->Features[Component].Offset = ExtStateComponent.Offset;
+                XStateConfig->Features[Component].Size = ExtStateComponent.Size;
+            }
+
+            /* Skip the rest */
+            continue;
+        }
+
+        /* Check if compaction is enabled. If so, the offsets are accumulated
+           here instead of being taken from CPUID. */
+        if (XStateConfig->CompactionEnabled)
+        {
+            /* Align the offsets, if needed. The user offset is only touched
+               for user components. */
+            if (ExtStateComponent.Ecx.Bits.Aligned)
+            {
+                XStateConfig->AlignedFeatures |= ComponentBit;
+                NextSupervisorOffset = ALIGN_UP(NextSupervisorOffset, 64);
+                if ((ComponentBit & SupportedUserMask) != 0)
+                {
+                    NextUserOffset = ALIGN_UP(NextUserOffset, 64);
+                }
+            }
+
+            /* Update the supervisor offset (it advances for every supported
+               component, user or supervisor) */
+            NextSupervisorOffset += ExtStateComponent.Size;
+
+            /* For user components save and update the offset and size */
+            if ((ComponentBit & SupportedUserMask) != 0)
+            {
+                XStateConfig->Features[Component].Offset = NextUserOffset;
+                XStateConfig->Features[Component].Size = ExtStateComponent.Size;
+                NextUserOffset += ExtStateComponent.Size;
+            }
+        }
+        else
+        {
+            /* Not compacted, use the offset and size specified by the CPUID.
+               The running offsets track the highest end offset seen so far. */
+            NextOffset = ExtStateComponent.Offset + ExtStateComponent.Size;
+            NextSupervisorOffset = max(NextSupervisorOffset, NextOffset);
+
+            /* For user components save and update the offset and size */
+            if ((ComponentBit & SupportedUserMask) != 0)
+            {
+                XStateConfig->Features[Component].Offset = ExtStateComponent.Offset;
+                XStateConfig->Features[Component].Size = ExtStateComponent.Size;
+                NextUserOffset = max(NextUserOffset, NextOffset);
+            }
+        }
+    }
+
+    /* Save the features to be enabled */
+    XStateConfig->EnabledFeatures = SupportedUserMask;
+    XStateConfig->EnabledVolatileFeatures =
+        SupportedUserMask & ~XSTATE_MASK_PERSISTENT;
+    XStateConfig->EnabledSupervisorFeatures = SupportedSupervisorMask;
+    XStateConfig->EnabledUserVisibleSupervisorFeatures =
+        SupportedSupervisorMask & XSTATE_MASK_USER_VISIBLE_SUPERVISOR;
+
+    /* Save the calculated sizes */
+    XStateConfig->Size = NextUserOffset;
+    XStateConfig->AllFeatureSize = NextSupervisorOffset;
+    ASSERT(XStateConfig->AllFeatureSize >= XStateConfig->Size);
+}
+
+/*!
+ * \brief Checks that a processor's extended state configuration is identical
+ *        to the global one stored in the shared user data
+ *
+ * Bugchecks with MULTIPROCESSOR_CONFIGURATION_NOT_SUPPORTED if the enabled
+ * feature masks, the total sizes or any per-feature entry differ.
+ *
+ * \param XState - Pointer to the XSTATE_CONFIGURATION structure to check
+ */
+CODE_SEG("INIT")
+static
+VOID
+ValidateXStateConfig(
+    _In_ PXSTATE_CONFIGURATION XState)
+{
+    PXSTATE_CONFIGURATION Reference = &SharedUserData->XState;
+    BOOLEAN Mismatch;
+    ULONG Index;
+
+    /* Start with the enabled feature masks and the total sizes */
+    Mismatch = (XState->EnabledFeatures != Reference->EnabledFeatures) ||
+               (XState->EnabledSupervisorFeatures != Reference->EnabledSupervisorFeatures) ||
+               (XState->Size != Reference->Size) ||
+               (XState->AllFeatureSize != Reference->AllFeatureSize);
+
+    /* Walk the per-feature entries until the first difference shows up */
+    for (Index = 0; !Mismatch && (Index < MAXIMUM_XSTATE_FEATURES); Index++)
+    {
+        Mismatch = (XState->Features[Index].Size != Reference->Features[Index].Size) ||
+                   (XState->Features[Index].Offset != Reference->Features[Index].Offset) ||
+                   (XState->AllFeatures[Index] != Reference->AllFeatures[Index]);
+    }
+
+    if (Mismatch)
+    {
+        /* Invalid features */
+        KeBugCheck(MULTIPROCESSOR_CONFIGURATION_NOT_SUPPORTED);
+    }
+}
+
+/*!
+ * \brief Initializes the extended state configuration for the current processor
+ *
+ * Does nothing when KF_XSTATE is not set in KeFeatureBits. Processor 0
+ * calculates the global configuration in the shared user data and derives
+ * KeXStateLength from it; every other processor calculates its own
+ * configuration and validates it against the global one. Afterwards the
+ * enabled user components are written to XCR0 and, if there are any, the
+ * enabled supervisor components to IA32_XSS. The sizes CPUID reports for the
+ * enabled components are cross-checked against the calculated ones.
+ *
+ * \param ProcessorNumber - Number of the current processor
+ */
+CODE_SEG("INIT")
+VOID
+NTAPI
+KiInitializeXStateConfiguration(
+    _In_ ULONG ProcessorNumber)
+{
+    /* Check if XSAVE is supported */
+    if ((KeFeatureBits & KF_XSTATE) == 0)
+    {
+        /* XSAVE is not supported */
+        return;
+    }
+
+    if (ProcessorNumber == 0)
+    {
+        /* Processor 0: Retrieve the global configuration */
+        KiGetXStateConfiguration(&SharedUserData->XState);
+
+        /* Without a usable save area, stop advertising the feature */
+        if (SharedUserData->XState.AllFeatureSize == 0)
+        {
+            KeFeatureBits &= ~KF_XSTATE;
+            return;
+        }
+
+        KeXStateLength = SharedUserData->XState.AllFeatureSize;
+    }
+    else
+    {
+        /* Processor 1+: validate the configuration against the global one */
+        XSTATE_CONFIGURATION XState;
+        KiGetXStateConfiguration(&XState);
+        ValidateXStateConfig(&XState);
+    }
+
+    /* Enable the user mode components in XCR0 */
+    _xsetbv(0, SharedUserData->XState.EnabledFeatures);
+
+    /* Now that we have set everything up, query CPUID again to get the required
+       size based on components enabled in XCR0 */
+    CPUID_EXTENDED_STATE_MAIN_LEAF_REGS ExtStateMain;
+    __cpuidex(ExtStateMain.AsInt32,
+              CPUID_EXTENDED_STATE,
+              CPUID_EXTENDED_STATE_MAIN_LEAF);
+
+    /* CPUID 0xD, leaf 0, EBX should return the size required by all components
+       enabled in XCR0 and thus match our calculation. But VBox doesn't handle
+       this correctly and simply returns the full size of all *supported*
+       features, independent of XCR0. We check and warn. */
+    if (ExtStateMain.Ebx > SharedUserData->XState.Size)
+    {
+        DPRINT1("Processor %lu, CPUID 0xD, leaf 0, EBX returns 0x%x, but we calculated 0x%lx\n",
+                ProcessorNumber,
+                ExtStateMain.Ebx,
+                SharedUserData->XState.Size);
+    }
+
+    /* Check if we have any supervisor components enabled */
+    if (SharedUserData->XState.EnabledSupervisorFeatures != 0)
+    {
+        /* Enable the supervisor components in IA32_XSS */
+        __writemsr(MSR_IA32_XSS, SharedUserData->XState.EnabledSupervisorFeatures);
+
+        /* Get the required size for features enabled in both XCR0 and IA32_XSS */
+        CPUID_EXTENDED_STATE_SUB_LEAF_REGS ExtStateSubLeaf;
+        __cpuidex(ExtStateSubLeaf.AsInt32,
+                  CPUID_EXTENDED_STATE,
+                  CPUID_EXTENDED_STATE_SUB_LEAF);
+
+        /* Check if all components fit into what we calculated. Same VBox issue
+           here as described above. */
+        if (ExtStateSubLeaf.Ebx.XSaveAreaSize > SharedUserData->XState.AllFeatureSize)
+        {
+            /* Report the two values that were actually compared */
+            DPRINT1("Processor %lu, CPUID 0xD, leaf 1, EBX returns 0x%x, but we calculated 0x%lx\n",
+                    ProcessorNumber,
+                    ExtStateSubLeaf.Ebx.XSaveAreaSize,
+                    SharedUserData->XState.AllFeatureSize);
+
+            /* The problem is likely the VM, but to be safe, we adjust the size.
+               KeXStateLength is kept in sync, because the boot stack
+               reservation in KiSystemStartup is based on it.
+               NOTE(review): once processor 0 has raised AllFeatureSize here,
+               ValidateXStateConfig on the other processors compares their
+               freshly calculated AllFeatureSize against the raised value;
+               confirm that this cannot cause a spurious bugcheck. */
+            SharedUserData->XState.AllFeatureSize = ExtStateSubLeaf.Ebx.XSaveAreaSize;
+            KeXStateLength = SharedUserData->XState.AllFeatureSize;
+        }
+    }
+}
diff --git a/ntoskrnl/ntos.cmake b/ntoskrnl/ntos.cmake
index 65e8b743f79..f8853b12b95 100644
--- a/ntoskrnl/ntos.cmake
+++ b/ntoskrnl/ntos.cmake
@@ -361,7 +361,8 @@ elseif(ARCH STREQUAL "amd64")
         ${REACTOS_SOURCE_DIR}/ntoskrnl/ps/amd64/psctx.c
         ${REACTOS_SOURCE_DIR}/ntoskrnl/ke/amd64/stubs.c
         ${REACTOS_SOURCE_DIR}/ntoskrnl/ke/amd64/traphandler.c
-        ${REACTOS_SOURCE_DIR}/ntoskrnl/ke/amd64/usercall.c)
+        ${REACTOS_SOURCE_DIR}/ntoskrnl/ke/amd64/usercall.c
+        ${REACTOS_SOURCE_DIR}/ntoskrnl/ke/amd64/xstate.c)
 elseif(ARCH STREQUAL "arm")
     list(APPEND ASM_SOURCE
         ${REACTOS_SOURCE_DIR}/ntoskrnl/ex/arm/ioport.s