[NTOS:KE] Fully implement FPU Save/Restore mechanism

This commit fully implements the inner logic of KeSaveFloatingPointState and KeRestoreFloatingPointState routines. On ReactOS we're currently simply doing a FNSAVE operation whereas on Windows it is a lot more than that.

On Windows Server 2003 the logic more or less goes like this. In order to save the FPU state the NPX state of the current thread has to be checked first, that is, if NPX is loaded and currently charged for the current thread then the system will acquire the NPX registers actively present. From that point it performs either a FNSAVE or FXSAVE
if FX is actually supported. Otherwise the control word and MXCsr registers are obtained.

FXSAVE/FNSAVE operation is done solely if the FX save area is held up in a pool allocation. Pool allocation occurs if it's been found out that the NPX IRQL of the thread is not the same as the current thread which from where it determines if the interrupt level is APC then allocate some pool memory and hold the save area there, otherwise
the save area in question is grabbed from the current processor control region. If NPX is not loaded for the current thread then the FPU state is obtained from the NPX frame.

In our case we'll be doing something way simpler. Only do a FXSAVE/FNSAVE directly of the FPU state registers, in this way we are simplifying the code and the actual logic of Save/Restore mechanism.
This commit is contained in:
George Bișoc 2022-01-25 13:19:06 +01:00
parent c020966091
commit 064a35dc67
No known key found for this signature in database
GPG key ID: 688C4FBE25D7DEF6

View file

@ -12,6 +12,8 @@
#define NDEBUG
#include <debug.h>
#include <xmmintrin.h>
/* GLOBALS *******************************************************************/
/* The TSS to use for Double Fault Traps (INT 0x9) */
@ -75,6 +77,9 @@ typedef union _CPU_SIGNATURE
ULONG AsULONG;
} CPU_SIGNATURE;
/* FX area alignment size */
#define FXSAVE_ALIGN 15
/* SUPPORT ROUTINES FOR MSVC COMPATIBILITY ***********************************/
/* NSC/Cyrix CPU configuration register index */
@ -97,7 +102,6 @@ setCx86(UCHAR reg, UCHAR data)
WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23, data);
}
/* FUNCTIONS *****************************************************************/
CODE_SEG("INIT")
@ -1306,59 +1310,230 @@ KiCoprocessorError(VOID)
__writecr0(__readcr0() | CR0_TS);
}
/*
* @implemented
/**
* @brief
* Saves the current floating point unit state
* context of the current calling thread.
*
* @param[out] Save
* The saved floating point context given to the
* caller at the end of function's operations.
* The structure whose data contents are opaque
* to the calling thread.
*
* @return
* Returns STATUS_SUCCESS if the function has
* successfully completed its operations.
* STATUS_INSUFFICIENT_RESOURCES is returned
* if the function couldn't allocate memory
* for FPU state information.
*
* @remarks
* The function performs a FPU state save
* in two ways. A normal FPU save (FNSAVE)
* is performed if the system doesn't have
* SSE/SSE2, otherwise the function performs
* a save of FPU, MMX and SSE states save (FXSAVE).
*/
#if defined(__clang__)
__attribute__((__target__("sse")))
#endif
NTSTATUS
NTAPI
KeSaveFloatingPointState(OUT PKFLOATING_SAVE Save)
KeSaveFloatingPointState(
_Out_ PKFLOATING_SAVE Save)
{
PFNSAVE_FORMAT FpState;
PFLOATING_SAVE_CONTEXT FsContext;
PFX_SAVE_AREA FxSaveAreaFrame;
PKPRCB CurrentPrcb;
/* Sanity checks */
ASSERT(Save);
ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
UNIMPLEMENTED_ONCE;
ASSERT(KeI386NpxPresent);
FpState = ExAllocatePool(NonPagedPool, sizeof (FNSAVE_FORMAT));
if (!FpState) return STATUS_INSUFFICIENT_RESOURCES;
*((PVOID *) Save) = FpState;
#ifdef __GNUC__
asm volatile("fnsave %0\n\t" : "=m" (*FpState));
#else
__asm
/* Initialize the floating point context */
FsContext = ExAllocatePoolWithTag(NonPagedPool,
sizeof(FLOATING_SAVE_CONTEXT),
TAG_FLOATING_POINT_CONTEXT);
if (!FsContext)
{
mov eax, [FpState]
fnsave [eax]
};
#endif
/* Bail out if we failed */
return STATUS_INSUFFICIENT_RESOURCES;
}
KeGetCurrentThread()->Header.NpxIrql = KeGetCurrentIrql();
/*
* Allocate some memory pool for the buffer. The size
* of this allocated buffer is the FX area plus the
* alignment requirement needed for FXSAVE as a 16-byte
* aligned pointer is compulsory in order to save the
* FPU state.
*/
FsContext->Buffer = ExAllocatePoolWithTag(NonPagedPool,
sizeof(FX_SAVE_AREA) + FXSAVE_ALIGN,
TAG_FLOATING_POINT_FX);
if (!FsContext->Buffer)
{
/* Bail out if we failed */
ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
return STATUS_INSUFFICIENT_RESOURCES;
}
/*
* Now cache the allocated buffer into the save area
* and align the said area to a 16-byte boundary. Why
* do we have to do this is because of ExAllocate function.
* We gave the necessary alignment requirement in the pool
* allocation size although the function will always return
* a 8-byte aligned pointer. Aligning the given pointer directly
* can cause issues when freeing it from memory afterwards. With
* that said, we have to cache the buffer to the area so that we
* do not touch or mess the allocated buffer any further.
*/
FsContext->PfxSaveArea = ALIGN_UP_POINTER_BY(FsContext->Buffer, 16);
/* Disable interrupts and get the current processor control region */
_disable();
CurrentPrcb = KeGetCurrentPrcb();
/* Store the current thread to context */
FsContext->CurrentThread = KeGetCurrentThread();
/*
* Save the previous NPX thread state registers (aka Numeric
* Processor eXtension) into the current context so that
* we are informing the scheduler the current FPU state
* belongs to this thread.
*/
if (FsContext->CurrentThread != CurrentPrcb->NpxThread)
{
if ((CurrentPrcb->NpxThread != NULL) &&
(CurrentPrcb->NpxThread->NpxState == NPX_STATE_LOADED))
{
/* Get the FX frame */
FxSaveAreaFrame = KiGetThreadNpxArea(CurrentPrcb->NpxThread);
/* Save the FPU state */
Ke386SaveFpuState(FxSaveAreaFrame);
/* NPX thread has lost its state */
CurrentPrcb->NpxThread->NpxState = NPX_STATE_NOT_LOADED;
FxSaveAreaFrame->NpxSavedCpu = 0;
}
/* The new NPX thread is the current thread */
CurrentPrcb->NpxThread = FsContext->CurrentThread;
}
/* Perform the save */
Ke386SaveFpuState(FsContext->PfxSaveArea);
/* Store the NPX IRQL */
FsContext->OldNpxIrql = FsContext->CurrentThread->Header.NpxIrql;
/* Set the current IRQL to NPX */
FsContext->CurrentThread->Header.NpxIrql = KeGetCurrentIrql();
/* Initialize the FPU */
Ke386FnInit();
/* Enable interrupts back */
_enable();
/* Give the saved FPU context to the caller */
*((PVOID *) Save) = FsContext;
return STATUS_SUCCESS;
}
/*
* @implemented
/**
* @brief
* Restores the original FPU state context that has
* been saved by a API call of KeSaveFloatingPointState.
* Callers are expected to restore the floating point
* state by calling this function when they've finished
* doing FPU operations.
*
* @param[in] Save
* The saved floating point context that is to be given
* to the function to restore the FPU state.
*
* @return
* Returns STATUS_SUCCESS indicating the function
* has fully completed its operations.
*/
#if defined(__clang__)
__attribute__((__target__("sse")))
#endif
NTSTATUS
NTAPI
KeRestoreFloatingPointState(IN PKFLOATING_SAVE Save)
KeRestoreFloatingPointState(
_In_ PKFLOATING_SAVE Save)
{
PFNSAVE_FORMAT FpState = *((PVOID *) Save);
ASSERT(KeGetCurrentThread()->Header.NpxIrql == KeGetCurrentIrql());
UNIMPLEMENTED_ONCE;
PFLOATING_SAVE_CONTEXT FsContext;
#ifdef __GNUC__
asm volatile("fnclex\n\t");
asm volatile("frstor %0\n\t" : "=m" (*FpState));
#else
__asm
/* Sanity checks */
ASSERT(Save);
ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
ASSERT(KeI386NpxPresent);
/* Cache the saved FS context */
FsContext = *((PVOID *) Save);
/*
* We have to restore the regular saved FPU
* state. For this we must first do some
* validation checks so that we are sure
* ourselves the state context is saved
* properly. Check if we are in the same
* calling thread.
*/
if (FsContext->CurrentThread != KeGetCurrentThread())
{
fnclex
mov eax, [FpState]
frstor [eax]
};
#endif
/*
* This isn't the thread that saved the
* FPU state context, crash the system!
*/
KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
0x2,
(ULONG_PTR)FsContext->CurrentThread,
(ULONG_PTR)KeGetCurrentThread(),
0);
}
/* Are we under the same NPX interrupt level? */
if (FsContext->CurrentThread->Header.NpxIrql != KeGetCurrentIrql())
{
/* The interrupt level has changed, crash the system! */
KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
0x1,
(ULONG_PTR)FsContext->CurrentThread->Header.NpxIrql,
(ULONG_PTR)KeGetCurrentIrql(),
0);
}
/* Disable interrupts */
_disable();
/*
* The saved FPU state context is valid,
* it's time to restore the state. First,
* clear FPU exceptions now.
*/
Ke386ClearFpExceptions();
/* Restore the state */
Ke386RestoreFpuState(FsContext->PfxSaveArea);
/* Give the saved NPX IRQL back to the NPX thread */
FsContext->CurrentThread->Header.NpxIrql = FsContext->OldNpxIrql;
/* Enable interrupts back */
_enable();
/* We're done, free the allocated area and context */
ExFreePoolWithTag(FsContext->Buffer, TAG_FLOATING_POINT_FX);
ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
ExFreePool(FpState);
return STATUS_SUCCESS;
}