[HAL][NTOS] Implement x64 SMP startup code

TODO: fix lgdt macro to work with GAS and ML
Works with PIIX3 and cdrom on AHCI or SCSI controller
When cdrom is on IDE controller it doesn't seem to work
This commit is contained in:
Timo Kreuzer 2023-11-23 22:20:38 +02:00
parent 98baac1ea2
commit 323f18e446
6 changed files with 464 additions and 24 deletions

View file

@ -1,34 +1,205 @@
/*
* PROJECT: ReactOS Kernel
* LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
* PURPOSE: AMD64 Application Processor (AP) spinup setup
* COPYRIGHT: Copyright 2023 Justin Miller <justin.miller@reactos.org>
* PURPOSE: Assembly file for real mode AP code
* COPYRIGHT: Copyright 2023 Timo Kreuzer <timo.kreuzer@reactos.org>
*/
#include <asm.inc>
#include <ksamd64.inc>
#define ZERO_OFFSET(f) (offset f - offset HalpAPEntry16)
#ifndef _USE_ML
.macro data32, opcode:vararg
.byte 0x66
\opcode
.endm
#endif
PUBLIC HalpAPEntry16
PUBLIC HalpAPEntryData
PUBLIC HalpAPEntry32
PUBLIC HalpAPEntry16End
.code
.code64 // Workaround to be able to link this code
///////////////////////////////////////////////////////////////////////////////
// 16-bit code
///////////////////////////////////////////////////////////////////////////////
// The following code is executed in real mode.
// We compile it as 64-bit code, because otherwise we cannot link it.
// To fix differences between 64-bit and 16-bit code, we need to manually
// add the data32 (0x66) prefix to some instructions.
// We also need to use indirect addressing, because the 64-bit assembler
// does not support the 16-bit addressing mode.
HalpAPEntry16:
/* Disable interrupts */
cli
xor ax, ax
mov ds, ax
mov ss, ax
/* Use esi = 0 for register relative addressing */
data32 xor esi, esi
/* Set up ds segment */
data32 mov edi, cs
data32 mov ds, edi
/* Calculate the flat base address */
data32 shl edi, 4
/* Enable A20 address line */
data32 call EnableA20
/* Set up far pointer offset for switch to 32 bit */
data32 lea eax, [edi + ZERO_OFFSET(APEntry32)]
data32 mov [esi + ZERO_OFFSET(Jump32Offset)], eax
/* Set up far pointer offset for switch to 64 bit */
data32 lea eax, [edi + ZERO_OFFSET(APEntry64)]
data32 mov [esi + ZERO_OFFSET(Jump64Offset)], eax
/* Set up temporary GDT pointer */
data32 lea eax, [edi + ZERO_OFFSET(TempGdt)]
data32 mov [esi + ZERO_OFFSET(TempGdtr_Base)], eax
/* Load the initial GDT */
#ifdef _USE_ML
lgdt fword ptr [esi + ZERO_OFFSET(TempGdtr)]
#else
lgdt ds:[esi + ZERO_OFFSET(TempGdtr)]
#endif
/* Set PAE and PGE: 10100000b */
mov rax, cr4
data32 or eax, HEX(00A0)
mov cr4, rax
/* Enable protected mode */
mov rax, cr0
data32 or eax, HEX(00000001)
mov cr0, rax
/* Long jump, 32bit address */
.byte HEX(66)
.byte HEX(EA)
Jump32Offset:
.long 0
.short HEX(20) // Protected mode CS
Empty8042:
.word HEX(00eb), HEX(00eb) // jmp $+2, jmp $+2
in al, HEX(64)
cmp al, HEX(0ff) // legacy-free machine without keyboard
jz Empty8042_ret // controllers on Intel Macs read back 0xFF
test al, 2
jnz Empty8042
Empty8042_ret:
data32 ret
EnableA20:
data32 call Empty8042
mov al, HEX(0D1) // command write
out HEX(064), al
data32 call Empty8042
mov al, HEX(0DF) // A20 on
out HEX(060), al
data32 call Empty8042
mov al, HEX(0FF) // pulse output port
out HEX(064), al
data32 call Empty8042
data32 ret
.align 8
TempGdt:
.word HEX(0000), HEX(0000), HEX(0000), HEX(0000) /* 00: NULL descriptor */
.word HEX(0000), HEX(0000), HEX(0000), HEX(0000) /* 08: */
.word HEX(0000), HEX(0000), HEX(9B00), HEX(0020) /* 10: long mode CS */
.word HEX(FFFF), HEX(0000), HEX(9300), HEX(00CF) /* 18: long mode DS */
.word HEX(FFFF), HEX(0000), HEX(9B00), HEX(00CF) /* 20: protected mode CS */
.word HEX(FFFF), HEX(0000), HEX(9300), HEX(00CF) /* 28: protected mode DS */
/* GDT table pointer */
TempGdtr:
.word HEX(2F) /* Limit */
TempGdtr_Base:
.long 0 /* Base */
///////////////////////////////////////////////////////////////////////////////
// 32-bit code
///////////////////////////////////////////////////////////////////////////////
APEntry32:
/* Load protected mode DS */
mov eax, HEX(28)
mov ds, eax
/* Set up the long mode page table in cr3 */
lea eax, [rdi + PAGE_SIZE]
mov cr3, rax
/* Enable EFER.LME (long mode enable) and NXE (no-execute enable) */
mov ecx, MSR_EFER
rdmsr
or eax, HEX(00000900)
wrmsr
/* Enable paging for long mode */
mov rax, cr0
or eax, HEX(80000000)
mov cr0, rax
/* Long jump to 64 bit segment to activate long mode */
.byte HEX(EA)
Jump64Offset:
.long 0
.short HEX(10) // Long mode CS
///////////////////////////////////////////////////////////////////////////////
// 64-bit code
///////////////////////////////////////////////////////////////////////////////
APEntry64:
/* Load long mode segments */
mov ax, KGDT64_R0_DATA
mov ds, ax
mov es, ax
mov fs, ax
mov gs, ax
mov ss, ax
hlt
/* Load ProcessorState pointer */
mov rsi, [rdi + ZERO_OFFSET(ProcessorState)]
/* Load the final GDT and IDT from the ProcessorState */
lgdt fword ptr [rsi + PsGdtr]
lidt fword ptr [rsi + PsIdtr]
/* Set LTR */
mov ax, KGDT64_SYS_TSS
ltr ax
/* Set up the stack */
mov rsp, [rsi + PsContextFrame + CxRsp]
/* Set parameters and jump to entry point */
mov rcx, [rsi + PsContextFrame + CxRcx]
mov rdx, [rsi + PsContextFrame + CxRdx]
mov r8, [rsi + PsContextFrame + CxR8]
mov r9, [rsi + PsContextFrame + CxR9]
jmp qword ptr [rsi + PsContextFrame + CxRip]
// Layout in sync with AP_ENTRY_DATA in spinup.c
.align 8
HalpAPEntryData:
ProcessorState:
.quad 0
HalpAPEntry16End:
.long HEX(0)
HalpAPEntry32:
.long HEX(0)
HalpAPEntryData:
.long HEX(0)
nop
END

View file

@ -1,8 +1,9 @@
/*
* PROJECT: ReactOS Kernel
* LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
* PURPOSE: AMD64 Application Processor (AP) spinup setup
* COPYRIGHT: Copyright 2023 Justin Miller <justin.miller@reactos.org>
* PURPOSE: i386 Application Processor (AP) spinup setup
* COPYRIGHT: Copyright 2021 Victor Perevertkin <victor.perevertkin@reactos.org>
* Copyright 2021-2023 Justin Miller <justin.miller@reactos.org>
*/
/* INCLUDES ******************************************************************/
@ -13,12 +14,112 @@
#define NDEBUG
#include <debug.h>
/* GLOBALS *******************************************************************/
extern PPROCESSOR_IDENTITY HalpProcessorIdentity;
extern PHYSICAL_ADDRESS HalpLowStubPhysicalAddress;
extern PVOID HalpLowStub;
// The data necessary for a boot (stored inside HalpLowStub)
extern PVOID HalpAPEntry16;
extern PVOID HalpAPEntryData;
extern PVOID HalpAPEntry16End;
extern HALP_APIC_INFO_TABLE HalpApicInfoTable;
ULONG HalpStartedProcessorCount = 1;
#ifndef Add2Ptr
#define Add2Ptr(P,I) ((PVOID)((PUCHAR)(P) + (I)))
#endif
#ifndef PtrOffset
#define PtrOffset(B,O) ((ULONG)((ULONG_PTR)(O) - (ULONG_PTR)(B)))
#endif
// Windows uses PROCESSOR_START_BLOCK (offsets defined in ksamd64.inc)
typedef struct _AP_ENTRY_DATA
{
PKPROCESSOR_STATE ProcessorState;
} AP_ENTRY_DATA, *PAP_ENTRY_DATA;
/* FUNCTIONS *****************************************************************/
static
ULONG
HalpSetupTemporaryMappings(
_In_ PKPROCESSOR_STATE ProcessorState)
{
PMMPXE RootPageTable = Add2Ptr(HalpLowStub, 1 * PAGE_SIZE);
PMMPPE PageTableLvl3 = Add2Ptr(HalpLowStub, 2 * PAGE_SIZE);
PMMPDE PageTableLvl2 = Add2Ptr(HalpLowStub, 3 * PAGE_SIZE);
PHYSICAL_ADDRESS PhysicalAddress;
ULONG SelfMapPxi;
/* Copy current mappings */
RtlCopyMemory(RootPageTable, MiAddressToPxe(NULL), PAGE_SIZE);
/* Set up self-mapping PXE */
SelfMapPxi = MiAddressToPxi(MiAddressToPxe(NULL));
PhysicalAddress = MmGetPhysicalAddress(RootPageTable);
RootPageTable[SelfMapPxi].u.Flush.PageFrameNumber = PhysicalAddress.QuadPart >> PAGE_SHIFT;
/* Set up low PXE */
PhysicalAddress = MmGetPhysicalAddress(PageTableLvl3);
RootPageTable[0].u.Flush.PageFrameNumber = PhysicalAddress.QuadPart >> PAGE_SHIFT;
RootPageTable[0].u.Flush.Valid = 1;
RootPageTable[0].u.Flush.Write = 1;
/* Set up low PPE */
PhysicalAddress = MmGetPhysicalAddress(PageTableLvl2);
PageTableLvl3[0].u.Flush.PageFrameNumber = PhysicalAddress.QuadPart >> PAGE_SHIFT;
PageTableLvl3[0].u.Flush.Valid = 1;
PageTableLvl3[0].u.Flush.Write = 1;
/* Set up a large-page low PDE */
PageTableLvl2[0].u.Flush.PageFrameNumber = 0;
PageTableLvl2[0].u.Flush.Valid = 1;
PageTableLvl2[0].u.Flush.Write = 1;
PageTableLvl2[0].u.Flush.LargePage = 1;
PhysicalAddress = MmGetPhysicalAddress(RootPageTable);
ASSERT(PhysicalAddress.QuadPart < 0x100000000);
return (ULONG)PhysicalAddress.QuadPart;
}
BOOLEAN
NTAPI
HalStartNextProcessor(
_In_ PLOADER_PARAMETER_BLOCK LoaderBlock,
_In_ PKPROCESSOR_STATE ProcessorState)
{
//TODO:
return FALSE;
PAP_ENTRY_DATA APEntryData;
ULONG InitialCr3;
if (HalpStartedProcessorCount == HalpApicInfoTable.ProcessorCount)
return FALSE;
/* Clean up low stub from any previous data */
RtlZeroMemory(HalpLowStub, HALP_LOW_STUB_SIZE_IN_PAGES * PAGE_SIZE);
/* Initalize the temporary page table */
InitialCr3 = HalpSetupTemporaryMappings(ProcessorState);
/* Put the bootstrap code into low memory */
SIZE_T APEntrySize = (ULONG_PTR)&HalpAPEntry16End - (ULONG_PTR)&HalpAPEntry16;
ASSERT(APEntrySize <= PAGE_SIZE);
RtlCopyMemory(HalpLowStub, &HalpAPEntry16, APEntrySize);
/* Get a pointer to APEntryData */
SIZE_T Offset = PtrOffset(&HalpAPEntry16, &HalpAPEntryData);
APEntryData = Add2Ptr(HalpLowStub, Offset);
/* Fill in the APEntryData structure */
APEntryData->ProcessorState = ProcessorState;
/* Start the processor */
ApicStartApplicationProcessor(HalpStartedProcessorCount, HalpLowStubPhysicalAddress);
HalpStartedProcessorCount++;
return TRUE;
}

View file

@ -494,6 +494,19 @@ KiProcessorFreezeHandler(
_In_ PKTRAP_FRAME TrapFrame,
_In_ PKEXCEPTION_FRAME ExceptionFrame);
VOID
KiInitializeProcessorBootStructures(
_In_ ULONG ProcessorNumber,
_Out_ PKIPCR Pcr,
_In_ PKGDTENTRY64 GdtBase,
_In_ PKIDTENTRY64 IdtBase,
_In_ PKTSS64 TssBase,
_In_ PKTHREAD IdleThread,
_In_ PVOID KernelStack,
_In_ PVOID DpcStack,
_In_ PVOID DoubleFaultStack,
_In_ PVOID NmiStack);
#ifdef __cplusplus
} // extern "C"
#endif

View file

@ -535,8 +535,8 @@ KiSystemStartup(IN PLOADER_PARAMETER_BLOCK LoaderBlock)
if (KdPollBreakIn()) DbgBreakPointWithStatus(DBG_STATUS_CONTROL_C);
}
DPRINT1("Pcr = %p, Gdt = %p, Idt = %p, Tss = %p\n",
Pcr, Pcr->GdtBase, Pcr->IdtBase, Pcr->TssBase);
DPRINT1("Cpu %u: Pcr = %p, Gdt = %p, Idt = %p, Tss = %p\n",
Cpu, Pcr, Pcr->GdtBase, Pcr->IdtBase, Pcr->TssBase);
/* Acquire lock */
while (InterlockedBitTestAndSet64((PLONG64)&KiFreezeExecutionLock, 0))
@ -557,6 +557,7 @@ KiSystemStartup(IN PLOADER_PARAMETER_BLOCK LoaderBlock)
/* Raise to HIGH_LEVEL */
KfRaiseIrql(HIGH_LEVEL);
/* Machine specific kernel initialization */
if (Cpu == 0) KiInitializeKernelMachineDependent(&Pcr->Prcb, LoaderBlock);

View file

@ -12,6 +12,22 @@
#define NDEBUG
#include <debug.h>
typedef struct _APINFO
{
DECLSPEC_ALIGN(PAGE_SIZE) KIDTENTRY64 Idt[256];
DECLSPEC_ALIGN(PAGE_SIZE) KGDTENTRY64 Gdt[128];
//DECLSPEC_ALIGN(16) UINT8 NMIStackData[DOUBLE_FAULT_STACK_SIZE];
KIPCR Pcr;
ETHREAD Thread;
KTSS64 Tss;
//KTSS64 TssDoubleFault;
//KTSS64 TssNMI;
} APINFO, *PAPINFO;
VOID
NTAPI
KiSaveProcessorControlState(OUT PKPROCESSOR_STATE ProcessorState);
/* FUNCTIONS *****************************************************************/
CODE_SEG("INIT")
@ -19,5 +35,143 @@ VOID
NTAPI
KeStartAllProcessors(VOID)
{
UNIMPLEMENTED;
PVOID KernelStack, DpcStack, DoubleFaultStack, NmiStack;
ULONG ProcessorCount = 0;
PAPINFO APInfo;
PKPROCESSOR_STATE ProcessorState;
//__debugbreak();
//if (KeNumberProcessors <= 2) return;
while (TRUE)
{
ProcessorCount++;
KernelStack = NULL;
DpcStack = NULL;
DoubleFaultStack = NULL;
NmiStack = NULL;
/* Allocate structures for a new CPU. */
APInfo = ExAllocatePoolZero(NonPagedPool, sizeof(APINFO), ' eK');
if (APInfo == NULL)
{
DPRINT1("Failed to allocate APInfo\n");
break;
}
ASSERT(ALIGN_DOWN_POINTER_BY(APInfo, PAGE_SIZE) == APInfo);
/* Allocate a kernel stack */
KernelStack = MmCreateKernelStack(FALSE, 0);
if (KernelStack == NULL)
{
DPRINT1("Failed to allocate kernel stack\n");
break;
}
/* Allocate a DPC stack */
DpcStack = MmCreateKernelStack(FALSE, 0);
if (DpcStack == NULL)
{
DPRINT1("Failed to allocate DPC stack\n");
break;
}
/* Allocate a double-fault stack */
DoubleFaultStack = MmCreateKernelStack(FALSE, 0);
if (DoubleFaultStack == NULL)
{
DPRINT1("Failed to allocate double-fault stack\n");
break;
}
/* Allocate an NMI stack */
NmiStack = MmCreateKernelStack(FALSE, 0);
if (NmiStack == NULL)
{
DPRINT1("Failed to allocate NMI stack\n");
break;
}
/* Zero the APInfo */
RtlZeroMemory(APInfo, sizeof(APINFO));
/* Copy the GDT and IDT */
PKIPCR CurrentPcr = (PKIPCR)KeGetPcr();
RtlCopyMemory(APInfo->Gdt, CurrentPcr->GdtBase, sizeof(APInfo->Gdt));
RtlCopyMemory(APInfo->Idt, CurrentPcr->IdtBase, sizeof(APInfo->Idt));
/* Initialize PCR and TSS */
KiInitializeProcessorBootStructures(ProcessorCount,
&APInfo->Pcr,
APInfo->Gdt,
APInfo->Idt,
&APInfo->Tss,
&APInfo->Thread.Tcb,
KernelStack,
DpcStack,
DoubleFaultStack,
NmiStack);
/* Set up the processor state */
ProcessorState = &APInfo->Pcr.Prcb.ProcessorState;
KiSaveProcessorControlState(ProcessorState);
/* Set up GDT and IDT in the ProcessorState */
ProcessorState->SpecialRegisters.Gdtr.Base = APInfo->Gdt;
ProcessorState->SpecialRegisters.Gdtr.Limit = sizeof(APInfo->Gdt) - 1;
ProcessorState->SpecialRegisters.Idtr.Base = APInfo->Idt;
ProcessorState->SpecialRegisters.Idtr.Limit = sizeof(APInfo->Idt) - 1;
/* Set up parameters for entry point */
ProcessorState->ContextFrame.Rsp = (ULONG64)KernelStack - 5 * 8;
ProcessorState->ContextFrame.Rip = (ULONG64)KiSystemStartup;
ProcessorState->ContextFrame.Rcx = (ULONG64)KeLoaderBlock;
/* Set up the loader-block */
KeLoaderBlock->KernelStack = (ULONG64)KernelStack;
KeLoaderBlock->Thread = (ULONG64)&APInfo->Thread;
KeLoaderBlock->Process = (ULONG64)PsIdleProcess;
KeLoaderBlock->Prcb = (ULONG64)&APInfo->Pcr.Prcb;
/* Start the next processor */
DPRINT1("Attempting to start processor #%u\n", ProcessorCount);
if (!HalStartNextProcessor(KeLoaderBlock, ProcessorState))
{
DPRINT1("Failed to start processor #%u\n", ProcessorCount);
break;
}
/* Wait for it to start */
while (KeLoaderBlock->Prcb)
{
//TODO: Add a time out so we don't wait forever
KeMemoryBarrier();
YieldProcessor();
}
}
if (KernelStack != NULL)
{
MmDeleteKernelStack(KernelStack, FALSE);
}
if (DpcStack != NULL)
{
MmDeleteKernelStack(DpcStack, FALSE);
}
if (DoubleFaultStack != NULL)
{
MmDeleteKernelStack(DoubleFaultStack, FALSE);
}
if (NmiStack != NULL)
{
MmDeleteKernelStack(NmiStack, FALSE);
}
if (APInfo != NULL)
{
ExFreePoolWithTag(APInfo, ' eK');
}
}

View file

@ -674,8 +674,8 @@ OFFSET(PsKernelDr2, KPROCESSOR_STATE, SpecialRegisters.KernelDr2),
OFFSET(PsKernelDr3, KPROCESSOR_STATE, SpecialRegisters.KernelDr3),
OFFSET(PsKernelDr6, KPROCESSOR_STATE, SpecialRegisters.KernelDr6),
OFFSET(PsKernelDr7, KPROCESSOR_STATE, SpecialRegisters.KernelDr7),
OFFSET(PsGdtr, KPROCESSOR_STATE, SpecialRegisters.Gdtr),
OFFSET(PsIdtr, KPROCESSOR_STATE, SpecialRegisters.Idtr),
OFFSET(PsGdtr, KPROCESSOR_STATE, SpecialRegisters.Gdtr.Limit),
OFFSET(PsIdtr, KPROCESSOR_STATE, SpecialRegisters.Idtr.Limit),
OFFSET(PsTr, KPROCESSOR_STATE, SpecialRegisters.Tr),
OFFSET(PsLdtr, KPROCESSOR_STATE, SpecialRegisters.Ldtr),
OFFSET(PsMxCsr, KPROCESSOR_STATE, SpecialRegisters.MxCsr),
@ -711,8 +711,8 @@ OFFSET(SrKernelDr2, KSPECIAL_REGISTERS, KernelDr2),
OFFSET(SrKernelDr3, KSPECIAL_REGISTERS, KernelDr3),
OFFSET(SrKernelDr6, KSPECIAL_REGISTERS, KernelDr6),
OFFSET(SrKernelDr7, KSPECIAL_REGISTERS, KernelDr7),
OFFSET(SrGdtr, KSPECIAL_REGISTERS, Gdtr),
OFFSET(SrIdtr, KSPECIAL_REGISTERS, Idtr),
OFFSET(SrGdtr, KSPECIAL_REGISTERS, Gdtr.Limit),
OFFSET(SrIdtr, KSPECIAL_REGISTERS, Idtr.Limit),
OFFSET(SrTr, KSPECIAL_REGISTERS, Tr),
OFFSET(SrMxCsr, KSPECIAL_REGISTERS, MxCsr),
OFFSET(SrMsrGsBase, KSPECIAL_REGISTERS, MsrGsBase),