From 323f18e446aef87e488f6c1cbaa20eb614ac7155 Mon Sep 17 00:00:00 2001 From: Timo Kreuzer Date: Thu, 23 Nov 2023 22:20:38 +0200 Subject: [PATCH] [HAL][NTOS] Implement x64 SMP startup code TODO: fix lgdt macro to work with GAAS and ML Works with PIIX3 and cdrom on AHCI or SCSI controller When cdrom is on IDE controller it doesn't seem to work --- hal/halx86/smp/amd64/apentry.S | 197 +++++++++++++++++++++++++-- hal/halx86/smp/amd64/spinup.c | 109 ++++++++++++++- ntoskrnl/include/internal/amd64/ke.h | 13 ++ ntoskrnl/ke/amd64/kiinit.c | 5 +- ntoskrnl/ke/amd64/mproc.c | 156 ++++++++++++++++++++- sdk/include/asm/ksamd64.template.h | 8 +- 6 files changed, 464 insertions(+), 24 deletions(-) diff --git a/hal/halx86/smp/amd64/apentry.S b/hal/halx86/smp/amd64/apentry.S index 2e61d4f7b65..e04805cc876 100644 --- a/hal/halx86/smp/amd64/apentry.S +++ b/hal/halx86/smp/amd64/apentry.S @@ -1,34 +1,205 @@ /* * PROJECT: ReactOS Kernel * LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later) - * PURPOSE: AMD64 Application Processor (AP) spinup setup - * COPYRIGHT: Copyright 2023 Justin Miller + * PURPOSE: Assembly file for real mode AP code + * COPYRIGHT: Copyright 2023 Timo Kreuzer */ #include +#include + +#define ZERO_OFFSET(f) (offset f - offset HalpAPEntry16) + +#ifndef _USE_ML +.macro data32, opcode:vararg + .byte 0x66 + \opcode +.endm +#endif PUBLIC HalpAPEntry16 PUBLIC HalpAPEntryData -PUBLIC HalpAPEntry32 PUBLIC HalpAPEntry16End -.code +.code64 // Workaround to be able to link this code + +/////////////////////////////////////////////////////////////////////////////// +// 16-bit code +/////////////////////////////////////////////////////////////////////////////// + +// The following code is executed in real mode. +// We compile it as 64-bit code, because otherwise we cannot link it. +// To fix differences between 64-bit and 16-bit code, we need to manually +// add the data32 (0x66) prefix to some instructions.
+// We also need to use indirect addressing, because the 64-bit assembler +// does not support the 16-bit addressing mode. + HalpAPEntry16: + + /* Disable interrupts */ cli - xor ax, ax - mov ds, ax - mov ss, ax + /* Use esi = 0 for register relative addressing */ + data32 xor esi, esi + + /* Set up ds segment */ + data32 mov edi, cs + data32 mov ds, edi + + /* Calculate the flat base address */ + data32 shl edi, 4 + + /* Enable A20 address line */ + data32 call EnableA20 + + /* Set up far pointer offset for switch to 32 bit */ + data32 lea eax, [edi + ZERO_OFFSET(APEntry32)] + data32 mov [esi + ZERO_OFFSET(Jump32Offset)], eax + + /* Set up far pointer offset for switch to 64 bit */ + data32 lea eax, [edi + ZERO_OFFSET(APEntry64)] + data32 mov [esi + ZERO_OFFSET(Jump64Offset)], eax + + /* Set up temporary GDT pointer */ + data32 lea eax, [edi + ZERO_OFFSET(TempGdt)] + data32 mov [esi + ZERO_OFFSET(TempGdtr_Base)], eax + + /* Load the initial GDT */ +#ifdef _USE_ML + lgdt fword ptr [esi + ZERO_OFFSET(TempGdtr)] +#else + lgdt ds:[esi + ZERO_OFFSET(TempGdtr)] +#endif + + /* Set PAE and PGE: 10100000b */ + mov rax, cr4 + data32 or eax, HEX(00A0) + mov cr4, rax + + /* Enable protected mode */ + mov rax, cr0 + data32 or eax, HEX(00000001) + mov cr0, rax + + /* Long jump, 32bit address */ + .byte HEX(66) + .byte HEX(EA) +Jump32Offset: + .long 0 + .short HEX(20) // Protected mode CS + +Empty8042: + .word HEX(00eb), HEX(00eb) // jmp $+2, jmp $+2 + in al, HEX(64) + cmp al, HEX(0ff) // legacy-free machine without keyboard + jz Empty8042_ret // controllers on Intel Macs read back 0xFF + test al, 2 + jnz Empty8042 +Empty8042_ret: + data32 ret + +EnableA20: + data32 call Empty8042 + mov al, HEX(0D1) // command write + out HEX(064), al + data32 call Empty8042 + mov al, HEX(0DF) // A20 on + out HEX(060), al + data32 call Empty8042 + mov al, HEX(0FF) // pulse output port + out HEX(064), al + data32 call Empty8042 + data32 ret + +.align 8 +TempGdt: + .word HEX(0000), HEX(0000),
HEX(0000), HEX(0000) /* 00: NULL descriptor */ + .word HEX(0000), HEX(0000), HEX(0000), HEX(0000) /* 08: */ + .word HEX(0000), HEX(0000), HEX(9B00), HEX(0020) /* 10: long mode CS */ + .word HEX(FFFF), HEX(0000), HEX(9300), HEX(00CF) /* 18: long mode DS */ + .word HEX(FFFF), HEX(0000), HEX(9B00), HEX(00CF) /* 20: protected mode CS */ + .word HEX(FFFF), HEX(0000), HEX(9300), HEX(00CF) /* 28: protected mode DS */ + +/* GDT table pointer */ +TempGdtr: + .word HEX(2F) /* Limit */ +TempGdtr_Base: + .long 0 /* Base */ + +/////////////////////////////////////////////////////////////////////////////// +// 32-bit code +/////////////////////////////////////////////////////////////////////////////// + +APEntry32: + + /* Load protected mode DS */ + mov eax, HEX(28) + mov ds, eax + + /* Set up the long mode page table in cr3 */ + lea eax, [rdi + PAGE_SIZE] + mov cr3, rax + + /* Enable EFER.LME (long mode enable) and NXE (no-execute enable) */ + mov ecx, MSR_EFER + rdmsr + or eax, HEX(00000900) + wrmsr + + /* Enable paging for long mode */ + mov rax, cr0 + or eax, HEX(80000000) + mov cr0, rax + + /* Long jump to 64 bit segment to activate long mode */ + .byte HEX(EA) +Jump64Offset: + .long 0 + .short HEX(10) // Long mode CS + + +/////////////////////////////////////////////////////////////////////////////// +// 64-bit code +/////////////////////////////////////////////////////////////////////////////// + +APEntry64: + + /* Load long mode segments */ + mov ax, KGDT64_R0_DATA + mov ds, ax + mov es, ax mov fs, ax mov gs, ax + mov ss, ax - hlt + /* Load ProcessorState pointer */ + mov rsi, [rdi + ZERO_OFFSET(ProcessorState)] + + /* Load the final GDT and IDT from the ProcessorState */ + lgdt fword ptr [rsi + PsGdtr] + lidt fword ptr [rsi + PsIdtr] + + /* Set LTR */ + mov ax, KGDT64_SYS_TSS + ltr ax + + /* Set up the stack */ + mov rsp, [rsi + PsContextFrame + CxRsp] + + /* Set parameters and jump to entry point */ + mov rcx, [rsi + PsContextFrame + CxRcx] + mov rdx, [rsi +
PsContextFrame + CxRdx] + mov r8, [rsi + PsContextFrame + CxR8] + mov r9, [rsi + PsContextFrame + CxR9] + jmp qword ptr [rsi + PsContextFrame + CxRip] + +// Layout in sync with AP_ENTRY_DATA in spinup.c +.align 8 +HalpAPEntryData: +ProcessorState: + .quad 0 HalpAPEntry16End: -.long HEX(0) -HalpAPEntry32: -.long HEX(0) -HalpAPEntryData: -.long HEX(0) + nop END diff --git a/hal/halx86/smp/amd64/spinup.c b/hal/halx86/smp/amd64/spinup.c index fe0173f59b6..feefbcae44b 100644 --- a/hal/halx86/smp/amd64/spinup.c +++ b/hal/halx86/smp/amd64/spinup.c @@ -1,8 +1,9 @@ /* * PROJECT: ReactOS Kernel * LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later) - * PURPOSE: AMD64 Application Processor (AP) spinup setup - * COPYRIGHT: Copyright 2023 Justin Miller + * PURPOSE: AMD64 Application Processor (AP) spinup setup + * COPYRIGHT: Copyright 2021 Victor Perevertkin + * Copyright 2021-2023 Justin Miller */ /* INCLUDES ******************************************************************/ @@ -13,12 +14,112 @@ #define NDEBUG #include +/* GLOBALS *******************************************************************/ + +extern PPROCESSOR_IDENTITY HalpProcessorIdentity; +extern PHYSICAL_ADDRESS HalpLowStubPhysicalAddress; +extern PVOID HalpLowStub; + +// The data necessary for a boot (stored inside HalpLowStub) +extern PVOID HalpAPEntry16; +extern PVOID HalpAPEntryData; +extern PVOID HalpAPEntry16End; +extern HALP_APIC_INFO_TABLE HalpApicInfoTable; + +ULONG HalpStartedProcessorCount = 1; + +#ifndef Add2Ptr +#define Add2Ptr(P,I) ((PVOID)((PUCHAR)(P) + (I))) +#endif +#ifndef PtrOffset +#define PtrOffset(B,O) ((ULONG)((ULONG_PTR)(O) - (ULONG_PTR)(B))) +#endif + +// Windows uses PROCESSOR_START_BLOCK (offsets defined in ksamd64.inc) +typedef struct _AP_ENTRY_DATA +{ + PKPROCESSOR_STATE ProcessorState; +} AP_ENTRY_DATA, *PAP_ENTRY_DATA; + +/* FUNCTIONS *****************************************************************/ + +static +ULONG +HalpSetupTemporaryMappings( + _In_
PKPROCESSOR_STATE ProcessorState) +{ + PMMPXE RootPageTable = Add2Ptr(HalpLowStub, 1 * PAGE_SIZE); + PMMPPE PageTableLvl3 = Add2Ptr(HalpLowStub, 2 * PAGE_SIZE); + PMMPDE PageTableLvl2 = Add2Ptr(HalpLowStub, 3 * PAGE_SIZE); + PHYSICAL_ADDRESS PhysicalAddress; + ULONG SelfMapPxi; + + /* Copy current mappings */ + RtlCopyMemory(RootPageTable, MiAddressToPxe(NULL), PAGE_SIZE); + + /* Set up self-mapping PXE */ + SelfMapPxi = MiAddressToPxi(MiAddressToPxe(NULL)); + PhysicalAddress = MmGetPhysicalAddress(RootPageTable); + RootPageTable[SelfMapPxi].u.Flush.PageFrameNumber = PhysicalAddress.QuadPart >> PAGE_SHIFT; + + /* Set up low PXE */ + PhysicalAddress = MmGetPhysicalAddress(PageTableLvl3); + RootPageTable[0].u.Flush.PageFrameNumber = PhysicalAddress.QuadPart >> PAGE_SHIFT; + RootPageTable[0].u.Flush.Valid = 1; + RootPageTable[0].u.Flush.Write = 1; + + /* Set up low PPE */ + PhysicalAddress = MmGetPhysicalAddress(PageTableLvl2); + PageTableLvl3[0].u.Flush.PageFrameNumber = PhysicalAddress.QuadPart >> PAGE_SHIFT; + PageTableLvl3[0].u.Flush.Valid = 1; + PageTableLvl3[0].u.Flush.Write = 1; + + /* Set up a large-page low PDE */ + PageTableLvl2[0].u.Flush.PageFrameNumber = 0; + PageTableLvl2[0].u.Flush.Valid = 1; + PageTableLvl2[0].u.Flush.Write = 1; + PageTableLvl2[0].u.Flush.LargePage = 1; + + PhysicalAddress = MmGetPhysicalAddress(RootPageTable); + ASSERT(PhysicalAddress.QuadPart < 0x100000000); + + return (ULONG)PhysicalAddress.QuadPart; +} + BOOLEAN NTAPI HalStartNextProcessor( _In_ PLOADER_PARAMETER_BLOCK LoaderBlock, _In_ PKPROCESSOR_STATE ProcessorState) { - //TODO: - return FALSE; + PAP_ENTRY_DATA APEntryData; + ULONG InitialCr3; + + if (HalpStartedProcessorCount == HalpApicInfoTable.ProcessorCount) + return FALSE; + + /* Clean up low stub from any previous data */ + RtlZeroMemory(HalpLowStub, HALP_LOW_STUB_SIZE_IN_PAGES * PAGE_SIZE); + + /* Initialize the temporary page table */ + InitialCr3 = HalpSetupTemporaryMappings(ProcessorState); + + /* Put the bootstrap
code into low memory */ + SIZE_T APEntrySize = (ULONG_PTR)&HalpAPEntry16End - (ULONG_PTR)&HalpAPEntry16; + ASSERT(APEntrySize <= PAGE_SIZE); + RtlCopyMemory(HalpLowStub, &HalpAPEntry16, APEntrySize); + + /* Get a pointer to APEntryData */ + SIZE_T Offset = PtrOffset(&HalpAPEntry16, &HalpAPEntryData); + APEntryData = Add2Ptr(HalpLowStub, Offset); + + /* Fill in the APEntryData structure */ + APEntryData->ProcessorState = ProcessorState; + + /* Start the processor */ + ApicStartApplicationProcessor(HalpStartedProcessorCount, HalpLowStubPhysicalAddress); + + HalpStartedProcessorCount++; + + return TRUE; } diff --git a/ntoskrnl/include/internal/amd64/ke.h b/ntoskrnl/include/internal/amd64/ke.h index 2bd4d5c8f3d..fdecb2397ed 100644 --- a/ntoskrnl/include/internal/amd64/ke.h +++ b/ntoskrnl/include/internal/amd64/ke.h @@ -494,6 +494,19 @@ KiProcessorFreezeHandler( _In_ PKTRAP_FRAME TrapFrame, _In_ PKEXCEPTION_FRAME ExceptionFrame); +VOID +KiInitializeProcessorBootStructures( + _In_ ULONG ProcessorNumber, + _Out_ PKIPCR Pcr, + _In_ PKGDTENTRY64 GdtBase, + _In_ PKIDTENTRY64 IdtBase, + _In_ PKTSS64 TssBase, + _In_ PKTHREAD IdleThread, + _In_ PVOID KernelStack, + _In_ PVOID DpcStack, + _In_ PVOID DoubleFaultStack, + _In_ PVOID NmiStack); + #ifdef __cplusplus } // extern "C" #endif diff --git a/ntoskrnl/ke/amd64/kiinit.c b/ntoskrnl/ke/amd64/kiinit.c index 09a33226288..1529ab93922 100644 --- a/ntoskrnl/ke/amd64/kiinit.c +++ b/ntoskrnl/ke/amd64/kiinit.c @@ -535,8 +535,8 @@ KiSystemStartup(IN PLOADER_PARAMETER_BLOCK LoaderBlock) if (KdPollBreakIn()) DbgBreakPointWithStatus(DBG_STATUS_CONTROL_C); } - DPRINT1("Pcr = %p, Gdt = %p, Idt = %p, Tss = %p\n", - Pcr, Pcr->GdtBase, Pcr->IdtBase, Pcr->TssBase); + DPRINT1("Cpu %u: Pcr = %p, Gdt = %p, Idt = %p, Tss = %p\n", + Cpu, Pcr, Pcr->GdtBase, Pcr->IdtBase, Pcr->TssBase); /* Acquire lock */ while (InterlockedBitTestAndSet64((PLONG64)&KiFreezeExecutionLock, 0)) @@ -557,6 +557,7 @@ KiSystemStartup(IN PLOADER_PARAMETER_BLOCK LoaderBlock) /*
Raise to HIGH_LEVEL */ KfRaiseIrql(HIGH_LEVEL); + /* Machine specific kernel initialization */ if (Cpu == 0) KiInitializeKernelMachineDependent(&Pcr->Prcb, LoaderBlock); diff --git a/ntoskrnl/ke/amd64/mproc.c b/ntoskrnl/ke/amd64/mproc.c index d990a859269..9d8313e6d44 100644 --- a/ntoskrnl/ke/amd64/mproc.c +++ b/ntoskrnl/ke/amd64/mproc.c @@ -12,6 +12,22 @@ #define NDEBUG #include +typedef struct _APINFO +{ + DECLSPEC_ALIGN(PAGE_SIZE) KIDTENTRY64 Idt[256]; + DECLSPEC_ALIGN(PAGE_SIZE) KGDTENTRY64 Gdt[128]; + //DECLSPEC_ALIGN(16) UINT8 NMIStackData[DOUBLE_FAULT_STACK_SIZE]; + KIPCR Pcr; + ETHREAD Thread; + KTSS64 Tss; + //KTSS64 TssDoubleFault; + //KTSS64 TssNMI; +} APINFO, *PAPINFO; + +VOID +NTAPI +KiSaveProcessorControlState(OUT PKPROCESSOR_STATE ProcessorState); + /* FUNCTIONS *****************************************************************/ CODE_SEG("INIT") @@ -19,5 +35,143 @@ VOID NTAPI KeStartAllProcessors(VOID) { - UNIMPLEMENTED; + PVOID KernelStack, DpcStack, DoubleFaultStack, NmiStack; + ULONG ProcessorCount = 0; + PAPINFO APInfo; + PKPROCESSOR_STATE ProcessorState; + + //__debugbreak(); + //if (KeNumberProcessors <= 2) return; + + while (TRUE) + { + ProcessorCount++; + KernelStack = NULL; + DpcStack = NULL; + DoubleFaultStack = NULL; + NmiStack = NULL; + + /* Allocate structures for a new CPU.
*/ + APInfo = ExAllocatePoolZero(NonPagedPool, sizeof(APINFO), ' eK'); + if (APInfo == NULL) + { + DPRINT1("Failed to allocate APInfo\n"); + break; + } + ASSERT(ALIGN_DOWN_POINTER_BY(APInfo, PAGE_SIZE) == APInfo); + + /* Allocate a kernel stack */ + KernelStack = MmCreateKernelStack(FALSE, 0); + if (KernelStack == NULL) + { + DPRINT1("Failed to allocate kernel stack\n"); + break; + } + + /* Allocate a DPC stack */ + DpcStack = MmCreateKernelStack(FALSE, 0); + if (DpcStack == NULL) + { + DPRINT1("Failed to allocate DPC stack\n"); + break; + } + + /* Allocate a double-fault stack */ + DoubleFaultStack = MmCreateKernelStack(FALSE, 0); + if (DoubleFaultStack == NULL) + { + DPRINT1("Failed to allocate double-fault stack\n"); + break; + } + + /* Allocate an NMI stack */ + NmiStack = MmCreateKernelStack(FALSE, 0); + if (NmiStack == NULL) + { + DPRINT1("Failed to allocate NMI stack\n"); + break; + } + + /* Zero the APInfo */ + RtlZeroMemory(APInfo, sizeof(APINFO)); + + /* Copy the GDT and IDT */ + PKIPCR CurrentPcr = (PKIPCR)KeGetPcr(); + RtlCopyMemory(APInfo->Gdt, CurrentPcr->GdtBase, sizeof(APInfo->Gdt)); + RtlCopyMemory(APInfo->Idt, CurrentPcr->IdtBase, sizeof(APInfo->Idt)); + + /* Initialize PCR and TSS */ + KiInitializeProcessorBootStructures(ProcessorCount, + &APInfo->Pcr, + APInfo->Gdt, + APInfo->Idt, + &APInfo->Tss, + &APInfo->Thread.Tcb, + KernelStack, + DpcStack, + DoubleFaultStack, + NmiStack); + + /* Set up the processor state */ + ProcessorState = &APInfo->Pcr.Prcb.ProcessorState; + KiSaveProcessorControlState(ProcessorState); + + /* Set up GDT and IDT in the ProcessorState */ + ProcessorState->SpecialRegisters.Gdtr.Base = APInfo->Gdt; + ProcessorState->SpecialRegisters.Gdtr.Limit = sizeof(APInfo->Gdt) - 1; + ProcessorState->SpecialRegisters.Idtr.Base = APInfo->Idt; + ProcessorState->SpecialRegisters.Idtr.Limit = sizeof(APInfo->Idt) - 1; + + /* Set up parameters for entry point */ + ProcessorState->ContextFrame.Rsp = (ULONG64)KernelStack - 5 * 8; +
ProcessorState->ContextFrame.Rip = (ULONG64)KiSystemStartup; + ProcessorState->ContextFrame.Rcx = (ULONG64)KeLoaderBlock; + + /* Set up the loader-block */ + KeLoaderBlock->KernelStack = (ULONG64)KernelStack; + KeLoaderBlock->Thread = (ULONG64)&APInfo->Thread; + KeLoaderBlock->Process = (ULONG64)PsIdleProcess; + KeLoaderBlock->Prcb = (ULONG64)&APInfo->Pcr.Prcb; + + /* Start the next processor */ + DPRINT1("Attempting to start processor #%u\n", ProcessorCount); + if (!HalStartNextProcessor(KeLoaderBlock, ProcessorState)) + { + DPRINT1("Failed to start processor #%u\n", ProcessorCount); + break; + } + + /* Wait for it to start */ + while (KeLoaderBlock->Prcb) + { + //TODO: Add a time out so we don't wait forever + KeMemoryBarrier(); + YieldProcessor(); + } + } + + if (KernelStack != NULL) + { + MmDeleteKernelStack(KernelStack, FALSE); + } + + if (DpcStack != NULL) + { + MmDeleteKernelStack(DpcStack, FALSE); + } + + if (DoubleFaultStack != NULL) + { + MmDeleteKernelStack(DoubleFaultStack, FALSE); + } + + if (NmiStack != NULL) + { + MmDeleteKernelStack(NmiStack, FALSE); + } + + if (APInfo != NULL) + { + ExFreePoolWithTag(APInfo, ' eK'); + } } diff --git a/sdk/include/asm/ksamd64.template.h b/sdk/include/asm/ksamd64.template.h index 0f3a16d6209..6166217d5dd 100644 --- a/sdk/include/asm/ksamd64.template.h +++ b/sdk/include/asm/ksamd64.template.h @@ -674,8 +674,8 @@ OFFSET(PsKernelDr2, KPROCESSOR_STATE, SpecialRegisters.KernelDr2), OFFSET(PsKernelDr3, KPROCESSOR_STATE, SpecialRegisters.KernelDr3), OFFSET(PsKernelDr6, KPROCESSOR_STATE, SpecialRegisters.KernelDr6), OFFSET(PsKernelDr7, KPROCESSOR_STATE, SpecialRegisters.KernelDr7), -OFFSET(PsGdtr, KPROCESSOR_STATE, SpecialRegisters.Gdtr), -OFFSET(PsIdtr, KPROCESSOR_STATE, SpecialRegisters.Idtr), +OFFSET(PsGdtr, KPROCESSOR_STATE, SpecialRegisters.Gdtr.Limit), +OFFSET(PsIdtr, KPROCESSOR_STATE, SpecialRegisters.Idtr.Limit), OFFSET(PsTr, KPROCESSOR_STATE, SpecialRegisters.Tr), OFFSET(PsLdtr, KPROCESSOR_STATE,
SpecialRegisters.Ldtr), OFFSET(PsMxCsr, KPROCESSOR_STATE, SpecialRegisters.MxCsr), @@ -711,8 +711,8 @@ OFFSET(SrKernelDr2, KSPECIAL_REGISTERS, KernelDr2), OFFSET(SrKernelDr3, KSPECIAL_REGISTERS, KernelDr3), OFFSET(SrKernelDr6, KSPECIAL_REGISTERS, KernelDr6), OFFSET(SrKernelDr7, KSPECIAL_REGISTERS, KernelDr7), -OFFSET(SrGdtr, KSPECIAL_REGISTERS, Gdtr), -OFFSET(SrIdtr, KSPECIAL_REGISTERS, Idtr), +OFFSET(SrGdtr, KSPECIAL_REGISTERS, Gdtr.Limit), +OFFSET(SrIdtr, KSPECIAL_REGISTERS, Idtr.Limit), OFFSET(SrTr, KSPECIAL_REGISTERS, Tr), OFFSET(SrMxCsr, KSPECIAL_REGISTERS, MxCsr), OFFSET(SrMsrGsBase, KSPECIAL_REGISTERS, MsrGsBase),