From 25badcfbbdc4f6f58546ff3b1920bbe6d9d3498e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Herm=C3=A8s=20B=C3=A9lusca-Ma=C3=AFto?=
Date: Tue, 1 Oct 2019 03:50:29 +0200
Subject: [PATCH] [FREELDR] Add support for loading Linux in x64 FreeLdr. Part 1/2: ASM code.

Add also ASM implementation for intrinsics that may not be always present on MSVC (e.g. MSVC 2010).
---
 boot/freeldr/freeldr/CMakeLists.txt        |   3 +-
 boot/freeldr/freeldr/arch/amd64/entry.S    | 104 +++++++++-----
 boot/freeldr/freeldr/arch/amd64/int386.S   |   7 +-
 boot/freeldr/freeldr/arch/amd64/linux.S    | 164 +++++++++++++++++++++
 boot/freeldr/freeldr/arch/realmode/amd64.S |  41 +++---
 5 files changed, 259 insertions(+), 60 deletions(-)
 create mode 100644 boot/freeldr/freeldr/arch/amd64/linux.S

diff --git a/boot/freeldr/freeldr/CMakeLists.txt b/boot/freeldr/freeldr/CMakeLists.txt
index 53eaca33b74..d4086a7893b 100644
--- a/boot/freeldr/freeldr/CMakeLists.txt
+++ b/boot/freeldr/freeldr/CMakeLists.txt
@@ -143,7 +143,8 @@ elseif(ARCH STREQUAL "amd64")
     list(APPEND FREELDR_COMMON_ASM_SOURCE
         arch/amd64/entry.S
         arch/amd64/int386.S
-        arch/amd64/pnpbios.S)
+        arch/amd64/pnpbios.S
+        arch/amd64/linux.S)
 
     list(APPEND FREELDR_NTLDR_SOURCE
         ntldr/arch/amd64/winldr.c)
diff --git a/boot/freeldr/freeldr/arch/amd64/entry.S b/boot/freeldr/freeldr/arch/amd64/entry.S
index 31c6411ee00..24ecc07291d 100644
--- a/boot/freeldr/freeldr/arch/amd64/entry.S
+++ b/boot/freeldr/freeldr/arch/amd64/entry.S
@@ -1,6 +1,7 @@
 
 #include
 #include
+#include
 
 EXTERN BootMain:PROC
 // EXTERN cmdline:DWORD
@@ -75,50 +76,71 @@ Reboot:
     /* Stop the floppy drive motor */
     call DiskStopFloppyMotor
 
-    /* Set the function ID */
+    /* Set the function ID and switch to real mode (we don't return) */
     mov bx, FNID_Reboot
-
-    /* Switch to real mode (we don't return) */
     jmp SwitchToReal
 
 
 /*
- * VOID __cdecl ChainLoadBiosBootSectorCode(
- *     IN UCHAR BootDrive OPTIONAL,
- *     IN ULONG BootPartition OPTIONAL);
+ * VOID __cdecl Relocator16Boot(
+ *     IN REGS*  In,
+ *     IN USHORT StackSegment,
+ *     IN USHORT StackPointer,
+ *     IN USHORT CodeSegment,
+ *     IN USHORT CodePointer);
  *
- * RETURNS: Nothing
+ * RETURNS: Nothing.
+ *
+ * NOTE: The implementation of this function is similar to that of Int386(),
+ * with the proviso that no attempt is done to save the original values of
+ * the registers since we will not need them anyway, as we do not return back
+ * to the caller but instead place the machine in a permanent new CPU state.
  */
-PUBLIC ChainLoadBiosBootSectorCode
-ChainLoadBiosBootSectorCode:
-    /* Set the boot drive */
-    mov dl, [esp + 4]
-    test dl, dl
-    jnz set_part
-    mov dl, byte ptr [FrldrBootDrive]
+PUBLIC Relocator16Boot
+Relocator16Boot:
 
-    /* Set the boot partition */
-set_part:
-    mov eax, [esp + 8]
-    test eax, eax
-    jnz continue
-    mov eax, dword ptr [FrldrBootPartition]
-continue:
-    /* Store the 1-byte truncated partition number in DH */
-    mov dh, al
+    /* Save home registers */
+    mov qword ptr [rsp + 8], rcx
+    mov word ptr [rsp + 16], dx
+    mov word ptr [rsp + 24], r8w
+    mov word ptr [rsp + 32], r9w
+
+#if 0
+    /* Save non-volatile registers */
+    push rbx
+    push rsi
+    push rdi
+#endif
+
+    /* Copy input registers */
+    mov rsi, qword ptr [rsp + 8]
+    mov rdi, BSS_RegisterSet
+    mov rcx, REGS_SIZE / 4
+    rep movsd
+
+    /* Set the stack segment/offset */
+    // Since BSS_CallbackReturn contains a ULONG, store in its high word
+    // the stack segment and in its low word the stack offset.
+    mov ax, word ptr [rsp + 16]
+    shl eax, 16
+    mov ax, word ptr [rsp + 24]
+    mov dword ptr [BSS_CallbackReturn], eax
 
     /*
-     * Don't stop the floppy drive motor when we are just booting a bootsector,
-     * a drive, or a partition. If we were to stop the floppy motor, the BIOS
-     * wouldn't be informed and if the next read is to a floppy then the BIOS
-     * will still think the motor is on and this will result in a read error.
+     * Set the code segment/offset (Copy entry point)
+     * NOTE: We permanently *ERASE* the contents of ds:[BSS_RealModeEntry]
+     * but it is not a problem since we are going to place the machine in
+     * a permanent new CPU state.
      */
-    // call DiskStopFloppyMotor
+    // Since BSS_RealModeEntry contains a ULONG, store in its high word
+    // the code segment and in its low word the code offset.
+    mov ax, word ptr [rsp + 32]
+    shl eax, 16
+    mov ax, word ptr [rsp + 40]
+    mov dword ptr [BSS_RealModeEntry], eax
 
-    /* Set the function ID */
-    mov bx, FNID_ChainLoadBiosBootSectorCode
-
-    /* Switch to real mode (we don't return) */
+    /* Set the function ID and switch to real mode (we don't return) */
+    mov bx, FNID_Relocator16Boot
     jmp SwitchToReal
 
 
@@ -166,7 +188,7 @@ jumpvector:
     .word CMODE_CS
 
 SwitchToRealCompSegment:
-    /* Note: In fact the CPU is in 32 bit mode here. But it will interprete
+    /* Note: In fact the CPU is in 32 bit mode here. But it will interpret
        the generated instructions accordingly. rax will become eax */
 
     /* Step 2 - deactivate long mode, by disabling paging */
@@ -191,18 +213,30 @@ CallRealMode_return:
 
 /////////////////////////////////////////
 
-//void __lgdt(void *Source);
+// void __fastfail(unsigned int Code);
+PUBLIC __fastfail
+__fastfail:
+    // mov ecx, [rsp + 4]
+    int HEX(29)
+
+// void __lgdt(void *Source);
 PUBLIC __lgdt
 __lgdt:
     lgdt fword ptr [rcx]
     ret
 
-//void __ltr(unsigned short Source);
+// void __ltr(unsigned short Source);
 PUBLIC __ltr
 __ltr:
     ltr cx
     ret
 
+// void __sgdt(void *Destination);
+PUBLIC __sgdt
+__sgdt:
+    sgdt fword ptr [rcx]
+    ret
+
 
 /* 64-bit stack pointer */
 stack64:
diff --git a/boot/freeldr/freeldr/arch/amd64/int386.S b/boot/freeldr/freeldr/arch/amd64/int386.S
index dc486ee7b18..f3bd65a4da7 100644
--- a/boot/freeldr/freeldr/arch/amd64/int386.S
+++ b/boot/freeldr/freeldr/arch/amd64/int386.S
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+
 .code64
 
 EXTERN CallRealMode:PROC
@@ -40,7 +41,7 @@ Int386:
     push rsi
     push rdi
 
-    /* Alloc stack space for home registers */
+    /* Alloc stack space for home registers (+ alignment) */
     sub rsp, 40
     //.ENDPROLOG
 
@@ -68,11 +69,11 @@ int386_2:
 
     /* Copy output registers */
     mov rsi, BSS_RegisterSet
-    mov rdi, [r11 + 24]
+    mov rdi, qword ptr [r11 + 24]
     mov rcx, REGS_SIZE / 4
     rep movsd
 
-    /* cleanup and return */
+    /* Cleanup and return */
     add rsp, 40
     pop rdi
     pop rsi
diff --git a/boot/freeldr/freeldr/arch/amd64/linux.S b/boot/freeldr/freeldr/arch/amd64/linux.S
new file mode 100644
index 00000000000..383111ae66c
--- /dev/null
+++ b/boot/freeldr/freeldr/arch/amd64/linux.S
@@ -0,0 +1,164 @@
+/*
+ *  FreeLoader
+ *  Copyright (C) 1998-2002  Brian Palmer
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include
+#include
+#include
+
+EXTERN DiskStopFloppyMotor:PROC
+EXTERN Relocator16Boot:PROC
+EXTERN FrldrBootDrive:BYTE
+EXTERN FrldrBootPartition:DWORD
+
+.code64
+
+Regs:
+    .space REGS_SIZE
+
+/*
+ * VOID __cdecl BootLinuxKernel(
+ *     IN ULONG KernelSize,
+ *     IN PVOID KernelCurrentLoadAddress,
+ *     IN PVOID KernelTargetLoadAddress,
+ *     IN UCHAR DriveNumber,
+ *     IN ULONG PartitionNumber);
+ */
+PUBLIC BootLinuxKernel
+BootLinuxKernel:
+
+    /* Save home registers */
+    mov r11, rsp
+    mov dword ptr [r11 + 8], ecx
+    mov qword ptr [r11 + 16], rdx
+    mov qword ptr [r11 + 24], r8
+    mov byte ptr [r11 + 32], r9b
+
+    /* Save non-volatile registers */
+    push rsi
+    push rdi
+
+    /* Allocate stack space for home registers (+ alignment) */
+    sub rsp, (8*4 + 8)
+    //.ENDPROLOG
+
+    /* Stop the floppy drive motor */
+    call DiskStopFloppyMotor
+
+    /* R11 is volatile across calls (Microsoft x64 ABI): re-derive the
+     * entry-time RSP = current RSP + local stack area + the 2 pushes */
+    lea r11, [rsp + (8*4 + 8 + 2*8)]
+
+    /* Set all segment registers to 0x9000 */
+    mov ax, HEX(9000)
+    mov word ptr [Regs + REGS_DS], ax
+    mov word ptr [Regs + REGS_ES], ax
+    mov word ptr [Regs + REGS_FS], ax
+    mov word ptr [Regs + REGS_GS], ax
+
+    /* Set the boot drive */
+    xor edx, edx
+    mov dl, byte ptr [r11 + 32]
+    test dl, dl
+    jnz set_part
+    mov dl, byte ptr /*ds:*/[FrldrBootDrive]
+
+    /* Set the boot partition */
+set_part:
+    mov eax, dword ptr [r11 + 40]
+    test eax, eax
+    jnz continue
+    mov eax, dword ptr /*ds:*/[FrldrBootPartition]
+continue:
+    /* Store the 1-byte truncated partition number in DH */
+    mov dh, al
+
+    mov dword ptr [Regs + REGS_EDX], edx
+
+    /*
+     * Relocate the kernel image to its final destination (can be as low as 0x10000).
+     * The reason we can overwrite low memory is because this code executes
+     * between 0000:8000 and 0000:FFFF. That leaves space for 32k of code
+     * before we start interfering with Linux kernel address space.
+     */
+
+    /* Get KernelSize in ECX */
+    xor rcx, rcx    // Put the 64..32 higher bits to zero
+    mov ecx, dword ptr [r11 + 8]
+    test rcx, rcx   // If size is zero, do not perform relocations
+    jz after_reloc
+
+    /* Load the source and target addresses */
+    mov rsi, qword ptr [r11 + 16] // HEX(100000) // LINUX_KERNEL_LOAD_ADDRESS
+    mov rdi, qword ptr [r11 + 24] // HEX(10000)
+
+//
+// FIXME: Support relocating *upwards*, overlapping regions, aligned addresses,
+// etc... !! See memmove code.
+//
+    /* Check how we should perform relocation */
+    cmp rdi, rsi
+    je after_reloc  // target == source: do not perform relocations
+    ja reloc_up     // target >  source: relocate up
+//  jb reloc_down   // target <  source: relocate down (default)
+
+reloc_down:
+    /* Move the kernel down - Start with low addresses and increment them */
+    cld
+#if 0
+    rep movsb
+#else
+    mov rdx, rcx            // Copy the total number of bytes in EDX
+    and rdx, HEX(0FFFFFFFC) // Number of bytes we copy using DWORDs
+    xor rdx, rcx            // Number of remaining bytes to copy after the DWORDs
+    shr rcx, 2              // Count number of DWORDs
+    rep movsd               // Move DWORDs
+    mov rcx, rdx            // Count number of remaining bytes
+    rep movsb               // Move bytes
+#endif
+    jmp after_reloc
+
+reloc_up:
+    /* Move the kernel up - Start with high addresses and decrement them */
+    std
+    add rsi, rcx
+    add rdi, rcx
+    dec rsi
+    dec rdi
+    rep movsb
+    cld // Restore DF = 0: required by the ABI and by the "rep movsd" in Relocator16Boot
+    // jmp after_reloc
+
+after_reloc:
+
+    mov word ptr [rsp-8 + 40], HEX(0000) // CodePointer
+    mov r9w, HEX(9020)  // CodeSegment
+    mov r8w, HEX(9000)  // StackPointer
+    mov dx, HEX(9000)   // StackSegment
+    mov rcx, offset Regs
+    call Relocator16Boot
+
+    /* Cleanup and return */
+    add rsp, (8*4 + 8)
+    pop rdi
+    pop rsi
+
+    /* We must never get there */
+    int 3
+
+END
diff --git a/boot/freeldr/freeldr/arch/realmode/amd64.S b/boot/freeldr/freeldr/arch/realmode/amd64.S
index d6bf02c639b..0a97afd402d 100644
--- a/boot/freeldr/freeldr/arch/realmode/amd64.S
+++ b/boot/freeldr/freeldr/arch/realmode/amd64.S
@@ -8,7 +8,7 @@
 
 .code16
 
-/* fat helper code */
+/* FAT helper code */
 #include "fathelp.inc"
 
 .org 512
@@ -107,20 +107,20 @@ Msg_SwitchToLongMode:
 gdt:
     .word HEX(0000), HEX(0000), HEX(0000), HEX(0000) /* 00: NULL descriptor */
     .word HEX(0000), HEX(0000), HEX(0000), HEX(0000) /* 08: */
-    .word HEX(0000), HEX(0000), HEX(9800), HEX(0020) /* 10: long mode cs */
-    .word HEX(ffff), HEX(0000), HEX(f300), HEX(00cf) /* 18: long mode ds */
+    .word HEX(0000), HEX(0000), HEX(9800), HEX(0020) /* 10: long mode CS */
+    .word HEX(FFFF), HEX(0000), HEX(F300), HEX(00CF) /* 18: long mode DS */
     .word HEX(FFFF), HEX(0000), HEX(9E00), HEX(0000) /* 20: 16-bit real mode CS */
     .word HEX(FFFF), HEX(0000), HEX(9200), HEX(0000) /* 28: 16-bit real mode DS */
-    .word HEX(FFFF), HEX(0000), HEX(9B00), HEX(00CF) /* 30: compat mode cs */
+    .word HEX(FFFF), HEX(0000), HEX(9B00), HEX(00CF) /* 30: compat mode CS */
 
 /* GDT table pointer */
 gdtptr:
-    .word HEX(37) /* Limit */
-    .long offset gdt /* Base Address */
+    .word HEX(37)       /* Limit */
+    .long offset gdt    /* Base Address */
 
 
 CheckFor64BitSupport:
-    /* Check if CPU supports CPUID */
+    /* Check whether the CPU supports CPUID */
     pushad
     pushfd
     pop eax
@@ -130,7 +130,7 @@ CheckFor64BitSupport:
     popfd
     pushfd
     pop eax
-    cmp eax,ebx
+    cmp eax, ebx
     jnz .CheckForPAE
 
     mov si, offset .Msg_NoCpuidSupport
@@ -144,10 +144,10 @@ CheckFor64BitSupport:
 
 .CheckForPAE:
     /* CPUID support detected - getting the PAE/PGE */
-    mov eax,1 // Fn0000_0001 - PAE in EDX[6]
+    mov eax, 1 // Fn0000_0001 - PAE in EDX[6]
     cpuid
-    and edx, HEX(00a0)
-    cmp edx, HEX(00a0)
+    and edx, HEX(00A0)
+    cmp edx, HEX(00A0)
     je .CheckForLongMode
 
     mov si, offset .Msg_NoPAE
@@ -164,7 +164,7 @@ CheckFor64BitSupport:
     mov eax, HEX(80000001)
     cpuid
     and edx, HEX(20000000)
-    test edx,edx
+    test edx, edx
     jnz .Success
 
     mov si, offset .Msg_NoLongMode
@@ -195,7 +195,7 @@ BuildPageTables:
 
     /* One entry in the PML4 pointing to PDP */
     mov eax, PDP_ADDRESS
-    or eax, HEX(0f)
+    or eax, HEX(0F)
     stosd
 
     /* clear rest */
@@ -205,7 +205,7 @@ BuildPageTables:
 
     /* One entry in the PDP pointing to PD */
     mov eax, PD_ADDRESS
-    or eax, HEX(0f)
+    or eax, HEX(0F)
     stosd
 
     /* clear rest */
@@ -218,8 +218,8 @@ BuildPageTables:
     mov eax, HEX(008f)
 
 .Bpt2:
-    mov es: [di], eax
-    mov dword ptr es: [di + 4], 0
+    mov es:[di], eax
+    mov dword ptr es:[di + 4], 0
     add eax, 512 * 4096 // add 512 4k pages
     add di, 8
 
@@ -242,7 +242,7 @@ BuildPageTables:
 RealModeEntryPoint:
     /* Disable Protected Mode */
     mov eax, cr0
-    and eax, HEX(0fffffffe) // ~0x00000001
+    and eax, HEX(0FFFFFFFE) // ~0x00000001
     mov cr0, eax
 
     /* Clear prefetch queue & correct CS */
@@ -295,7 +295,7 @@ ExitToLongMode:
     mov word ptr ds:[stack16], sp
 
     /* Set PAE and PGE: 10100000b */
-    mov eax, HEX(00a0)
+    mov eax, HEX(00A0)
     mov cr4, eax
 
     /* Point cr3 at the PML4 */
@@ -322,7 +322,7 @@ InLongMode:
     //DB 66h, 66h, 0C7h, 04h, 25h, 00h, 80h, 0Bh, 00h, 31h, 0Eh
     //mov word ptr [HEX(b8000)], HEX(0e00) + '1'
 
-    .byte HEX(0ff), HEX(25) // opcode of 64bit indirect jump
+    .byte HEX(0FF), HEX(25) // opcode of 64bit indirect jump
     .long 1 // relative address of LongModeEntryPoint
     nop
 LongModeEntryPoint:
@@ -334,11 +334,10 @@ LongModeEntryPoint:
 CallbackTable:
     .word Int386
     .word Reboot
-    .word ChainLoadBiosBootSectorCode
+    .word Relocator16Boot
     .word PxeCallApi
     .word PnpBiosGetDeviceNodeCount
     .word PnpBiosGetDeviceNode
-    .word 0 // BootLinuxKernel
 
 /* 16-bit stack pointer */
 stack16: