Perf improvements:

[NTOS]: Optimize trap entry/exit by manually copying registers to the trap frame the correct way from the first time. Avoids conversion between PUSHA and KTRAP_FRAMEs and makes trap frames compatible the whole time (other than being slightly faster).
    [NTOS]: Provide compiler with hints on likely code paths during trap entry and exit, which makes the code more linear and improves performance. The following assumptions (known to be true) are made: (1) Interrupts happen more often than system calls (per unit of time), so prioritize paths we take during interrupts. (2) The CPU spends most of its time in Ring 3, so prioritize traps from user-mode. (3) V8086 mode, debugging, 16-bit stacks, are uncommon, so de-prioritize them.
    [NTOS]: Use KTRAP_FRAME offset names recommended by Timo instead of substraction which was confusing some people (still seems clearer to me).

svn path=/trunk/; revision=45064
This commit is contained in:
Sir Richard 2010-01-13 22:01:20 +00:00
parent 1cb5c9f019
commit 03b23a2450
3 changed files with 59 additions and 103 deletions

View file

@ -237,9 +237,17 @@ _&Name:
push 0
.endif
/* Save all register state before we touch it */
pushad
/* Make space for trap frame and save registers before we enter C code */
sub esp, KTRAP_FRAME_ERROR_CODE
mov [esp+KTRAP_FRAME_EAX], eax
mov [esp+KTRAP_FRAME_EBX], ebx
mov [esp+KTRAP_FRAME_ECX], ecx
mov [esp+KTRAP_FRAME_EDX], edx
mov [esp+KTRAP_FRAME_ESI], esi
mov [esp+KTRAP_FRAME_EDI], edi
mov [esp+KTRAP_FRAME_EBP], ebp
mov ecx, esp
/*
* The GPF and Invalid Opcode handlers are performance critical when talking
* about V8086 traps, because they control the main flow of execution during
@ -270,14 +278,9 @@ _&Name:
* up DS/ES and lets us go on our merry way...
*/
.if \FastV86
/* ESP+12 is EFLAGS on interrupt frame, add 8*4 for the PUSHAD frame */
mov edx, [esp+12+8*4]
mov edx, [esp+KTRAP_FRAME_EFLAGS]
.endif
/* Now make space for the trap frame and store the pointer as first arg */
sub esp, KTRAP_FRAME_LENGTH - KTRAP_FRAME_PREVIOUS_MODE
mov ecx, esp
/* Normally we just have one parameter, but fast V86 handlers need two */
.if \FastV86
jmp @&Name&Handler@8

View file

@ -137,44 +137,6 @@ KiVdmTrap(IN PKTRAP_FRAME TrapFrame)
((KiUserTrap(TrapFrame)) && (PsGetCurrentProcess()->VdmObjects)));
}
VOID
FORCEINLINE
KiTrapFrameFromPushaStack(IN PKTRAP_FRAME TrapFrame)
{
/*
* This sequence is Bavarian Alchemist Black Magic
*
* *** DO NOT MODIFY ***
*/
TrapFrame->Edx = TrapFrame->Esi;
TrapFrame->Esi = TrapFrame->PreviousPreviousMode;
TrapFrame->Ecx = TrapFrame->Ebx;
TrapFrame->Ebx = TrapFrame->Edi;
TrapFrame->Edi = TrapFrame->Eax;
TrapFrame->Eax = TrapFrame->Ebp;
TrapFrame->Ebp = (ULONG)TrapFrame->ExceptionList;
TrapFrame->TempEsp = TrapFrame->SegFs;
}
VOID
FORCEINLINE
KiPushaStackFromTrapFrame(IN PKTRAP_FRAME TrapFrame)
{
/*
* This sequence is Bavarian Alchemist Black Magic
*
* *** DO NOT MODIFY ***
*/
TrapFrame->SegFs = TrapFrame->TempEsp;
TrapFrame->ExceptionList = (PVOID)TrapFrame->Ebp;
TrapFrame->Ebp = TrapFrame->Eax;
TrapFrame->Eax = TrapFrame->Edi;
TrapFrame->Edi = TrapFrame->Ebx;
TrapFrame->Ebx = TrapFrame->Ecx;
TrapFrame->PreviousPreviousMode = TrapFrame->Esi;
TrapFrame->Esi = TrapFrame->Edx;
}
VOID
FORCEINLINE
KiCheckForApcDelivery(IN PKTRAP_FRAME TrapFrame)
@ -247,19 +209,29 @@ FORCEINLINE
VOID
KiTrapReturn(IN PKTRAP_FRAME TrapFrame)
{
/* Restore registers */
KiPushaStackFromTrapFrame(TrapFrame);
/* Regular interrupt exit */
__asm__ __volatile__
(
"movl %0, %%esp\n"
"addl %1, %%esp\n"
"popa\n"
"addl $4, %%esp\n"
"movl %c[a](%%esp), %%eax\n"
"movl %c[b](%%esp), %%ebx\n"
"movl %c[c](%%esp), %%ecx\n"
"movl %c[d](%%esp), %%edx\n"
"movl %c[s](%%esp), %%esi\n"
"movl %c[i](%%esp), %%edi\n"
"movl %c[p](%%esp), %%ebp\n"
"addl $%c[e],%%esp\n"
"iret\n"
:
: "r"(TrapFrame), "i"(KTRAP_FRAME_LENGTH - KTRAP_FRAME_PREVIOUS_MODE)
: "r"(TrapFrame),
[a] "i"(KTRAP_FRAME_EAX),
[b] "i"(KTRAP_FRAME_EBX),
[c] "i"(KTRAP_FRAME_ECX),
[d] "i"(KTRAP_FRAME_EDX),
[s] "i"(KTRAP_FRAME_ESI),
[i] "i"(KTRAP_FRAME_EDI),
[p] "i"(KTRAP_FRAME_EBP),
[e] "i"(KTRAP_FRAME_EIP)
: "%esp"
);
}

View file

@ -56,14 +56,11 @@ KiExitTrap(IN PKTRAP_FRAME TrapFrame,
KTRAP_EXIT_SKIP_BITS SkipBits = { .Bits = Skip };
KiExitTrapDebugChecks(TrapFrame, SkipBits);
/* If you skip volatile reload, you must skip segment reload */
ASSERT((SkipBits.SkipVolatiles == FALSE) || (SkipBits.SkipSegments == TRUE));
/* Restore the SEH handler chain */
KeGetPcr()->Tib.ExceptionList = TrapFrame->ExceptionList;
/* Check if the previous mode must be restored */
if (!SkipBits.SkipPreviousMode)
if (__builtin_expect(!SkipBits.SkipPreviousMode, 0)) /* More INTS than SYSCALLs */
{
/* Not handled yet */
UNIMPLEMENTED;
@ -71,7 +68,7 @@ KiExitTrap(IN PKTRAP_FRAME TrapFrame,
}
/* Check if there are active debug registers */
if (TrapFrame->Dr7 & ~DR7_RESERVED_MASK)
if (__builtin_expect(TrapFrame->Dr7 & ~DR7_RESERVED_MASK, 0))
{
/* Not handled yet */
UNIMPLEMENTED;
@ -79,10 +76,10 @@ KiExitTrap(IN PKTRAP_FRAME TrapFrame,
}
/* Check if this was a V8086 trap */
if (TrapFrame->EFlags & EFLAGS_V86_MASK) KiTrapReturn(TrapFrame);
if (__builtin_expect(TrapFrame->EFlags & EFLAGS_V86_MASK, 0)) KiTrapReturn(TrapFrame);
/* Check if the trap frame was edited */
if (!(TrapFrame->SegCs & FRAME_EDITED))
if (__builtin_expect(!(TrapFrame->SegCs & FRAME_EDITED), 0))
{
/* Not handled yet */
UNIMPLEMENTED;
@ -90,7 +87,7 @@ KiExitTrap(IN PKTRAP_FRAME TrapFrame,
}
/* Check if this is a user trap */
if (KiUserTrap(TrapFrame))
if (__builtin_expect(KiUserTrap(TrapFrame), 1)) /* Ring 3 is where we spend time */
{
/* Check if segments should be restored */
if (!SkipBits.SkipSegments)
@ -105,20 +102,9 @@ KiExitTrap(IN PKTRAP_FRAME TrapFrame,
/* Always restore FS since it goes from KPCR to TEB */
Ke386SetFs(TrapFrame->SegFs);
}
/* Check if the caller wants to skip volatiles */
if (SkipBits.SkipVolatiles)
{
/*
* When we do the system call handler through this path, we need
* to have some sort to restore the kernel EAX instead of pushing
* back the user EAX. We'll figure it out...
*/
DPRINT1("Warning: caller doesn't want volatiles restored\n");
}
/* Check for ABIOS code segment */
if (TrapFrame->SegCs == 0x80)
if (__builtin_expect(TrapFrame->SegCs == 0x80, 0))
{
/* Not handled yet */
UNIMPLEMENTED;
@ -126,16 +112,20 @@ KiExitTrap(IN PKTRAP_FRAME TrapFrame,
}
/* Check for system call -- a system call skips volatiles! */
if (SkipBits.SkipVolatiles)
if (__builtin_expect(SkipBits.SkipVolatiles, 0)) /* More INTs than SYSCALLs */
{
/* Not handled yet */
UNIMPLEMENTED;
while (TRUE);
/*
* When we do the system call handler through this path, we need
* to have some sort to restore the kernel EAX instead of pushing
* back the user EAX. We'll figure it out...
*/
DPRINT1("Warning: caller doesn't want volatiles restored\n");
}
else
{
/* Return from interrupt */
KiTrapReturn(TrapFrame);
KiTrapReturn(TrapFrame);
}
}
@ -154,7 +144,7 @@ KiExitV86Trap(IN PKTRAP_FRAME TrapFrame)
Thread->Alerted[KernelMode] = FALSE;
/* Are there pending user APCs? */
if (!Thread->ApcState.UserApcPending) break;
if (__builtin_expect(!Thread->ApcState.UserApcPending, 1)) break;
/* Raise to APC level and enable interrupts */
OldIrql = KfRaiseIrql(APC_LEVEL);
@ -168,11 +158,11 @@ KiExitV86Trap(IN PKTRAP_FRAME TrapFrame)
_disable();
/* Return if this isn't V86 mode anymore */
if (TrapFrame->EFlags & EFLAGS_V86_MASK) return;
if (__builtin_expect(TrapFrame->EFlags & EFLAGS_V86_MASK, 0)) return;
}
/* If we got here, we're still in a valid V8086 context, so quit it */
if (TrapFrame->Dr7 & ~DR7_RESERVED_MASK)
if (__builtin_expect(TrapFrame->Dr7 & ~DR7_RESERVED_MASK, 0))
{
/* Not handled yet */
UNIMPLEMENTED;
@ -207,10 +197,7 @@ KiEnterV86Trap(IN PKTRAP_FRAME TrapFrame)
Ke386SetFs(KGDT_R0_PCR);
Ke386SetDs(KGDT_R3_DATA | RPL_MASK);
Ke386SetEs(KGDT_R3_DATA | RPL_MASK);
/* Save registers */
KiTrapFrameFromPushaStack(TrapFrame);
/* Save exception list and bogus previous mode */
TrapFrame->PreviousPreviousMode = -1;
TrapFrame->ExceptionList = KeGetPcr()->Tib.ExceptionList;
@ -220,7 +207,7 @@ KiEnterV86Trap(IN PKTRAP_FRAME TrapFrame)
/* Save DR7 and check for debugging */
TrapFrame->Dr7 = __readdr(7);
if (TrapFrame->Dr7 & ~DR7_RESERVED_MASK)
if (__builtin_expect(TrapFrame->Dr7 & ~DR7_RESERVED_MASK, 0))
{
UNIMPLEMENTED;
while (TRUE);
@ -231,21 +218,18 @@ VOID
FASTCALL
KiEnterInterruptTrap(IN PKTRAP_FRAME TrapFrame)
{
/* Save registers */
KiTrapFrameFromPushaStack(TrapFrame);
/* Set bogus previous mode */
TrapFrame->PreviousPreviousMode = -1;
/* Check for V86 mode */
if (TrapFrame->EFlags & EFLAGS_V86_MASK)
if (__builtin_expect(TrapFrame->EFlags & EFLAGS_V86_MASK, 0))
{
UNIMPLEMENTED;
while (TRUE);
}
/* Check if this wasn't kernel code */
if (TrapFrame->SegCs != KGDT_R0_CODE)
if (__builtin_expect(TrapFrame->SegCs != KGDT_R0_CODE, 1)) /* Ring 3 is more common */
{
/* Save segments and then switch to correct ones */
TrapFrame->SegFs = Ke386GetFs();
@ -269,7 +253,7 @@ KiEnterInterruptTrap(IN PKTRAP_FRAME TrapFrame)
/* Flush DR7 and check for debugging */
TrapFrame->Dr7 = 0;
if (KeGetCurrentThread()->DispatcherHeader.DebugActive & 0xFF)
if (__builtin_expect(KeGetCurrentThread()->DispatcherHeader.DebugActive & 0xFF, 0))
{
UNIMPLEMENTED;
while (TRUE);
@ -307,22 +291,19 @@ KiEnterTrap(IN PKTRAP_FRAME TrapFrame)
TrapFrame->SegGs = Ke386GetGs();
Ke386SetFs(KGDT_R0_PCR);
/* Save registers */
KiTrapFrameFromPushaStack(TrapFrame);
/* Save exception list and bogus previous mode */
TrapFrame->PreviousPreviousMode = -1;
TrapFrame->ExceptionList = KeGetPcr()->Tib.ExceptionList;
/* Check for 16-bit stack */
if ((ULONG_PTR)TrapFrame < 0x10000)
if (__builtin_expect((ULONG_PTR)TrapFrame < 0x10000, 0))
{
UNIMPLEMENTED;
while (TRUE);
}
/* Check for V86 mode */
if (TrapFrame->EFlags & EFLAGS_V86_MASK)
if (__builtin_expect(TrapFrame->EFlags & EFLAGS_V86_MASK, 0))
{
/* Restore V8086 segments into Protected Mode segments */
TrapFrame->SegFs = TrapFrame->V86Fs;
@ -336,7 +317,7 @@ KiEnterTrap(IN PKTRAP_FRAME TrapFrame)
/* Flush DR7 and check for debugging */
TrapFrame->Dr7 = 0;
if (KeGetCurrentThread()->DispatcherHeader.DebugActive & 0xFF)
if (__builtin_expect(KeGetCurrentThread()->DispatcherHeader.DebugActive & 0xFF, 0))
{
UNIMPLEMENTED;
while (TRUE);
@ -1041,13 +1022,13 @@ KiTrap0DHandler(IN PKTRAP_FRAME TrapFrame,
KIRQL OldIrql;
/* Check for V86 GPF */
if (EFlags & EFLAGS_V86_MASK)
if (__builtin_expect(EFlags & EFLAGS_V86_MASK, 1))
{
/* Enter V86 trap */
KiEnterV86Trap(TrapFrame);
/* Must be a VDM process */
if (!PsGetCurrentProcess()->VdmObjects)
if (__builtin_expect(!PsGetCurrentProcess()->VdmObjects, 0))
{
/* Enable interrupts */
_enable();
@ -1063,7 +1044,7 @@ KiTrap0DHandler(IN PKTRAP_FRAME TrapFrame,
_enable();
/* Handle the V86 opcode */
if (Ki386HandleOpcodeV86(TrapFrame) == 0xFF)
if (__builtin_expect(Ki386HandleOpcodeV86(TrapFrame) == 0xFF, 0))
{
/* Should only happen in VDM mode */
UNIMPLEMENTED;
@ -1075,7 +1056,7 @@ KiTrap0DHandler(IN PKTRAP_FRAME TrapFrame,
_disable();
/* Do a quick V86 exit if possible */
if (TrapFrame->EFlags & EFLAGS_V86_MASK) KiExitV86Trap(TrapFrame);
if (__builtin_expect(TrapFrame->EFlags & EFLAGS_V86_MASK, 1)) KiExitV86Trap(TrapFrame);
/* Exit trap the slow way */
KiEoiHelper(TrapFrame);