- Replace the broken implementation of HalpCalibrateStallExecution with a new implementation by a mysterious HAL ninja and myself. The old implementation calculated the stall scale factor incorrectly and produced bogus results that were off by several thousand, varied by as much on each boot, and can best be described as "rand() made complicated". The new implementation installs its own RTC interrupt handler to accurately calculate the stall scale factor, all done in assembler instead of broken C. Fixes the hang at boot when initializing Uniata, as stalls no longer take 10 times or more as long to execute than they should.

svn path=/trunk/; revision=43789
This commit is contained in:
Stefan Ginsberg 2009-10-27 01:03:41 +00:00
parent 47b50e1e0b
commit 838ed7f31a
3 changed files with 378 additions and 193 deletions

View file

@ -97,12 +97,12 @@ HalInitSystem(IN ULONG BootPhase,
/* Force initial PIC state */
KfRaiseIrql(KeGetCurrentIrql());
/* Initialize the clock */
HalpInitializeClock();
/* Setup busy waiting */
HalpCalibrateStallExecution();
/* Initialize the clock */
HalpInitializeClock();
/* Fill out the dispatch tables */
HalQuerySystemInformation = HaliQuerySystemInformation;
HalSetSystemInformation = HaliSetSystemInformation;

View file

@ -17,12 +17,387 @@ _HalpLastPerfCounterLow: .long 0
/*
 * Zero-initialized 32-bit words.
 * NOTE(review): the names suggest halves of 64-bit performance-counter
 * values; the code that reads/writes them is outside this view -- confirm.
 */
_HalpLastPerfCounterHigh: .long 0
_HalpPerfCounterLow: .long 0
_HalpPerfCounterHigh: .long 0
/* Lock word handed to ACQUIRE_SPINLOCK / RELEASE_SPINLOCK by the CMOS lock routines (SMP builds) */
_HalpSystemHardwareLock: .long 0
/* EFLAGS image stored when the CMOS lock is acquired and reloaded with popf when it is released */
_HalpSystemHardwareFlags: .long 0
/* NUL-terminated format string taking one %lx argument; its users are outside this view */
_UnhandledMsg:
.asciz "\n\x7\x7!!! Unhandled or Unexpected Code at line: %lx!!!\n"
/* FUNCTIONS *****************************************************************/
/*
 * HalpReleaseCmosSpinLock
 *
 * Counterpart of HalpAcquireSystemHardwareSpinLock: reloads EFLAGS from
 * _HalpSystemHardwareFlags and, on SMP builds, releases
 * _HalpSystemHardwareLock first.
 *
 * In:    nothing
 * Out:   nothing (EAX is preserved)
 * Flags: replaced by the image held in _HalpSystemHardwareFlags
 */
.global _HalpReleaseCmosSpinLock@0
.func HalpReleaseCmosSpinLock@0
_HalpReleaseCmosSpinLock@0:
#ifndef CONFIG_SMP
    /* Uniprocessor build: there is no lock, only the saved EFLAGS to reload */
    push _HalpSystemHardwareFlags
    popf
    ret
#else
    /* EAX is borrowed to hold the lock address */
    push eax

    /*
     * Fetch the saved EFLAGS onto the stack while the lock is still held;
     * they are only loaded into the CPU after the release below.
     */
    push _HalpSystemHardwareFlags

    /* Drop the lock */
    mov eax, offset _HalpSystemHardwareLock
    RELEASE_SPINLOCK(eax)

    /* Load the fetched EFLAGS, recover EAX and return */
    popf
    pop eax
    ret
#endif
.endfunc
/*
 * HalpAcquireSystemHardwareSpinLock
 *
 * Saves the caller's EFLAGS into _HalpSystemHardwareFlags, disables
 * interrupts and (SMP builds only) acquires _HalpSystemHardwareLock.
 * Undone by HalpReleaseCmosSpinLock.
 *
 * In:    nothing
 * Out:   nothing (EAX is preserved); interrupts are disabled on return
 * Stack: balanced on every path, including the contended-lock retry path
 */
.global _HalpAcquireSystemHardwareSpinLock@0
.func HalpAcquireSystemHardwareSpinLock@0
_HalpAcquireSystemHardwareSpinLock@0:
#ifdef CONFIG_SMP
    /* Save clobbered register */
    push eax

HardwareLock:
    /* Save EFLAGS and disable interrupts */
    pushf
    cli

    /*
     * This is the CMOS lock, try to acquire it.
     * NOTE(review): ACQUIRE_SPINLOCK is defined elsewhere; it is expected to
     * branch to CmosSpin when the lock is already owned -- confirm.
     */
    lea eax, _HalpSystemHardwareLock
    ACQUIRE_SPINLOCK(eax, CmosSpin)

    /* We have it: hand the saved EFLAGS to the release routine */
    pop _HalpSystemHardwareFlags
    pop eax
    ret

CmosSpin:
    /*
     * The lock is busy. Pop the EFLAGS image pushed at HardwareLock back
     * into the CPU so the caller's interrupt state is in effect while we
     * wait, and so the stack is balanced again before the retry pushes a
     * fresh copy. (Pushing another image here instead of popping would
     * leak one stack slot per retry and break the final pop/pop/ret.)
     */
    popf

    /* Spin until the lock looks free, then retry from HardwareLock */
    SPIN_ON_LOCK(eax, HardwareLock)
#else
    /* Save EFLAGS, disable interrupts and return */
    pushf
    cli
    pop _HalpSystemHardwareFlags
    ret
#endif
.endfunc
/*
 * HalpCalibrateStallExecution
 *
 * Measures how many iterations of a "sub eax, 1 / jnz" loop execute between
 * two RTC periodic interrupts and stores the resulting iterations-per-
 * microsecond value (rounded up) in fs:[KPCR_STALL_SCALE_FACTOR].
 *
 * Outline:
 *   1. Save EFLAGS and the PIC masks, then mask everything except bits 2
 *      and 8 of the combined 16-bit mask.
 *   2. Save IDT entry 0x38 and point it at OnlyOnePersonCanWriteHalCode.
 *   3. Program the RTC, enable interrupts and spin in Looper.
 *   4. First interrupt: re-arm the RTC and restart Looper with EAX = 0.
 *   5. Second interrupt (ComputeStall): compute and store the factor,
 *      disable the RTC interrupt and return to AndItsNotYou.
 *   6. AndItsNotYou: restore the IDT entry, PIC masks, EFLAGS and the frame.
 *
 * Frame layout:
 *   [ebp-8 .. ebp-3]  6-byte SIDT result (16-bit limit, 32-bit base)
 *   [ebp-12]          number of RTC interrupts taken so far
 * Pushed below the frame, in order: EFLAGS, PIC masks, old IDT entry low
 * dword, old IDT entry high dword, address of the IDT entry.
 *
 * The interrupt handler addresses [ebp-12] directly; this works because
 * Looper never changes EBP, so EBP is still this function's frame pointer
 * when the interrupt arrives.
 */
.global _HalpCalibrateStallExecution@0
.func HalpCalibrateStallExecution@0
_HalpCalibrateStallExecution@0:
/* Setup the stack frame */
push ebp
mov ebp, esp
sub esp, 12
/* Save EFLAGS and kill interrupts */
pushf
cli
/* Get the current interrupt mask on the PICs: eax = (port 0xA1 << 8) | port 0x21 */
xor eax, eax
in al, 0xA1
shl eax, 8
in al, 0x21
/* Save it */
push eax
/* Now mask everything except the RTC and PIC 2 chain-interrupt */
mov eax, ~((1 << 2) | (1 << 8))
/* Program the PICs: low byte to port 0x21, next byte to port 0xA1 */
out 0x21, al
shr eax, 8
out 0xA1, al
/* Now get the IDT; the 32-bit base follows the 16-bit limit, hence ebp-6 */
sidt [ebp-8]
mov ecx, [ebp-6]
/* Get the IDT entry for the RTC: vector 0x38, 8 bytes per entry */
mov eax, 0x38
shl eax, 3
add ecx, eax
/* Save the original RTC ISR (both dwords) and the entry address for AndItsNotYou */
push [ecx]
push [ecx+4]
push ecx
/*
 * Now load our new handler: handler offset bits 0-15, code selector,
 * the word 0x8E00, then handler offset bits 16-31.
 */
mov eax, offset OnlyOnePersonCanWriteHalCode
mov [ecx], ax
mov word ptr [ecx+2], KGDT_R0_CODE
mov word ptr [ecx+4], 0x8E00
shr eax, 16
mov [ecx+6], ax
/* Reset our counter */
mov dword ptr [ebp-12], 0
/* Acquire CMOS lock */
call _HalpAcquireSystemHardwareSpinLock@0
/* Now initialize register A on the CMOS (index 0xA, value 0x2D) */
mov ax, (0x2D << 8) | 0xA
out 0x70, al
jmp $+2
mov al, ah
out 0x71, al
jmp $+2
/* Read register B */
mov ax, 0xB
out 0x70, al
jmp $+2
in al, 0x71
jmp $+2
/* Don't touch the LastKnownGoodConfig hack (only bit 0 of the old value is kept) */
and al, 1
mov ah, al
/* Enable the interrupt */
or ah, 0x42
/* Now write the register B */
mov al, 0xB
out 0x70, al
jmp $+2
mov al, ah
out 0x71, al
jmp $+2
/* Read register C (value is discarded) */
mov al, 0xC
out 0x70, al
jmp $+2
in al, 0x71
jmp $+2
/* Read register D (value is discarded) */
mov al, 0xD
out 0x70, al
jmp $+2
in al, 0x71
jmp $+2
/*
 * Release CMOS lock.
 * NOTE(review): the counter was already zeroed above and nothing visible
 * here writes it in between, so this second store looks redundant.
 */
mov dword ptr [ebp-12], 0
call _HalpReleaseCmosSpinLock@0
/* Initialize looper */
xor eax, eax
/* Align to 16 bytes */
.align 16
/* Enable interrupts! */
sti
jmp Looper
/* Align to 16 bytes */
.align 16
/*
 * Subtract one count. EAX starts at 0 and counts downwards, so its negated
 * value is the number of iterations executed so far.
 */
Looper:
sub eax, 1
jnz Looper
/* ASSERT: If we got here, then the RTC never fired (EAX wrapped all the way back to 0) */
call _DbgBreakPoint@0
jmp Looper
OnlyOnePersonCanWriteHalCode:
/*********************** THIS IS THE RTC HANDLER **************************/
/* Increment the interrupt count and check if this is the first one */
inc dword ptr [ebp-12]
cmp dword ptr [ebp-12], 1
jnz ComputeStall
/*
 * It is the first one -- we'll ignore it, since it fires randomly!
 * Get rid of the old return address and push the new one in (our looper).
 * The return address is the top dword of the interrupt frame, so iretd
 * below resumes at Looper instead of the interrupted instruction.
 */
pop eax
push offset Looper
/* Acquire CMOS lock */
call _HalpAcquireSystemHardwareSpinLock@0
/* Now initialize register A on the CMOS */
mov ax, (0x2D << 8) | 0xA
out 0x70, al
jmp $+2
mov al, ah
out 0x71, al
jmp $+2
/* Read register B */
mov ax, 0xB
out 0x70, al
jmp $+2
in al, 0x71
jmp $+2
/* Don't touch the LastKnownGoodConfig hack */
and al, 1
mov ah, al
/* Enable the interrupt */
or ah, 0x42
/* Now write the register B */
mov al, 0xB
out 0x70, al
jmp $+2
mov al, ah
out 0x71, al
jmp $+2
/* Read register C */
mov al, 0xC
out 0x70, al
jmp $+2
in al, 0x71
jmp $+2
/* Read register D */
mov al, 0xD
out 0x70, al
jmp $+2
in al, 0x71
jmp $+2
/* Release CMOS lock */
call _HalpReleaseCmosSpinLock@0
/* Dismiss the interrupt: 0x20 to port 0xA0, then 0x62 to port 0x20 */
mov al, 0x20
out 0xA0, al
mov al, 0x62
out 0x20, al
/* Reset the counter and return back to the looper */
xor eax, eax
iretd
/******************* THIS IS THE 2ND RTC HANDLER **************************/
ComputeStall:
/*
 * Do the calculation: EAX still holds Looper's downward counter, so
 * negating it gives the iterations executed since the first interrupt.
 * Dividing by 125000 turns that into iterations per microsecond.
 */
neg eax
xor edx, edx
mov ecx, 125000 /* RTC fires every 125 ms */
div ecx
/* Is the remainder 0? */
cmp edx, 0
jz FoundFactor
/* Otherwise fix-up the loop count (round the quotient up) */
inc eax
FoundFactor:
/* Save the stall scale factor */
mov fs:[KPCR_STALL_SCALE_FACTOR], eax
/*
 * Prepare for interrupt return: replace the return address in the
 * interrupt frame so iretd lands on AndItsNotYou.
 * NOTE(review): the 0x13 loaded into EAX is overwritten by the register
 * loads that follow and appears unused.
 */
pop eax
push offset AndItsNotYou
mov eax, 0x13
/* Acquire CMOS lock */
call _HalpAcquireSystemHardwareSpinLock@0
/* Now initialize register A on the CMOS */
mov ax, (0x2D << 8) | 0xA
out 0x70, al
jmp $+2
mov al, ah
out 0x71, al
jmp $+2
/* Read register B */
mov ax, 0xB
out 0x70, al
jmp $+2
in al, 0x71
jmp $+2
/* Don't touch the LastKnownGoodConfig hack */
and al, 1
mov ah, al
/* Disable the interrupt (only bit 1 is set; the 0x40 bit used above to enable it stays clear) */
or ah, 0x2
/* Now write the register B */
mov al, 0xB
out 0x70, al
jmp $+2
mov al, ah
out 0x71, al
jmp $+2
/* Read register C */
mov al, 0xC
out 0x70, al
jmp $+2
in al, 0x71
jmp $+2
/* Release CMOS lock */
call _HalpReleaseCmosSpinLock@0
/* Dismiss the interrupt */
mov al, 0x20
out 0xA0, al
mov al, 0x62
out 0x20, al
/*
 * Disable interrupts on return: the frame is EIP at [esp], CS at [esp+4]
 * and EFLAGS at [esp+8], so this clears the interrupt flag in the EFLAGS
 * image that iretd is about to load.
 */
and word ptr [esp+8], ~EFLAGS_INTERRUPT_MASK
iretd
/************************* WE ARE BACK FROM RTC ***************************/
AndItsNotYou:
/* Restore the IDT entry: pops mirror the three pushes made before installing the handler */
pop ecx
pop [ecx+4]
pop [ecx]
/* Restore the mask (low byte to port 0x21, next byte to port 0xA1) */
pop eax
out 0x21, al
shr eax, 8
out 0xA1, al
/* Restore EFLAGS saved at function entry */
popf
/* Restore stack and return */
mov esp, ebp
pop ebp
ret
.endfunc
.globl _KeStallExecutionProcessor@4
.func KeStallExecutionProcessor@4
_KeStallExecutionProcessor@4:
@ -54,34 +429,6 @@ Done:
ret 4
.endfunc
/*
 * HalpQuery8254Counter
 *
 * Writes 0 to port 0x43, then reads two bytes from port 0x40 and returns
 * them as a 16-bit value (first byte = bits 0-7, second byte = bits 8-15).
 *
 * In:    nothing
 * Out:   EAX = zero-extended 16-bit value (ECX holds the same value)
 * Flags: preserved; interrupts are disabled only for the duration of the
 *        port accesses
 */
.globl _HalpQuery8254Counter@0
.func HalpQuery8254Counter@0
_HalpQuery8254Counter@0:
    /* Keep the caller's EFLAGS and block interrupts during the port sequence */
    pushfd
    cli

    /* ECX accumulates the result; start from zero so the upper half stays clear */
    xor ecx, ecx

    /* Command byte 0 goes to port 0x43 */
    xor al, al
    out 0x43, al
    jmp $+2

    /* First data byte becomes bits 0-7 */
    in al, 0x40
    jmp $+2
    mov cl, al

    /* Second data byte becomes bits 8-15 */
    in al, 0x40
    mov ch, al

    /* Hand the value back and put the caller's EFLAGS in place again */
    mov eax, ecx
    popfd
    ret
.endfunc
.global _KeQueryPerformanceCounter@4
.func KeQueryPerformanceCounter@4
_KeQueryPerformanceCounter@4:

View file

@ -14,11 +14,6 @@
/* GLOBALS *******************************************************************/
/* time to wait */
#define MICROSECOND_TO_WAIT 1000
/* the tick count for 1 ms is 1193.182 (1193182 Hz) round it up */
#define TICKCOUNT_TO_WAIT 1194
BOOLEAN HalpClockSetMSRate;
ULONG HalpCurrentTimeIncrement;
ULONG HalpCurrentRollOver;
@ -135,161 +130,4 @@ HalSetTimeIncrement(IN ULONG Increment)
return HalpRolloverTable[Increment - 1].HighPart;
}
/*
 * Polls HalpQuery8254Counter() until two consecutive readings differ by at
 * least 300 in the upward direction, then returns the later reading.
 */
ULONG
WaitFor8254Wraparound(VOID)
{
    ULONG CurrentTicks;
    ULONG LastTicks;
    LONG Jump;

    CurrentTicks = HalpQuery8254Counter();
    for (;;)
    {
        /* Slide the window: the newest reading becomes the reference */
        LastTicks = CurrentTicks;
        CurrentTicks = HalpQuery8254Counter();
        Jump = CurrentTicks - LastTicks;

        /*
         * This limit for the jump seems arbitrary, but it isn't, it's
         * slightly above the level of error a buggy Mercury/Neptune
         * chipset timer can cause.
         */
        if (Jump >= 300)
        {
            break;
        }
    }

    return CurrentTicks;
}
/*
 * Searches for the Pcr->StallScaleFactor value at which
 * KeStallExecutionProcessor(MICROSECOND_TO_WAIT) takes about
 * TICKCOUNT_TO_WAIT ticks of the counter read by HalpQuery8254Counter(),
 * plus the measured call overhead.
 *
 * Steps:
 *   1. Measure the overhead in ticks of a minimal stall (factor 1, 1 us).
 *   2. Double the factor until the stall takes longer than the target or
 *      the factor overflows to 0.
 *   3. Step back to the last power of two and refine bit by bit: add each
 *      lower bit, and remove it again if the stall becomes too long.
 *   4. If the doubling phase ended below INITIAL_STALL_COUNT * 2 (which
 *      includes the overflow-to-0 case), use INITIAL_STALL_COUNT instead.
 *
 * Tick differences are computed as StartTicks - EndTicks because the
 * measurements are taken that way round throughout.
 */
VOID
NTAPI
HalpCalibrateStallExecution(VOID)
{
ULONG CalibrationBit;
ULONG EndTicks;
ULONG StartTicks;
ULONG OverheadTicks;
PKIPCR Pcr;
Pcr = (PKIPCR)KeGetPcr();
/* Measure the delay for the minimum call overhead in ticks */
Pcr->StallScaleFactor = 1;
StartTicks = WaitFor8254Wraparound();
KeStallExecutionProcessor(1);
EndTicks = HalpQuery8254Counter();
OverheadTicks = (StartTicks - EndTicks);
/* Coarse search: keep doubling until the stall exceeds the target tick count */
do
{
/* Increase the StallScaleFactor */
Pcr->StallScaleFactor = Pcr->StallScaleFactor * 2;
if (Pcr->StallScaleFactor == 0)
{
/* Nothing found (the factor overflowed) */
break;
}
/* Get the start ticks */
StartTicks = WaitFor8254Wraparound();
/* Wait for a defined time */
KeStallExecutionProcessor(MICROSECOND_TO_WAIT);
/* Get the end ticks */
EndTicks = HalpQuery8254Counter();
DPRINT("Pcr->StallScaleFactor: %d\n", Pcr->StallScaleFactor);
DPRINT("Time1 : StartTicks %i - EndTicks %i = %i\n",
StartTicks, EndTicks, StartTicks - EndTicks);
} while ((StartTicks - EndTicks) <= (TICKCOUNT_TO_WAIT + OverheadTicks));
/* A StallScaleFactor lesser than INITIAL_STALL_COUNT makes no sense */
if (Pcr->StallScaleFactor >= (INITIAL_STALL_COUNT * 2))
{
/* Adjust the StallScaleFactor: go back to the last value that was not too long */
Pcr->StallScaleFactor = Pcr->StallScaleFactor / 2;
/* Setup the CalibrationBit */
CalibrationBit = Pcr->StallScaleFactor;
/* Fine search: try each lower bit in turn, from the highest down to bit 0 */
for (;;)
{
/* Lower the CalibrationBit */
CalibrationBit = CalibrationBit / 2;
if (CalibrationBit == 0)
{
break;
}
/* Add the CalibrationBit */
Pcr->StallScaleFactor = Pcr->StallScaleFactor + CalibrationBit;
/* Get the start ticks */
StartTicks = WaitFor8254Wraparound();
/* Wait for a defined time */
KeStallExecutionProcessor(MICROSECOND_TO_WAIT);
/* Get the end ticks */
EndTicks = HalpQuery8254Counter();
DPRINT("Pcr->StallScaleFactor: %d\n", Pcr->StallScaleFactor);
DPRINT("Time2 : StartTicks %i - EndTicks %i = %i\n",
StartTicks, EndTicks, StartTicks - EndTicks);
if ((StartTicks-EndTicks) > (TICKCOUNT_TO_WAIT+OverheadTicks))
{
/* Too big so subtract the CalibrationBit */
Pcr->StallScaleFactor = Pcr->StallScaleFactor - CalibrationBit;
}
}
DPRINT("New StallScaleFactor: %d\n", Pcr->StallScaleFactor);
}
else
{
/* Set StallScaleFactor to the default */
Pcr->StallScaleFactor = INITIAL_STALL_COUNT;
}
#if 0
/* For debugging: four delay loops, each announced as a one-minute wait (compiled out) */
ULONG i;
DPRINT1("About to start delay loop test\n");
DPRINT1("Waiting for a minute...");
for (i = 0; i < (60*1000*20); i++)
{
KeStallExecutionProcessor(50);
}
DPRINT1("finished\n");
DPRINT1("About to start delay loop test\n");
DPRINT1("Waiting for a minute...");
for (i = 0; i < (60*1000); i++)
{
KeStallExecutionProcessor(1000);
}
DPRINT1("finished\n");
DPRINT1("About to start delay loop test\n");
DPRINT1("Waiting for a minute...");
for (i = 0; i < (60*1000*1000); i++)
{
KeStallExecutionProcessor(1);
}
DPRINT1("finished\n");
DPRINT1("About to start delay loop test\n");
DPRINT1("Waiting for a minute...");
KeStallExecutionProcessor(60*1000000);
DPRINT1("finished\n");
#endif
}
/* EOF */