[MEGAPERF]: This build introduces the following performance boosts:

- Using a dead stack lookaside list for up to 5 dead kernel thread stacks. 1500% improvement when rapidly destroying/creating threads, such as during second stage setup and many winetests.
- Using a free pool page lookaside list for up to 12 free non-paged or paged pool pages. 800% improvement when allocating big pages from the pool, as well as during pool expansion.
- Using a bucketized per-processor and local list (in the KPRCB) for block sizes between 1 and 32 bytes. 1000% improvement when rapidly allocating/freeing small pool allocations, and 8x reduction in pool fragmentation.

svn path=/trunk/; revision=56000
This commit is contained in:
Sir Richard 2012-03-04 17:56:00 +00:00
parent 6c3e9221be
commit 645e6a793a
6 changed files with 197 additions and 4 deletions

View file

@ -367,7 +367,7 @@ IopUnloadDevice(IN PDEVICE_OBJECT DeviceObject)
/* We can't unload unless there's an unload handler */
if (!DriverObject->DriverUnload)
{
DPRINT1("No DriverUnload function! '%wZ' will not be unloaded!\n", &DriverObject->DriverName);
DPRINT("No DriverUnload function! '%wZ' will not be unloaded!\n", &DriverObject->DriverName);
return;
}

View file

@ -1346,7 +1346,7 @@ ExQueryPoolUsage(OUT PULONG PagedPoolPages,
{
ULONG i;
PPOOL_DESCRIPTOR PoolDesc;
//
// Assume all failures
//
@ -1414,6 +1414,8 @@ ExAllocatePoolWithTag(IN POOL_TYPE PoolType,
KIRQL OldIrql;
USHORT BlockSize, i;
ULONG OriginalType;
PKPRCB Prcb = KeGetCurrentPrcb();
PGENERAL_LOOKASIDE LookasideList;
//
// Some sanity checks
@ -1561,6 +1563,57 @@ ExAllocatePoolWithTag(IN POOL_TYPE PoolType,
i = (USHORT)((NumberOfBytes + sizeof(POOL_HEADER) + (POOL_BLOCK_SIZE - 1))
/ POOL_BLOCK_SIZE);
//
// Handle lookaside list optimization for both paged and nonpaged pool
//
if (i <= MAXIMUM_PROCESSORS)
{
//
// Try popping it from the per-CPU lookaside list
//
LookasideList = (PoolType == PagedPool) ?
Prcb->PPPagedLookasideList[i - 1].P :
Prcb->PPNPagedLookasideList[i - 1].P;
LookasideList->TotalAllocates++;
Entry = (PPOOL_HEADER)InterlockedPopEntrySList(&LookasideList->ListHead);
if (!Entry)
{
//
// We failed, try popping it from the global list
//
LookasideList = (PoolType == PagedPool) ?
Prcb->PPPagedLookasideList[i - 1].L :
Prcb->PPNPagedLookasideList[i - 1].L;
LookasideList->TotalAllocates++;
Entry = (PPOOL_HEADER)InterlockedPopEntrySList(&LookasideList->ListHead);
}
//
// If we were able to pop it, update the accounting and return the block
//
if (Entry)
{
LookasideList->AllocateHits++;
//
// Get the real entry, write down its pool type, and track it
//
Entry--;
Entry->PoolType = PoolType + 1;
ExpInsertPoolTracker(Tag,
Entry->BlockSize * POOL_BLOCK_SIZE,
OriginalType);
//
// Return the pool allocation
//
Entry->PoolTag = Tag;
(POOL_FREE_BLOCK(Entry))->Flink = NULL;
(POOL_FREE_BLOCK(Entry))->Blink = NULL;
return POOL_FREE_BLOCK(Entry);
}
}
//
// Loop in the free lists looking for a block of this size. Start with the
// list optimized for this kind of size lookup
@ -1902,6 +1955,8 @@ ExFreePoolWithTag(IN PVOID P,
ULONG Tag;
BOOLEAN Combined = FALSE;
PFN_NUMBER PageCount, RealPageCount;
PKPRCB Prcb = KeGetCurrentPrcb();
PGENERAL_LOOKASIDE LookasideList;
//
// Check if any of the debug flags are enabled
@ -2072,6 +2127,40 @@ ExFreePoolWithTag(IN PVOID P,
BlockSize * POOL_BLOCK_SIZE,
Entry->PoolType - 1);
//
// Is this allocation small enough to have come from a lookaside list?
//
if (BlockSize <= MAXIMUM_PROCESSORS)
{
//
// Try pushing it into the per-CPU lookaside list
//
LookasideList = (PoolType == PagedPool) ?
Prcb->PPPagedLookasideList[BlockSize - 1].P :
Prcb->PPNPagedLookasideList[BlockSize - 1].P;
LookasideList->TotalFrees++;
if (ExQueryDepthSList(&LookasideList->ListHead) < LookasideList->Depth)
{
LookasideList->FreeHits++;
InterlockedPushEntrySList(&LookasideList->ListHead, P);
return;
}
//
// We failed, try to push it into the global lookaside list
//
LookasideList = (PoolType == PagedPool) ?
Prcb->PPPagedLookasideList[BlockSize - 1].L :
Prcb->PPNPagedLookasideList[BlockSize - 1].L;
LookasideList->TotalFrees++;
if (ExQueryDepthSList(&LookasideList->ListHead) < LookasideList->Depth)
{
LookasideList->FreeHits++;
InterlockedPushEntrySList(&LookasideList->ListHead, P);
return;
}
}
//
// Get the pointer to the next entry
//

View file

@ -598,6 +598,8 @@ extern PVOID MiSystemViewStart;
extern PVOID MiSessionPoolEnd; // 0xBE000000
extern PVOID MiSessionPoolStart; // 0xBD000000
extern PVOID MiSessionViewStart; // 0xBE000000
extern ULONG MmMaximumDeadKernelStacks;
extern SLIST_HEADER MmDeadStackSListHead;
BOOLEAN
FORCEINLINE

View file

@ -2076,6 +2076,9 @@ MmArmInitSystem(IN ULONG Phase,
KeInitializeEvent(&MmZeroingPageEvent, SynchronizationEvent, FALSE);
MmZeroingPageThreadActive = FALSE;
/* Initialize the dead stack S-LIST */
InitializeSListHead(&MmDeadStackSListHead);
//
// Check if this is a machine with less than 19MB of RAM
//
@ -2268,18 +2271,21 @@ MmArmInitSystem(IN ULONG Phase,
{
/* Set small system */
MmSystemSize = MmSmallSystem;
MmMaximumDeadKernelStacks = 0;
}
else if (MmNumberOfPhysicalPages <= ((19 * _1MB) / PAGE_SIZE))
{
/* Set small system and add 100 pages for the cache */
MmSystemSize = MmSmallSystem;
MmSystemCacheWsMinimum += 100;
MmMaximumDeadKernelStacks = 2;
}
else
{
/* Set medium system and add 400 pages for the cache */
MmSystemSize = MmMediumSystem;
MmSystemCacheWsMinimum += 400;
MmMaximumDeadKernelStacks = 5;
}
/* Check for less than 24MB */

View file

@ -27,6 +27,10 @@ SIZE_T MmAllocatedNonPagedPool;
ULONG MmSpecialPoolTag;
ULONG MmConsumedPoolPercentage;
BOOLEAN MmProtectFreedNonPagedPool;
SLIST_HEADER MiNonPagedPoolSListHead;
ULONG MiNonPagedPoolSListMaximum = 4;
SLIST_HEADER MiPagedPoolSListHead;
ULONG MiPagedPoolSListMaximum = 8;
/* PRIVATE FUNCTIONS **********************************************************/
@ -277,6 +281,34 @@ MiInitializeNonPagedPool(VOID)
PMMPTE PointerPte;
PAGED_CODE();
//
// Initialize the pool S-LISTs as well as their maximum count. In general,
// we'll allow 8 times the default on a 2GB system, and two times the default
// on a 1GB system.
//
InitializeSListHead(&MiPagedPoolSListHead);
InitializeSListHead(&MiNonPagedPoolSListHead);
if (MmNumberOfPhysicalPages >= ((2 * _1GB) /PAGE_SIZE))
{
MiNonPagedPoolSListMaximum *= 8;
MiPagedPoolSListMaximum *= 8;
}
else if (MmNumberOfPhysicalPages >= (_1GB /PAGE_SIZE))
{
MiNonPagedPoolSListMaximum *= 2;
MiPagedPoolSListMaximum *= 2;
}
//
// However if debugging options for the pool are enabled, turn off the S-LIST
// to reduce the risk of messing things up even more
//
if (MmProtectFreedNonPagedPool)
{
MiNonPagedPoolSListMaximum = 0;
MiPagedPoolSListMaximum = 0;
}
//
// We keep 4 lists of free pages (4 lists help avoid contention)
//
@ -410,6 +442,15 @@ MiAllocatePoolPages(IN POOL_TYPE PoolType,
//
if ((PoolType & BASE_POOL_TYPE_MASK) == PagedPool)
{
//
// If only one page is being requested, try to grab it from the S-LIST
//
if ((SizeInPages == 1) && (ExQueryDepthSList(&MiPagedPoolSListHead)))
{
BaseVa = InterlockedPopEntrySList(&MiPagedPoolSListHead);
if (BaseVa) return BaseVa;
}
//
// Lock the paged pool mutex
//
@ -610,6 +651,15 @@ MiAllocatePoolPages(IN POOL_TYPE PoolType,
return BaseVa;
}
//
// If only one page is being requested, try to grab it from the S-LIST
//
if ((SizeInPages == 1) && (ExQueryDepthSList(&MiNonPagedPoolSListHead)))
{
BaseVa = InterlockedPopEntrySList(&MiNonPagedPoolSListHead);
if (BaseVa) return BaseVa;
}
//
// Allocations of less than 4 pages go into their individual buckets
//
@ -861,9 +911,16 @@ MiFreePoolPages(IN PVOID StartingVa)
while (!RtlTestBit(MmPagedPoolInfo.EndOfPagedPoolBitmap, End)) End++;
//
// Now calculate the total number of pages this allocation spans
// Now calculate the total number of pages this allocation spans. If it's
// only one page, add it to the S-LIST instead of freeing it
//
NumberOfPages = End - i + 1;
if ((NumberOfPages == 1) &&
(ExQueryDepthSList(&MiPagedPoolSListHead) < MiPagedPoolSListMaximum))
{
InterlockedPushEntrySList(&MiPagedPoolSListHead, StartingVa);
return 1;
}
/* Delete the actual pages */
PointerPte = MmPagedPoolInfo.FirstPteForPagedPool + i;
@ -898,10 +955,18 @@ MiFreePoolPages(IN PVOID StartingVa)
}
//
// Get the first PTE and its corresponding PFN entry
// Get the first PTE and its corresponding PFN entry. If this is also the
// last PTE, meaning that this allocation was only for one page, push it into
// the S-LIST instead of freeing it
//
StartPte = PointerPte = MiAddressToPte(StartingVa);
StartPfn = Pfn1 = MiGetPfnEntry(PointerPte->u.Hard.PageFrameNumber);
if ((Pfn1->u3.e1.EndOfAllocation == 1) &&
(ExQueryDepthSList(&MiNonPagedPoolSListHead) < MiNonPagedPoolSListMaximum))
{
InterlockedPushEntrySList(&MiNonPagedPoolSListHead, StartingVa);
return 1;
}
//
// Loop until we find the last PTE

View file

@ -19,6 +19,8 @@
ULONG MmProcessColorSeed = 0x12345678;
PMMWSL MmWorkingSetList;
ULONG MmMaximumDeadKernelStacks = 5;
SLIST_HEADER MmDeadStackSListHead;
/* PRIVATE FUNCTIONS **********************************************************/
@ -234,6 +236,19 @@ MmDeleteKernelStack(IN PVOID StackBase,
PointerPte = MiAddressToPte(StackBase);
PointerPte--;
//
// If this is a small stack, just push the stack onto the dead stack S-LIST
//
if (!GuiStack)
{
if (ExQueryDepthSList(&MmDeadStackSListHead) < MmMaximumDeadKernelStacks)
{
Pfn1 = MiGetPfnEntry(PointerPte->u.Hard.PageFrameNumber);
InterlockedPushEntrySList(&MmDeadStackSListHead, &Pfn1->u1.NextStackPfn);
return;
}
}
//
// Calculate pages used
//
@ -303,6 +318,7 @@ MmCreateKernelStack(IN BOOLEAN GuiStack,
KIRQL OldIrql;
PFN_NUMBER PageFrameIndex;
ULONG i;
PMMPFN Pfn1;
//
// Calculate pages needed
@ -318,6 +334,21 @@ MmCreateKernelStack(IN BOOLEAN GuiStack,
}
else
{
//
// If the dead stack S-LIST has a stack on it, use it instead of allocating
// new system PTEs for this stack
//
if (ExQueryDepthSList(&MmDeadStackSListHead))
{
Pfn1 = (PMMPFN)InterlockedPopEntrySList(&MmDeadStackSListHead);
if (Pfn1)
{
PointerPte = Pfn1->PteAddress;
BaseAddress = MiPteToAddress(++PointerPte);
return BaseAddress;
}
}
//
// We'll allocate 12K and that's it
//