mirror of
https://github.com/reactos/reactos.git
synced 2025-02-21 16:04:57 +00:00
[MEGAPERF]: This build introduces the following performance boosts:
- Using a dead stack lookaside list for up to 5 dead kernel thread stacks. 1500% improvement when rapidly destroying/creating threads, such as during second stage setup and many winetests.
- Using a free pool page lookaside list for up to 12 free non-paged or paged pool pages. 800% improvement when allocating big pages from the pool, as well as during pool expansion.
- Using a bucketized per-processor and local list (in the KPRCB) for block sizes between 1 and 32 bytes. 1000% improvement when rapidly allocating/freeing small pool allocations, and 8x reduction in pool fragmentation.

svn path=/trunk/; revision=56000
This commit is contained in:
parent
6c3e9221be
commit
645e6a793a
6 changed files with 197 additions and 4 deletions
|
@ -367,7 +367,7 @@ IopUnloadDevice(IN PDEVICE_OBJECT DeviceObject)
|
|||
/* We can't unload unless there's an unload handler */
|
||||
if (!DriverObject->DriverUnload)
|
||||
{
|
||||
DPRINT1("No DriverUnload function! '%wZ' will not be unloaded!\n", &DriverObject->DriverName);
|
||||
DPRINT("No DriverUnload function! '%wZ' will not be unloaded!\n", &DriverObject->DriverName);
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -1346,7 +1346,7 @@ ExQueryPoolUsage(OUT PULONG PagedPoolPages,
|
|||
{
|
||||
ULONG i;
|
||||
PPOOL_DESCRIPTOR PoolDesc;
|
||||
|
||||
|
||||
//
|
||||
// Assume all failures
|
||||
//
|
||||
|
@ -1414,6 +1414,8 @@ ExAllocatePoolWithTag(IN POOL_TYPE PoolType,
|
|||
KIRQL OldIrql;
|
||||
USHORT BlockSize, i;
|
||||
ULONG OriginalType;
|
||||
PKPRCB Prcb = KeGetCurrentPrcb();
|
||||
PGENERAL_LOOKASIDE LookasideList;
|
||||
|
||||
//
|
||||
// Some sanity checks
|
||||
|
@ -1561,6 +1563,57 @@ ExAllocatePoolWithTag(IN POOL_TYPE PoolType,
|
|||
i = (USHORT)((NumberOfBytes + sizeof(POOL_HEADER) + (POOL_BLOCK_SIZE - 1))
|
||||
/ POOL_BLOCK_SIZE);
|
||||
|
||||
//
|
||||
// Handle lookaside list optimization for both paged and nonpaged pool
|
||||
//
|
||||
if (i <= MAXIMUM_PROCESSORS)
|
||||
{
|
||||
//
|
||||
// Try popping it from the per-CPU lookaside list
|
||||
//
|
||||
LookasideList = (PoolType == PagedPool) ?
|
||||
Prcb->PPPagedLookasideList[i - 1].P :
|
||||
Prcb->PPNPagedLookasideList[i - 1].P;
|
||||
LookasideList->TotalAllocates++;
|
||||
Entry = (PPOOL_HEADER)InterlockedPopEntrySList(&LookasideList->ListHead);
|
||||
if (!Entry)
|
||||
{
|
||||
//
|
||||
// We failed, try popping it from the global list
|
||||
//
|
||||
LookasideList = (PoolType == PagedPool) ?
|
||||
Prcb->PPPagedLookasideList[i - 1].L :
|
||||
Prcb->PPNPagedLookasideList[i - 1].L;
|
||||
LookasideList->TotalAllocates++;
|
||||
Entry = (PPOOL_HEADER)InterlockedPopEntrySList(&LookasideList->ListHead);
|
||||
}
|
||||
|
||||
//
|
||||
// If we were able to pop it, update the accounting and return the block
|
||||
//
|
||||
if (Entry)
|
||||
{
|
||||
LookasideList->AllocateHits++;
|
||||
|
||||
//
|
||||
// Get the real entry, write down its pool type, and track it
|
||||
//
|
||||
Entry--;
|
||||
Entry->PoolType = PoolType + 1;
|
||||
ExpInsertPoolTracker(Tag,
|
||||
Entry->BlockSize * POOL_BLOCK_SIZE,
|
||||
OriginalType);
|
||||
|
||||
//
|
||||
// Return the pool allocation
|
||||
//
|
||||
Entry->PoolTag = Tag;
|
||||
(POOL_FREE_BLOCK(Entry))->Flink = NULL;
|
||||
(POOL_FREE_BLOCK(Entry))->Blink = NULL;
|
||||
return POOL_FREE_BLOCK(Entry);
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Loop in the free lists looking for a block of this size. Start with the
|
||||
// list optimized for this kind of size lookup
|
||||
|
@ -1902,6 +1955,8 @@ ExFreePoolWithTag(IN PVOID P,
|
|||
ULONG Tag;
|
||||
BOOLEAN Combined = FALSE;
|
||||
PFN_NUMBER PageCount, RealPageCount;
|
||||
PKPRCB Prcb = KeGetCurrentPrcb();
|
||||
PGENERAL_LOOKASIDE LookasideList;
|
||||
|
||||
//
|
||||
// Check if any of the debug flags are enabled
|
||||
|
@ -2072,6 +2127,40 @@ ExFreePoolWithTag(IN PVOID P,
|
|||
BlockSize * POOL_BLOCK_SIZE,
|
||||
Entry->PoolType - 1);
|
||||
|
||||
//
|
||||
// Is this allocation small enough to have come from a lookaside list?
|
||||
//
|
||||
if (BlockSize <= MAXIMUM_PROCESSORS)
|
||||
{
|
||||
//
|
||||
// Try pushing it into the per-CPU lookaside list
|
||||
//
|
||||
LookasideList = (PoolType == PagedPool) ?
|
||||
Prcb->PPPagedLookasideList[BlockSize - 1].P :
|
||||
Prcb->PPNPagedLookasideList[BlockSize - 1].P;
|
||||
LookasideList->TotalFrees++;
|
||||
if (ExQueryDepthSList(&LookasideList->ListHead) < LookasideList->Depth)
|
||||
{
|
||||
LookasideList->FreeHits++;
|
||||
InterlockedPushEntrySList(&LookasideList->ListHead, P);
|
||||
return;
|
||||
}
|
||||
|
||||
//
|
||||
// We failed, try to push it into the global lookaside list
|
||||
//
|
||||
LookasideList = (PoolType == PagedPool) ?
|
||||
Prcb->PPPagedLookasideList[BlockSize - 1].L :
|
||||
Prcb->PPNPagedLookasideList[BlockSize - 1].L;
|
||||
LookasideList->TotalFrees++;
|
||||
if (ExQueryDepthSList(&LookasideList->ListHead) < LookasideList->Depth)
|
||||
{
|
||||
LookasideList->FreeHits++;
|
||||
InterlockedPushEntrySList(&LookasideList->ListHead, P);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Get the pointer to the next entry
|
||||
//
|
||||
|
|
|
@ -598,6 +598,8 @@ extern PVOID MiSystemViewStart;
|
|||
extern PVOID MiSessionPoolEnd; // 0xBE000000
|
||||
extern PVOID MiSessionPoolStart; // 0xBD000000
|
||||
extern PVOID MiSessionViewStart; // 0xBE000000
|
||||
extern ULONG MmMaximumDeadKernelStacks;
|
||||
extern SLIST_HEADER MmDeadStackSListHead;
|
||||
|
||||
BOOLEAN
|
||||
FORCEINLINE
|
||||
|
|
|
@ -2076,6 +2076,9 @@ MmArmInitSystem(IN ULONG Phase,
|
|||
KeInitializeEvent(&MmZeroingPageEvent, SynchronizationEvent, FALSE);
|
||||
MmZeroingPageThreadActive = FALSE;
|
||||
|
||||
/* Initialize the dead stack S-LIST */
|
||||
InitializeSListHead(&MmDeadStackSListHead);
|
||||
|
||||
//
|
||||
// Check if this is a machine with less than 19MB of RAM
|
||||
//
|
||||
|
@ -2268,18 +2271,21 @@ MmArmInitSystem(IN ULONG Phase,
|
|||
{
|
||||
/* Set small system */
|
||||
MmSystemSize = MmSmallSystem;
|
||||
MmMaximumDeadKernelStacks = 0;
|
||||
}
|
||||
else if (MmNumberOfPhysicalPages <= ((19 * _1MB) / PAGE_SIZE))
|
||||
{
|
||||
/* Set small system and add 100 pages for the cache */
|
||||
MmSystemSize = MmSmallSystem;
|
||||
MmSystemCacheWsMinimum += 100;
|
||||
MmMaximumDeadKernelStacks = 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Set medium system and add 400 pages for the cache */
|
||||
MmSystemSize = MmMediumSystem;
|
||||
MmSystemCacheWsMinimum += 400;
|
||||
MmMaximumDeadKernelStacks = 5;
|
||||
}
|
||||
|
||||
/* Check for less than 24MB */
|
||||
|
|
|
@ -27,6 +27,10 @@ SIZE_T MmAllocatedNonPagedPool;
|
|||
ULONG MmSpecialPoolTag;
|
||||
ULONG MmConsumedPoolPercentage;
|
||||
BOOLEAN MmProtectFreedNonPagedPool;
|
||||
SLIST_HEADER MiNonPagedPoolSListHead;
|
||||
ULONG MiNonPagedPoolSListMaximum = 4;
|
||||
SLIST_HEADER MiPagedPoolSListHead;
|
||||
ULONG MiPagedPoolSListMaximum = 8;
|
||||
|
||||
/* PRIVATE FUNCTIONS **********************************************************/
|
||||
|
||||
|
@ -277,6 +281,34 @@ MiInitializeNonPagedPool(VOID)
|
|||
PMMPTE PointerPte;
|
||||
PAGED_CODE();
|
||||
|
||||
//
|
||||
// Initialize the pool S-LISTs as well as their maximum count. In general,
|
||||
// we'll allow 8 times the default on a 2GB system, and two times the default
|
||||
// on a 1GB system.
|
||||
//
|
||||
InitializeSListHead(&MiPagedPoolSListHead);
|
||||
InitializeSListHead(&MiNonPagedPoolSListHead);
|
||||
if (MmNumberOfPhysicalPages >= ((2 * _1GB) /PAGE_SIZE))
|
||||
{
|
||||
MiNonPagedPoolSListMaximum *= 8;
|
||||
MiPagedPoolSListMaximum *= 8;
|
||||
}
|
||||
else if (MmNumberOfPhysicalPages >= (_1GB /PAGE_SIZE))
|
||||
{
|
||||
MiNonPagedPoolSListMaximum *= 2;
|
||||
MiPagedPoolSListMaximum *= 2;
|
||||
}
|
||||
|
||||
//
|
||||
// However if debugging options for the pool are enabled, turn off the S-LIST
|
||||
// to reduce the risk of messing things up even more
|
||||
//
|
||||
if (MmProtectFreedNonPagedPool)
|
||||
{
|
||||
MiNonPagedPoolSListMaximum = 0;
|
||||
MiPagedPoolSListMaximum = 0;
|
||||
}
|
||||
|
||||
//
|
||||
// We keep 4 lists of free pages (4 lists help avoid contention)
|
||||
//
|
||||
|
@ -410,6 +442,15 @@ MiAllocatePoolPages(IN POOL_TYPE PoolType,
|
|||
//
|
||||
if ((PoolType & BASE_POOL_TYPE_MASK) == PagedPool)
|
||||
{
|
||||
//
|
||||
// If only one page is being requested, try to grab it from the S-LIST
|
||||
//
|
||||
if ((SizeInPages == 1) && (ExQueryDepthSList(&MiPagedPoolSListHead)))
|
||||
{
|
||||
BaseVa = InterlockedPopEntrySList(&MiPagedPoolSListHead);
|
||||
if (BaseVa) return BaseVa;
|
||||
}
|
||||
|
||||
//
|
||||
// Lock the paged pool mutex
|
||||
//
|
||||
|
@ -610,6 +651,15 @@ MiAllocatePoolPages(IN POOL_TYPE PoolType,
|
|||
return BaseVa;
|
||||
}
|
||||
|
||||
//
|
||||
// If only one page is being requested, try to grab it from the S-LIST
|
||||
//
|
||||
if ((SizeInPages == 1) && (ExQueryDepthSList(&MiNonPagedPoolSListHead)))
|
||||
{
|
||||
BaseVa = InterlockedPopEntrySList(&MiNonPagedPoolSListHead);
|
||||
if (BaseVa) return BaseVa;
|
||||
}
|
||||
|
||||
//
|
||||
// Allocations of less than 4 pages go into their individual buckets
|
||||
//
|
||||
|
@ -861,9 +911,16 @@ MiFreePoolPages(IN PVOID StartingVa)
|
|||
while (!RtlTestBit(MmPagedPoolInfo.EndOfPagedPoolBitmap, End)) End++;
|
||||
|
||||
//
|
||||
// Now calculate the total number of pages this allocation spans
|
||||
// Now calculate the total number of pages this allocation spans. If it's
|
||||
// only one page, add it to the S-LIST instead of freeing it
|
||||
//
|
||||
NumberOfPages = End - i + 1;
|
||||
if ((NumberOfPages == 1) &&
|
||||
(ExQueryDepthSList(&MiPagedPoolSListHead) < MiPagedPoolSListMaximum))
|
||||
{
|
||||
InterlockedPushEntrySList(&MiPagedPoolSListHead, StartingVa);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Delete the actual pages */
|
||||
PointerPte = MmPagedPoolInfo.FirstPteForPagedPool + i;
|
||||
|
@ -898,10 +955,18 @@ MiFreePoolPages(IN PVOID StartingVa)
|
|||
}
|
||||
|
||||
//
|
||||
// Get the first PTE and its corresponding PFN entry
|
||||
// Get the first PTE and its corresponding PFN entry. If this is also the
|
||||
// last PTE, meaning that this allocation was only for one page, push it into
|
||||
// the S-LIST instead of freeing it
|
||||
//
|
||||
StartPte = PointerPte = MiAddressToPte(StartingVa);
|
||||
StartPfn = Pfn1 = MiGetPfnEntry(PointerPte->u.Hard.PageFrameNumber);
|
||||
if ((Pfn1->u3.e1.EndOfAllocation == 1) &&
|
||||
(ExQueryDepthSList(&MiNonPagedPoolSListHead) < MiNonPagedPoolSListMaximum))
|
||||
{
|
||||
InterlockedPushEntrySList(&MiNonPagedPoolSListHead, StartingVa);
|
||||
return 1;
|
||||
}
|
||||
|
||||
//
|
||||
// Loop until we find the last PTE
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
|
||||
ULONG MmProcessColorSeed = 0x12345678;
|
||||
PMMWSL MmWorkingSetList;
|
||||
ULONG MmMaximumDeadKernelStacks = 5;
|
||||
SLIST_HEADER MmDeadStackSListHead;
|
||||
|
||||
/* PRIVATE FUNCTIONS **********************************************************/
|
||||
|
||||
|
@ -234,6 +236,19 @@ MmDeleteKernelStack(IN PVOID StackBase,
|
|||
PointerPte = MiAddressToPte(StackBase);
|
||||
PointerPte--;
|
||||
|
||||
//
|
||||
// If this is a small stack, just push the stack onto the dead stack S-LIST
|
||||
//
|
||||
if (!GuiStack)
|
||||
{
|
||||
if (ExQueryDepthSList(&MmDeadStackSListHead) < MmMaximumDeadKernelStacks)
|
||||
{
|
||||
Pfn1 = MiGetPfnEntry(PointerPte->u.Hard.PageFrameNumber);
|
||||
InterlockedPushEntrySList(&MmDeadStackSListHead, &Pfn1->u1.NextStackPfn);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Calculate pages used
|
||||
//
|
||||
|
@ -303,6 +318,7 @@ MmCreateKernelStack(IN BOOLEAN GuiStack,
|
|||
KIRQL OldIrql;
|
||||
PFN_NUMBER PageFrameIndex;
|
||||
ULONG i;
|
||||
PMMPFN Pfn1;
|
||||
|
||||
//
|
||||
// Calculate pages needed
|
||||
|
@ -318,6 +334,21 @@ MmCreateKernelStack(IN BOOLEAN GuiStack,
|
|||
}
|
||||
else
|
||||
{
|
||||
//
|
||||
// If the dead stack S-LIST has a stack on it, use it instead of allocating
|
||||
// new system PTEs for this stack
|
||||
//
|
||||
if (ExQueryDepthSList(&MmDeadStackSListHead))
|
||||
{
|
||||
Pfn1 = (PMMPFN)InterlockedPopEntrySList(&MmDeadStackSListHead);
|
||||
if (Pfn1)
|
||||
{
|
||||
PointerPte = Pfn1->PteAddress;
|
||||
BaseAddress = MiPteToAddress(++PointerPte);
|
||||
return BaseAddress;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// We'll allocate 12K and that's it
|
||||
//
|
||||
|
|
Loading…
Reference in a new issue