[NTOSKRNL] Bring an initial (and not perfect ;-)) implementation of read ahead to our Cc!

This halfplements CcScheduleReadAhead() which is responsible for finding the next reads
to perform given last read and previous reads. I made it very basic for now, at least
to test the whole process.
This also introduces the CcExpressWorkQueue in the lazy writer which is responsible
for dealing with read ahead items and which is dealt with before the regular queue.
In CcCopyData(), if read was fine, schedule read ahead so that it can happen in background
without the FSD noticing it! Also, update the read history so that scheduling has a
bit of data to work with.
Implement (à la "old Cc" ;-)) CcPerformReadAhead() which is responsible for performing
the read. It's only to be called by the worker thread.

Side note on the modifications done in CcRosReleaseFileCache(). Private cache map
is tied to a handle. If it goes away, private cache map gets deleted. Read ahead
can run after the handle was closed (and thus, private cache map deleted), so
it is mandatory to always lock the master lock before accessing the structure in
read ahead or before deleting it in CcRosReleaseFileCache(). Otherwise, you'll
just break everything. You've been warned!

This commit also partly reverts f8b5d27.

CORE-14312
This commit is contained in:
Pierre Schweitzer 2018-02-09 10:06:17 +01:00
parent f4fedb936e
commit c5139563db
No known key found for this signature in database
GPG key ID: 7545556C3D585B0B
6 changed files with 314 additions and 19 deletions

View file

@ -51,6 +51,7 @@ CcInitializeCacheManager(VOID)
/* Initialize lazy-writer lists */
InitializeListHead(&CcIdleWorkerThreadList);
InitializeListHead(&CcExpressWorkQueue);
InitializeListHead(&CcRegularWorkQueue);
InitializeListHead(&CcPostTickWorkQueue);
@ -160,7 +161,99 @@ CcScheduleReadAhead (
IN ULONG Length
)
{
UNIMPLEMENTED;
KIRQL OldIrql;
LARGE_INTEGER NewOffset;
PROS_SHARED_CACHE_MAP SharedCacheMap;
PPRIVATE_CACHE_MAP PrivateCacheMap;
SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
PrivateCacheMap = FileObject->PrivateCacheMap;
/* If file isn't cached, or if read ahead is disabled, this is no op */
if (SharedCacheMap == NULL || PrivateCacheMap == NULL ||
BooleanFlagOn(SharedCacheMap->Flags, READAHEAD_DISABLED))
{
return;
}
/* Round read length with read ahead mask */
Length = ROUND_UP(Length, PrivateCacheMap->ReadAheadMask + 1);
/* Compute the offset we'll reach */
NewOffset.QuadPart = FileOffset->QuadPart + Length;
/* Lock read ahead spin lock */
KeAcquireSpinLock(&PrivateCacheMap->ReadAheadSpinLock, &OldIrql);
/* Easy case: the file is sequentially read */
if (BooleanFlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY))
{
/* If we went backward, this is no go! */
if (NewOffset.QuadPart < PrivateCacheMap->ReadAheadOffset[1].QuadPart)
{
KeReleaseSpinLock(&PrivateCacheMap->ReadAheadSpinLock, OldIrql);
return;
}
/* FIXME: hackish, but will do the job for now */
PrivateCacheMap->ReadAheadOffset[1].QuadPart = NewOffset.QuadPart;
PrivateCacheMap->ReadAheadLength[1] = Length;
}
/* Other cases: try to find some logic in that mess... */
else
{
/* Let's check if we always read the same way (like going down in the file)
* and pretend it's enough for now
*/
if (PrivateCacheMap->FileOffset2.QuadPart >= PrivateCacheMap->FileOffset1.QuadPart &&
FileOffset->QuadPart >= PrivateCacheMap->FileOffset2.QuadPart)
{
/* FIXME: hackish, but will do the job for now */
PrivateCacheMap->ReadAheadOffset[1].QuadPart = NewOffset.QuadPart;
PrivateCacheMap->ReadAheadLength[1] = Length;
}
else
{
/* FIXME: handle the other cases */
KeReleaseSpinLock(&PrivateCacheMap->ReadAheadSpinLock, OldIrql);
UNIMPLEMENTED;
return;
}
}
/* If read ahead isn't active yet */
if (!PrivateCacheMap->Flags.ReadAheadActive)
{
PWORK_QUEUE_ENTRY WorkItem;
/* It's active now!
* Be careful with the mask, you don't want to mess with node code
*/
InterlockedOr((volatile long *)&PrivateCacheMap->UlongFlags, 0x10000);
KeReleaseSpinLock(&PrivateCacheMap->ReadAheadSpinLock, OldIrql);
/* Get a work item */
WorkItem = ExAllocateFromNPagedLookasideList(&CcTwilightLookasideList);
if (WorkItem != NULL)
{
/* Reference our FO so that it doesn't go in between */
ObReferenceObject(FileObject);
/* We want to do read ahead! */
WorkItem->Function = ReadAhead;
WorkItem->Parameters.Read.FileObject = FileObject;
/* Queue in the read ahead dedicated queue */
CcPostWorkQueue(WorkItem, &CcExpressWorkQueue);
return;
}
/* Fail path: lock again, and revert read ahead active */
KeAcquireSpinLock(&PrivateCacheMap->ReadAheadSpinLock, &OldIrql);
InterlockedAnd((volatile long *)&PrivateCacheMap->UlongFlags, 0xFFFEFFFF);
}
/* Done (fail) */
KeReleaseSpinLock(&PrivateCacheMap->ReadAheadSpinLock, OldIrql);
}
/*

View file

@ -248,8 +248,10 @@ CcCopyData (
ULONG PartialLength;
PVOID BaseAddress;
BOOLEAN Valid;
PPRIVATE_CACHE_MAP PrivateCacheMap;
SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
PrivateCacheMap = FileObject->PrivateCacheMap;
CurrentOffset = FileOffset;
BytesCopied = 0;
@ -356,6 +358,23 @@ CcCopyData (
if (Operation != CcOperationZero)
Buffer = (PVOID)((ULONG_PTR)Buffer + PartialLength);
}
/* If that was a successful sync read operation, let's handle read ahead */
if (Operation == CcOperationRead && Length == 0 && Wait)
{
/* If file isn't random access, schedule next read */
if (!BooleanFlagOn(FileObject->Flags, FO_RANDOM_ACCESS))
{
CcScheduleReadAhead(FileObject, (PLARGE_INTEGER)&FileOffset, BytesCopied);
}
/* And update read history in private cache map */
PrivateCacheMap->FileOffset1.QuadPart = PrivateCacheMap->FileOffset2.QuadPart;
PrivateCacheMap->BeyondLastByte1.QuadPart = PrivateCacheMap->BeyondLastByte2.QuadPart;
PrivateCacheMap->FileOffset2.QuadPart = FileOffset;
PrivateCacheMap->BeyondLastByte2.QuadPart = FileOffset + BytesCopied;
}
IoStatus->Status = STATUS_SUCCESS;
IoStatus->Information = BytesCopied;
return TRUE;
@ -435,6 +454,155 @@ CcPostDeferredWrites(VOID)
}
}
/*
 * Performs the read ahead previously scheduled for FileObject (see
 * CcScheduleReadAhead): brings the range described by ReadAheadOffset[1] /
 * ReadAheadLength[1] of the private cache map into the cache, without
 * copying anything to a caller buffer.
 *
 * FileObject - file object to read ahead on. The reference taken when the
 *              work item was queued is dropped before returning, on every
 *              path.
 *
 * Meant to be called from the Cc worker thread only.
 */
VOID
CcPerformReadAhead(
    IN PFILE_OBJECT FileObject)
{
    NTSTATUS Status;
    LONGLONG CurrentOffset;
    KIRQL OldIrql;
    PROS_SHARED_CACHE_MAP SharedCacheMap;
    PROS_VACB Vacb;
    ULONG PartialLength;
    PVOID BaseAddress;
    BOOLEAN Valid;
    ULONG Length;
    PPRIVATE_CACHE_MAP PrivateCacheMap;
    BOOLEAN Locked;

    SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;

    /* Critical:
     * PrivateCacheMap might disappear in-between if the handle
     * to the file is closed (private is attached to the handle not to
     * the file), so we need to lock the master lock while we deal with
     * it. It won't disappear without attempting to lock such lock.
     */
    OldIrql = KeAcquireQueuedSpinLock(LockQueueMasterLock);
    PrivateCacheMap = FileObject->PrivateCacheMap;
    /* If the handle was closed since the read ahead was scheduled, just quit */
    if (PrivateCacheMap == NULL)
    {
        KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);
        ObDereferenceObject(FileObject);
        return;
    }
    /* Otherwise, extract read offset and length and release private map */
    else
    {
        KeAcquireSpinLockAtDpcLevel(&PrivateCacheMap->ReadAheadSpinLock);
        CurrentOffset = PrivateCacheMap->ReadAheadOffset[1].QuadPart;
        Length = PrivateCacheMap->ReadAheadLength[1];
        KeReleaseSpinLockFromDpcLevel(&PrivateCacheMap->ReadAheadSpinLock);
    }
    KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);

    /* Time to go! */
    DPRINT("Doing ReadAhead for %p\n", FileObject);
    /* Lock the file, first */
    if (!SharedCacheMap->Callbacks->AcquireForReadAhead(SharedCacheMap->LazyWriteContext, FALSE))
    {
        Locked = FALSE;
        goto Clear;
    }

    /* Remember it's locked */
    Locked = TRUE;

    /* The rest of the algorithm looks like CcCopyData with the slight
     * difference that we don't copy data back to an user-backed buffer
     * We just bring data into Cc
     */

    /* Unaligned head: only touch the end of the first VACB */
    PartialLength = CurrentOffset % VACB_MAPPING_GRANULARITY;
    if (PartialLength != 0)
    {
        PartialLength = min(Length, VACB_MAPPING_GRANULARITY - PartialLength);
        Status = CcRosRequestVacb(SharedCacheMap,
                                  ROUND_DOWN(CurrentOffset,
                                             VACB_MAPPING_GRANULARITY),
                                  &BaseAddress,
                                  &Valid,
                                  &Vacb);
        if (!NT_SUCCESS(Status))
        {
            DPRINT1("Failed to request VACB: %lx!\n", Status);
            goto Clear;
        }

        if (!Valid)
        {
            Status = CcReadVirtualAddress(Vacb);
            if (!NT_SUCCESS(Status))
            {
                CcRosReleaseVacb(SharedCacheMap, Vacb, FALSE, FALSE, FALSE);
                DPRINT1("Failed to read data: %lx!\n", Status);
                goto Clear;
            }
        }

        CcRosReleaseVacb(SharedCacheMap, Vacb, TRUE, FALSE, FALSE);

        Length -= PartialLength;
        CurrentOffset += PartialLength;
    }

    /* Remaining data, one VACB at a time; offset is now VACB-aligned */
    while (Length > 0)
    {
        ASSERT(CurrentOffset % VACB_MAPPING_GRANULARITY == 0);
        PartialLength = min(VACB_MAPPING_GRANULARITY, Length);
        Status = CcRosRequestVacb(SharedCacheMap,
                                  CurrentOffset,
                                  &BaseAddress,
                                  &Valid,
                                  &Vacb);
        if (!NT_SUCCESS(Status))
        {
            DPRINT1("Failed to request VACB: %lx!\n", Status);
            goto Clear;
        }

        if (!Valid)
        {
            Status = CcReadVirtualAddress(Vacb);
            if (!NT_SUCCESS(Status))
            {
                CcRosReleaseVacb(SharedCacheMap, Vacb, FALSE, FALSE, FALSE);
                DPRINT1("Failed to read data: %lx!\n", Status);
                goto Clear;
            }
        }

        CcRosReleaseVacb(SharedCacheMap, Vacb, TRUE, FALSE, FALSE);

        Length -= PartialLength;
        CurrentOffset += PartialLength;
    }

Clear:
    /* See previous comment about private cache map */
    OldIrql = KeAcquireQueuedSpinLock(LockQueueMasterLock);
    PrivateCacheMap = FileObject->PrivateCacheMap;
    if (PrivateCacheMap != NULL)
    {
        /* Mark read ahead as inactive */
        KeAcquireSpinLockAtDpcLevel(&PrivateCacheMap->ReadAheadSpinLock);
        InterlockedAnd((volatile long *)&PrivateCacheMap->UlongFlags, 0xFFFEFFFF);
        KeReleaseSpinLockFromDpcLevel(&PrivateCacheMap->ReadAheadSpinLock);
    }
    /* Release the master lock taken above. This must not touch
     * PrivateCacheMap: it may be NULL here, and its read ahead spin lock
     * has already been released.
     */
    KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);

    /* If file was locked, release it */
    if (Locked)
    {
        SharedCacheMap->Callbacks->ReleaseFromReadAhead(SharedCacheMap->LazyWriteContext);
    }

    /* And drop our extra reference (See: CcScheduleReadAhead) */
    ObDereferenceObject(FileObject);

    return;
}
/*
* @unimplemented
*/

View file

@ -13,14 +13,6 @@
#define NDEBUG
#include <debug.h>
typedef enum _WORK_QUEUE_FUNCTIONS
{
ReadAhead = 1,
WriteBehind = 2,
LazyWrite = 3,
SetDone = 4,
} WORK_QUEUE_FUNCTIONS, *PWORK_QUEUE_FUNCTIONS;
/* Counters:
* - Amount of pages flushed by lazy writer
* - Number of times lazy writer ran
@ -31,6 +23,7 @@ ULONG CcLazyWriteIos = 0;
/* Internal vars (MS):
* - Lazy writer status structure
* - Lookaside list where to allocate work items
* - Queue for high priority work items (read ahead)
* - Queue for regular work items
* - Available worker threads
* - Queue for stuff to be queued after lazy writer is done
@ -43,6 +36,7 @@ ULONG CcLazyWriteIos = 0;
*/
LAZY_WRITER LazyWriter;
NPAGED_LOOKASIDE_LIST CcTwilightLookasideList;
LIST_ENTRY CcExpressWorkQueue;
LIST_ENTRY CcRegularWorkQueue;
LIST_ENTRY CcIdleWorkerThreadList;
LIST_ENTRY CcPostTickWorkQueue;
@ -231,10 +225,22 @@ CcWorkerThread(
DropThrottle = FALSE;
}
/* If no work to do, we're done */
if (IsListEmpty(&CcRegularWorkQueue))
/* Check first if we have read ahead to do */
if (IsListEmpty(&CcExpressWorkQueue))
{
break;
/* If not, check regular queue */
if (IsListEmpty(&CcRegularWorkQueue))
{
break;
}
else
{
WorkItem = CONTAINING_RECORD(CcRegularWorkQueue.Flink, WORK_QUEUE_ENTRY, WorkQueueLinks);
}
}
else
{
WorkItem = CONTAINING_RECORD(CcExpressWorkQueue.Flink, WORK_QUEUE_ENTRY, WorkQueueLinks);
}
/* Get our work item, if someone is waiting for us to finish
@ -242,7 +248,6 @@ CcWorkerThread(
* then, quit running to let the others do
* and throttle so that noone starts till current activity is over
*/
WorkItem = CONTAINING_RECORD(CcRegularWorkQueue.Flink, WORK_QUEUE_ENTRY, WorkQueueLinks);
if (WorkItem->Function == SetDone && CcNumberActiveWorkerThreads > 1)
{
CcQueueThrottle = TRUE;
@ -256,7 +261,10 @@ CcWorkerThread(
/* And handle it */
switch (WorkItem->Function)
{
/* We only support lazy write now */
case ReadAhead:
CcPerformReadAhead(WorkItem->Parameters.Read.FileObject);
break;
case LazyWrite:
CcLazyWriteScan();
break;
@ -265,6 +273,10 @@ CcWorkerThread(
KeSetEvent(WorkItem->Parameters.Event.Event, IO_NO_INCREMENT, FALSE);
DropThrottle = TRUE;
break;
default:
DPRINT1("Ignored item: %p (%d)\n", WorkItem, WorkItem->Function);
break;
}
/* And release the item */

View file

@ -1191,8 +1191,14 @@ CcRosReleaseFileCache (
RemoveEntryList(&PrivateMap->PrivateLinks);
KeReleaseSpinLock(&SharedCacheMap->CacheMapLock, OldIrql);
/* And free it */
/* And free it.
* Before you even try to remove it from FO, always
* lock the master lock, to be sure not to race
* with a potential read ahead ongoing!
*/
OldIrql = KeAcquireQueuedSpinLock(LockQueueMasterLock);
FileObject->PrivateCacheMap = NULL;
KeReleaseQueuedSpinLock(LockQueueMasterLock, OldIrql);
ExFreePoolWithTag(PrivateMap, TAG_PRIVATE_CACHE_MAP);
if (SharedCacheMap->OpenCount > 0)

View file

@ -47,6 +47,7 @@ extern LIST_ENTRY CcDeferredWrites;
extern KSPIN_LOCK CcDeferredWriteSpinLock;
extern ULONG CcNumberWorkerThreads;
extern LIST_ENTRY CcIdleWorkerThreadList;
extern LIST_ENTRY CcExpressWorkQueue;
extern LIST_ENTRY CcRegularWorkQueue;
extern LIST_ENTRY CcPostTickWorkQueue;
extern NPAGED_LOOKASIDE_LIST CcTwilightLookasideList;
@ -261,6 +262,14 @@ typedef struct _WORK_QUEUE_ENTRY
unsigned char Function;
} WORK_QUEUE_ENTRY, *PWORK_QUEUE_ENTRY;
/* Kind of work carried by a WORK_QUEUE_ENTRY (stored in its Function field);
 * the worker thread switches on this value to decide what to run.
 */
typedef enum _WORK_QUEUE_FUNCTIONS
{
/* Read ahead on Parameters.Read.FileObject; worker calls CcPerformReadAhead() */
ReadAhead = 1,
/* NOTE(review): no handler for this value is visible in the worker switch here - confirm */
WriteBehind = 2,
/* Worker calls CcLazyWriteScan() */
LazyWrite = 3,
/* Presumably signals Parameters.Event.Event once reached - verify against CcWorkerThread */
SetDone = 4,
} WORK_QUEUE_FUNCTIONS, *PWORK_QUEUE_FUNCTIONS;
extern LAZY_WRITER LazyWriter;
#define NODE_TYPE_DEFERRED_WRITE 0x02FC
@ -439,6 +448,15 @@ CcScheduleLazyWriteScan(BOOLEAN NoDelay);
VOID
CcPostDeferredWrites(VOID);
/* Posts WorkItem to the given work queue (e.g. CcExpressWorkQueue for read
 * ahead items). NOTE(review): definition not visible here - locking and
 * worker wake-up behavior to be confirmed against the implementation.
 */
VOID
CcPostWorkQueue(
IN PWORK_QUEUE_ENTRY WorkItem,
IN PLIST_ENTRY WorkQueue);
/* Performs the read ahead scheduled on FileObject and drops the reference
 * taken on it when the work item was queued. Called by the worker thread
 * for ReadAhead work items.
 */
VOID
CcPerformReadAhead(
IN PFILE_OBJECT FileObject);
FORCEINLINE
NTSTATUS
CcRosAcquireVacbLock(

View file

@ -75,10 +75,8 @@ typedef struct _PRIVATE_CACHE_MAP
LARGE_INTEGER BeyondLastByte1;
LARGE_INTEGER FileOffset2;
LARGE_INTEGER BeyondLastByte2;
ULONG SequentialReadCount;
ULONG ReadAheadLength;
LARGE_INTEGER ReadAheadOffset;
LARGE_INTEGER ReadAheadBeyondLastByte;
LARGE_INTEGER ReadAheadOffset[2];
ULONG ReadAheadLength[2];
KSPIN_LOCK ReadAheadSpinLock;
LIST_ENTRY PrivateLinks;
PVOID ReadAheadWorkItem;