reactos/reactos/ntoskrnl/cc/pin.c

262 lines
5.5 KiB
C
Raw Normal View History

/*
* COPYRIGHT: See COPYING in the top level directory
* PROJECT: ReactOS kernel
* FILE: ntoskrnl/cc/pin.c
* PURPOSE: Implements cache managers pinning interface
*
* PROGRAMMERS:
*/
/* INCLUDES ******************************************************************/
#include <ntoskrnl.h>
#define NDEBUG
#include <internal/debug.h>
/* GLOBALS *******************************************************************/
extern NPAGED_LOOKASIDE_LIST iBcbLookasideList;
/* FUNCTIONS *****************************************************************/
/*
* @implemented
*/
BOOLEAN STDCALL
CcMapData (IN PFILE_OBJECT FileObject,
IN PLARGE_INTEGER FileOffset,
IN ULONG Length,
IN ULONG Flags,
OUT PVOID *pBcb,
OUT PVOID *pBuffer)
{
ULONG ReadOffset;
BOOLEAN Valid;
PBCB Bcb;
PCACHE_SEGMENT CacheSeg;
NTSTATUS Status;
PINTERNAL_BCB iBcb;
ULONG ROffset;
DPRINT("CcMapData(FileObject 0x%p, FileOffset %I64x, Length %d, Flags %d,"
" pBcb 0x%p, pBuffer 0x%p)\n", FileObject, FileOffset->QuadPart,
Length, Flags, pBcb, pBuffer);
ReadOffset = (ULONG)FileOffset->QuadPart;
ASSERT(FileObject);
ASSERT(FileObject->SectionObjectPointer);
ASSERT(FileObject->SectionObjectPointer->SharedCacheMap);
Bcb = FileObject->SectionObjectPointer->SharedCacheMap;
ASSERT(Bcb);
DPRINT("AllocationSize %I64x, FileSize %I64x\n",
Bcb->AllocationSize.QuadPart,
Bcb->FileSize.QuadPart);
if (ReadOffset % Bcb->CacheSegmentSize + Length > Bcb->CacheSegmentSize)
{
return(FALSE);
}
ROffset = ROUND_DOWN (ReadOffset, Bcb->CacheSegmentSize);
Status = CcRosRequestCacheSegment(Bcb,
ROffset,
pBuffer,
&Valid,
&CacheSeg);
if (!NT_SUCCESS(Status))
{
return(FALSE);
}
if (!Valid)
{
if (!(Flags & MAP_WAIT))
{
CcRosReleaseCacheSegment(Bcb, CacheSeg, FALSE, FALSE, FALSE);
return(FALSE);
}
if (!NT_SUCCESS(ReadCacheSegment(CacheSeg)))
{
CcRosReleaseCacheSegment(Bcb, CacheSeg, FALSE, FALSE, FALSE);
return(FALSE);
}
}
*pBuffer = (PVOID)((ULONG_PTR)(*pBuffer) + (ReadOffset % Bcb->CacheSegmentSize));
iBcb = ExAllocateFromNPagedLookasideList(&iBcbLookasideList);
if (iBcb == NULL)
{
CcRosReleaseCacheSegment(Bcb, CacheSeg, TRUE, FALSE, FALSE);
return FALSE;
}
memset(iBcb, 0, sizeof(INTERNAL_BCB));
iBcb->PFCB.NodeTypeCode = 0xDE45; /* Undocumented (CAPTIVE_PUBLIC_BCB_NODETYPECODE) */
iBcb->PFCB.NodeByteSize = sizeof(PUBLIC_BCB);
iBcb->PFCB.MappedLength = Length;
iBcb->PFCB.MappedFileOffset = *FileOffset;
iBcb->CacheSegment = CacheSeg;
2002-08-14 David Welch <welch@computer2.darkstar.org> * subsys/smss/init.c (SmPagingFilesQueryRoutine): If possible take the size of the paging file from the registry. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/mm/section.c (MmCreateDataFileSection): Extend the section if necessary. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/mm/pagefile.c (NtCreatePagingFile): Set the file size using the FileAllocationInformation class. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/mm/anonmem.c (MmWritePageVirtualMemory): Implemented function to write anonymous memory pages to the swap file. * ntoskrnl/mm/anonmem.c (MmFreeVirtualMemoryPage): Free any swap page associated with the page. * ntoskrnl/mm/mpw.c (MmWriteDirtyPages): New function to find pages to write to disk. * ntoskrnl/mm/mpw.c (MmMpwThreadMain): Implemented MPW functionality. * ntoskrnl/mm/rmap.c (MmWritePagePhysicalAddress): New function to write a single page back to disk. * ntoskrnl/mm/rmap.c (MmSetCleanAllRmaps, MmSetDirtyAllRmaps, MmIsDirtyPageRmap): New rmap function to support the MPW thread. * ntoskrnl/mm/section.c (MmWritePageSectionView): Implemented function to write back section pages. * ntoskrnl/mm/section.c (MmFreeSectionPage): Free any swap entry associated with the page; mark pages shared with the cache as dirty if necessary. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/ldr/loader.c (LdrPEProcessModule): Set name of the module into the module text structure. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/io/rw.c (NtReadFile, NtWriteFile): Use the correct test for whether to wait for the completion of i/o. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/cm/ntfunc.c (NtFlushKey): Request synchronous i/o from NtOpenFile. * ntoskrnl/cm/regfile (CmiInitPermanentRegistryHive): Request synchronous i/o from NtCreateFile. * ntoskrnl/dbg/kdb_stabs.c (LdrpLoadModuleSymbols): Request synchronous i/o from NtOpenFile. * ntoskrnl/ldr/sysdll.c (LdrpMapSystemDll): Request synchronous i/o from NtOpenFile. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/cc/view.c (CcRosSuggestFreeCacheSegment): Maintain the correct reference count. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/cc/view.c (CcRosFlushCacheSegment): New function to write back a modified cache segment. * ntoskrnl/cc/view.c (CcRosFlushDirtyPages): New function to flush some dirty pages from the cache. * ntoskrnl/cc/view.c (CcRosMarkDirtyCacheSegment): New function to mark a cache segment modified while mapped into memory as dirty. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/cc/pin.c (CcMapData, CcUnpinData, CcSetDirtyPinnedData): Store the dirty status in the BCB; don't write back dirty data immediately. 2002-08-14 David Welch <welch@computer2.darkstar.org> * include/ntos/mm.h: Added SEC_XXXX defines from 'Windows NT/2000 Native API Reference' 2002-08-14 David Welch <welch@computer2.darkstar.org> * drivers/fs/vfat/ea.c (VfatSetExtendedAttributes): Empty placeholder for extended attribute functions. 2002-08-14 David Welch <welch@computer2.darkstar.org> * drivers/fs/vfat/finfo.c (VfatSetAllocationSizeInformation): Added function to set allocation size. 2002-08-14 David Welch <welch@computer2.darkstar.org> * drivers/fs/vfat/fcb.c (vfatFCBInitializeCache): Renamed to vfatFCBInitializeCacheFromVolume. * drivers/fs/vfat/fcb.c (vfatMakeFCBFromDirEntry): Don't initialise the cache with a file object representing the volume unless the FCB is for a directory. 2002-08-14 David Welch <welch@computer2.darkstar.org> * drivers/fs/vfat/create.c (VfatPagingFileCreate): Added a new function for handling paging file only code. * drivers/fs/vfat/create.c (VfatSupersedeFile): Added a new function for doing a file supersede. * drivers/fs/vfat/create.c (VfatCreateFile): Reformatted and adjusted control flow. Set allocation size and extended attributes on create. * drivers/fs/vfat/create.c (VfatCreate): Removed goto. 2002-08-14 David Welch <welch@computer2.darkstar.org> * drivers/fs/vfat/cleanup.c (VfatCleanupFile): Renamed updEntry to VfatUpdateEntry. * drivers/fs/vfat/close.c (VfatCloseFile): Renamed updEntry to VfatUpdateEntry. * drivers/fs/vfat/dirwr.c (updEntry): Renamed to VfatUpdateEntry. * drivers/fs/vfat/dirwr.c (addEntry): Renamed to VfatAddEntry. 2002-08-14 David Welch <welch@computer2.darkstar.org> * apps/tests/sectest/sectest.c (main): Fixed formatting. svn path=/trunk/; revision=3331
2002-08-14 20:58:39 +00:00
iBcb->Dirty = FALSE;
iBcb->RefCount = 1;
*pBcb = (PVOID)iBcb;
return(TRUE);
}
/*
* @unimplemented
*/
BOOLEAN
STDCALL
CcPinMappedData (
IN PFILE_OBJECT FileObject,
IN PLARGE_INTEGER FileOffset,
IN ULONG Length,
IN ULONG Flags,
OUT PVOID * Bcb
)
{
/* no-op for current implementation. */
return TRUE;
}
/*
* @unimplemented
*/
BOOLEAN
STDCALL
CcPinRead (
IN PFILE_OBJECT FileObject,
IN PLARGE_INTEGER FileOffset,
IN ULONG Length,
IN ULONG Flags,
OUT PVOID * Bcb,
OUT PVOID * Buffer
)
{
if (CcMapData(FileObject, FileOffset, Length, Flags, Bcb, Buffer))
{
if (CcPinMappedData(FileObject, FileOffset, Length, Flags, Bcb))
return TRUE;
else
CcUnpinData(Bcb);
}
return FALSE;
}
/*
* @unimplemented
*/
BOOLEAN
STDCALL
CcPreparePinWrite (
IN PFILE_OBJECT FileObject,
IN PLARGE_INTEGER FileOffset,
IN ULONG Length,
IN BOOLEAN Zero,
IN ULONG Flags,
OUT PVOID * Bcb,
OUT PVOID * Buffer
)
{
/*
* FIXME: This is function is similar to CcPinRead, but doesn't
* read the data if they're not present. Instead it should just
* prepare the cache segments and zero them out if Zero == TRUE.
*
* For now calling CcPinRead is better than returning error or
* just having UNIMPLEMENTED here.
*/
return CcPinRead(FileObject, FileOffset, Length, Flags, Bcb, Buffer);
}
/*
* @implemented
*/
VOID STDCALL
CcSetDirtyPinnedData (IN PVOID Bcb,
IN PLARGE_INTEGER Lsn)
{
PINTERNAL_BCB iBcb = Bcb;
2002-08-14 David Welch <welch@computer2.darkstar.org> * subsys/smss/init.c (SmPagingFilesQueryRoutine): If possible take the size of the paging file from the registry. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/mm/section.c (MmCreateDataFileSection): Extend the section if necessary. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/mm/pagefile.c (NtCreatePagingFile): Set the file size using the FileAllocationInformation class. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/mm/anonmem.c (MmWritePageVirtualMemory): Implemented function to write anonymous memory pages to the swap file. * ntoskrnl/mm/anonmem.c (MmFreeVirtualMemoryPage): Free any swap page associated with the page. * ntoskrnl/mm/mpw.c (MmWriteDirtyPages): New function to find pages to write to disk. * ntoskrnl/mm/mpw.c (MmMpwThreadMain): Implemented MPW functionality. * ntoskrnl/mm/rmap.c (MmWritePagePhysicalAddress): New function to write a single page back to disk. * ntoskrnl/mm/rmap.c (MmSetCleanAllRmaps, MmSetDirtyAllRmaps, MmIsDirtyPageRmap): New rmap function to support the MPW thread. * ntoskrnl/mm/section.c (MmWritePageSectionView): Implemented function to write back section pages. * ntoskrnl/mm/section.c (MmFreeSectionPage): Free any swap entry associated with the page; mark pages shared with the cache as dirty if necessary. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/ldr/loader.c (LdrPEProcessModule): Set name of the module into the module text structure. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/io/rw.c (NtReadFile, NtWriteFile): Use the correct test for whether to wait for the completion of i/o. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/cm/ntfunc.c (NtFlushKey): Request synchronous i/o from NtOpenFile. * ntoskrnl/cm/regfile (CmiInitPermanentRegistryHive): Request synchronous i/o from NtCreateFile. * ntoskrnl/dbg/kdb_stabs.c (LdrpLoadModuleSymbols): Request synchronous i/o from NtOpenFile. * ntoskrnl/ldr/sysdll.c (LdrpMapSystemDll): Request synchronous i/o from NtOpenFile. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/cc/view.c (CcRosSuggestFreeCacheSegment): Maintain the correct reference count. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/cc/view.c (CcRosFlushCacheSegment): New function to write back a modified cache segment. * ntoskrnl/cc/view.c (CcRosFlushDirtyPages): New function to flush some dirty pages from the cache. * ntoskrnl/cc/view.c (CcRosMarkDirtyCacheSegment): New function to mark a cache segment modified while mapped into memory as dirty. 2002-08-14 David Welch <welch@computer2.darkstar.org> * ntoskrnl/cc/pin.c (CcMapData, CcUnpinData, CcSetDirtyPinnedData): Store the dirty status in the BCB; don't write back dirty data immediately. 2002-08-14 David Welch <welch@computer2.darkstar.org> * include/ntos/mm.h: Added SEC_XXXX defines from 'Windows NT/2000 Native API Reference' 2002-08-14 David Welch <welch@computer2.darkstar.org> * drivers/fs/vfat/ea.c (VfatSetExtendedAttributes): Empty placeholder for extended attribute functions. 2002-08-14 David Welch <welch@computer2.darkstar.org> * drivers/fs/vfat/finfo.c (VfatSetAllocationSizeInformation): Added function to set allocation size. 2002-08-14 David Welch <welch@computer2.darkstar.org> * drivers/fs/vfat/fcb.c (vfatFCBInitializeCache): Renamed to vfatFCBInitializeCacheFromVolume. * drivers/fs/vfat/fcb.c (vfatMakeFCBFromDirEntry): Don't initialise the cache with a file object representing the volume unless the FCB is for a directory. 2002-08-14 David Welch <welch@computer2.darkstar.org> * drivers/fs/vfat/create.c (VfatPagingFileCreate): Added a new function for handling paging file only code. * drivers/fs/vfat/create.c (VfatSupersedeFile): Added a new function for doing a file supersede. * drivers/fs/vfat/create.c (VfatCreateFile): Reformatted and adjusted control flow. Set allocation size and extended attributes on create. * drivers/fs/vfat/create.c (VfatCreate): Removed goto. 2002-08-14 David Welch <welch@computer2.darkstar.org> * drivers/fs/vfat/cleanup.c (VfatCleanupFile): Renamed updEntry to VfatUpdateEntry. * drivers/fs/vfat/close.c (VfatCloseFile): Renamed updEntry to VfatUpdateEntry. * drivers/fs/vfat/dirwr.c (updEntry): Renamed to VfatUpdateEntry. * drivers/fs/vfat/dirwr.c (addEntry): Renamed to VfatAddEntry. 2002-08-14 David Welch <welch@computer2.darkstar.org> * apps/tests/sectest/sectest.c (main): Fixed formatting. svn path=/trunk/; revision=3331
2002-08-14 20:58:39 +00:00
iBcb->Dirty = TRUE;
}
/*
* @implemented
*/
VOID STDCALL
CcUnpinData (IN PVOID Bcb)
{
PINTERNAL_BCB iBcb = Bcb;
CcRosReleaseCacheSegment(iBcb->CacheSegment->Bcb, iBcb->CacheSegment, TRUE,
iBcb->Dirty, FALSE);
if (--iBcb->RefCount == 0)
{
ExFreeToNPagedLookasideList(&iBcbLookasideList, iBcb);
}
}
/*
* @unimplemented
*/
VOID
STDCALL
CcUnpinDataForThread (
IN PVOID Bcb,
IN ERESOURCE_THREAD ResourceThreadId
)
{
UNIMPLEMENTED;
}
/*
* @implemented
*/
VOID
STDCALL
CcRepinBcb (
IN PVOID Bcb
)
{
PINTERNAL_BCB iBcb = Bcb;
iBcb->RefCount++;
}
/*
* @unimplemented
*/
VOID
STDCALL
CcUnpinRepinnedBcb (
IN PVOID Bcb,
IN BOOLEAN WriteThrough,
IN PIO_STATUS_BLOCK IoStatus
)
{
PINTERNAL_BCB iBcb = Bcb;
if (--iBcb->RefCount == 0)
{
IoStatus->Information = 0;
if (WriteThrough)
{
- Okay so...listen up. First off: When you acquire a lock such as a fast mutex, you should never acquire it recursively. For example, when you handle a page fault in a section, then page fault while handling that page fault (which is perfectly okay), you shouldn't be trying to re-acquire the address space lock that you're already holding. After this fix, this scenario works and countless others. Apps like QTInfo now work and load, and PictureViewer doesn't BSOD the system anymore. I've fixed this by changing the lock to a pushlock. It not only increases speed inside the memory manager significantly (such as during page fault handling), but does allow recursive acquisition without any problems. - Now if that wasn't bad enough, here's a couple more tips. Fast Mutexes actually require APC_LEVEL to be effective. If you're going to be using a Fast Mutex and calling it with the "Unsafe" version, then don't expect anything to work. Also, using functions like "CcTryToAcquireBrokenMutex" where correct code is duplicated then hacked to work isn't a big help either. And that's not all. Fast Mutex disables kernel APCs by setting the KernelApcDisable flag on, and it's expected that the count inside the fast mutex will match the count inside the thread. In other words, LOCK ACQUISITION AND RELEASE MUST BE ORDERED. You can't acquire LOCK A and B, and then release lock A and B, because that leads to deadlocks and other issues. So of course, the Cache Manager acquired a view lock, then acquired a segment lock, then released the view lock, then released the segment lock, then re-acquired the view lock. Uh, no, that won't work. You know what else doesn't work so well? Disabling APCs about 6-9 times to acquire a single lock, and using spinlocks in the same code path as well. Just how paranoid are you about thread safety, but still manage to get it wrong? Okay, so we've got recursion, out-of-order lock acquision and release, made-up "broken" acquire functions, and using a lock that depends on APC_LEVEL at PASSIVE_LEVEL. The best part is when Cc builds an array of cache segments, and locks each of them... then during release, the list gets parsed head-first, so the first acquired locks get released first. So locks a, b, c, d get acquired, then a, b, c, d get released. Great! Sounds about right for ReactOS's Cache Manager design. I've changed the view lock to a guarded mutex -- which actually properly disables APCs and works at PASSIVE_LEVEL, and changed the segment locks to be push locks. First it'll be 10 times faster then acquiring a bazillion fast mutexes, especially since APCs have already been disabled at this point, and it also allows you to do most of the stupid things the Cache Manager does. Out-of-order release is still not going to work well, so eventually on a multi-processor machine the code will completely die -- but at least it'll work on UP for now. In the end, this makes things like the Inkscape installer and Quicktime Installer to work, and probably countless other things that generated ASSERTS in the fast mutex code. -- Alex Ionescu svn path=/trunk/; revision=30401
2007-11-12 19:00:26 +00:00
ExAcquirePushLockExclusive(&iBcb->CacheSegment->Lock);
if (iBcb->CacheSegment->Dirty)
{
IoStatus->Status = CcRosFlushCacheSegment(iBcb->CacheSegment);
}
else
{
IoStatus->Status = STATUS_SUCCESS;
}
- Okay so...listen up. First off: When you acquire a lock such as a fast mutex, you should never acquire it recursively. For example, when you handle a page fault in a section, then page fault while handling that page fault (which is perfectly okay), you shouldn't be trying to re-acquire the address space lock that you're already holding. After this fix, this scenario works and countless others. Apps like QTInfo now work and load, and PictureViewer doesn't BSOD the system anymore. I've fixed this by changing the lock to a pushlock. It not only increases speed inside the memory manager significantly (such as during page fault handling), but does allow recursive acquisition without any problems. - Now if that wasn't bad enough, here's a couple more tips. Fast Mutexes actually require APC_LEVEL to be effective. If you're going to be using a Fast Mutex and calling it with the "Unsafe" version, then don't expect anything to work. Also, using functions like "CcTryToAcquireBrokenMutex" where correct code is duplicated then hacked to work isn't a big help either. And that's not all. Fast Mutex disables kernel APCs by setting the KernelApcDisable flag on, and it's expected that the count inside the fast mutex will match the count inside the thread. In other words, LOCK ACQUISITION AND RELEASE MUST BE ORDERED. You can't acquire LOCK A and B, and then release lock A and B, because that leads to deadlocks and other issues. So of course, the Cache Manager acquired a view lock, then acquired a segment lock, then released the view lock, then released the segment lock, then re-acquired the view lock. Uh, no, that won't work. You know what else doesn't work so well? Disabling APCs about 6-9 times to acquire a single lock, and using spinlocks in the same code path as well. Just how paranoid are you about thread safety, but still manage to get it wrong? Okay, so we've got recursion, out-of-order lock acquision and release, made-up "broken" acquire functions, and using a lock that depends on APC_LEVEL at PASSIVE_LEVEL. The best part is when Cc builds an array of cache segments, and locks each of them... then during release, the list gets parsed head-first, so the first acquired locks get released first. So locks a, b, c, d get acquired, then a, b, c, d get released. Great! Sounds about right for ReactOS's Cache Manager design. I've changed the view lock to a guarded mutex -- which actually properly disables APCs and works at PASSIVE_LEVEL, and changed the segment locks to be push locks. First it'll be 10 times faster then acquiring a bazillion fast mutexes, especially since APCs have already been disabled at this point, and it also allows you to do most of the stupid things the Cache Manager does. Out-of-order release is still not going to work well, so eventually on a multi-processor machine the code will completely die -- but at least it'll work on UP for now. In the end, this makes things like the Inkscape installer and Quicktime Installer to work, and probably countless other things that generated ASSERTS in the fast mutex code. -- Alex Ionescu svn path=/trunk/; revision=30401
2007-11-12 19:00:26 +00:00
ExReleasePushLockExclusive(&iBcb->CacheSegment->Lock);
}
else
{
IoStatus->Status = STATUS_SUCCESS;
}
ExFreeToNPagedLookasideList(&iBcbLookasideList, iBcb);
}
}