reactos/drivers/filesystems/btrfs/write.c
Pierre Schweitzer eb7fbc253f
[BTRFS] Upgrade to 1.1
CORE-15452
2018-12-16 12:06:46 +01:00

4816 lines
170 KiB
C

/* Copyright (c) Mark Harmstone 2016-17
*
* This file is part of WinBtrfs.
*
* WinBtrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public Licence as published by
* the Free Software Foundation, either version 3 of the Licence, or
* (at your option) any later version.
*
* WinBtrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public Licence for more details.
*
* You should have received a copy of the GNU Lesser General Public Licence
* along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
#include "btrfs_drv.h"
// Per-stripe state for a chunk write; one entry per stripe of the chunk.
// The prepare_raid0_write / prepare_raid10_write helpers fill in start, end and mdl.
typedef struct {
// offset of the first byte to write on this stripe (derived from get_raid0_offset)
UINT64 start;
// offset one past the last byte to write; end - start is the length given to IoAllocateMdl
UINT64 end;
// NOTE(review): not assigned by the RAID0/RAID10 helpers (RAID10 only copies it between
// sub-stripes) - presumably a data buffer owned by another code path; confirm
UINT8* data;
// MDL describing the pages to be written to this stripe
PMDL mdl;
// NOTE(review): not referenced in the RAID0/RAID10 helpers - presumably the offset of this
// stripe's data within the originating IRP; confirm against its users
UINT64 irp_offset;
} write_stripe;
// Forward declaration of the I/O completion routine. The SAL annotation applies to
// whichever prototype is selected; the ReactOS build spells out the NTAPI calling
// convention explicitly.
_Function_class_(IO_COMPLETION_ROUTINE)
#ifdef __REACTOS__
static NTSTATUS NTAPI write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr);
#else
static NTSTATUS write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr);
#endif
// Forward declaration; the definition is not part of this section of the file.
static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback);
// Objects defined in another translation unit.
// NOTE(review): the t* types look like function-pointer typedefs for routines resolved at
// run time - confirm against their definitions.
extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
extern tCcCopyWriteEx fCcCopyWriteEx;
extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
extern BOOL diskacc;
/* Looks for free space of at least `length` bytes inside chunk `c`.
 *
 * The chunk's space cache is loaded on demand. c->space_size is then walked from
 * its head: an entry of exactly `length` bytes wins immediately; otherwise the
 * entry just before the first one that is too small is used.
 * NOTE(review): this only picks the tightest fit if c->space_size is kept sorted
 * from largest to smallest - confirm where the list is maintained.
 *
 * On success the start address of the chosen entry is stored in *address and
 * TRUE is returned. FALSE means the chunk cannot satisfy the request (or its
 * cache could not be loaded). */
BOOL find_data_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
    LIST_ENTRY* entry;
    space* previous = NULL;
    space* tail;

    TRACE("(%p, %llx, %llx, %p)\n", Vcb, c->offset, length, address);

    // the chunk as a whole does not have that many unused bytes
    if (length > c->chunk_item->size - c->used)
        return FALSE;

    if (!c->cache_loaded) {
        NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);

        if (!NT_SUCCESS(Status)) {
            ERR("load_cache_chunk returned %08x\n", Status);
            return FALSE;
        }
    }

    if (IsListEmpty(&c->space_size))
        return FALSE;

    for (entry = c->space_size.Flink; entry != &c->space_size; entry = entry->Flink) {
        space* cur = CONTAINING_RECORD(entry, space, list_entry_size);

        if (cur->size == length) {
            *address = cur->address;
            return TRUE;
        }

        if (cur->size < length) {
            // first entry that is too small: fall back to its predecessor, if any
            if (!previous)
                return FALSE;

            *address = previous->address;
            return TRUE;
        }

        previous = cur;
    }

    // walked the whole list without finding an entry that is too small
    tail = CONTAINING_RECORD(c->space_size.Blink, space, list_entry_size);

    if (tail->size > length) {
        *address = tail->address;
        return TRUE;
    }

    return FALSE;
}
/* Returns the chunk whose logical range [offset, offset + size) contains
 * `address`, or NULL if no chunk on Vcb->chunks covers it.
 * Vcb->chunk_lock is held shared for the duration of the search only. */
chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address) {
    chunk* found = NULL;
    LIST_ENTRY* entry;

    ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);

    for (entry = Vcb->chunks.Flink; entry != &Vcb->chunks; entry = entry->Flink) {
        chunk* candidate = CONTAINING_RECORD(entry, chunk, list_entry);

        if (address < candidate->offset)
            continue;

        if (address >= candidate->offset + candidate->chunk_item->size)
            continue;

        found = candidate;
        break;
    }

    ExReleaseResourceLite(&Vcb->chunk_lock);

    return found;
}
// Candidate placement for one stripe of a chunk being allocated (see alloc_chunk).
typedef struct {
// free-space entry on the device's space list that the stripe will be carved from
space* dh;
// device holding that free space
device* device;
} stripe;
/* Chooses the logical address for a new chunk of `size` bytes.
 *
 * Starting at 0xc00000, Vcb->chunks is walked in list order and the first gap
 * of at least `size` bytes in front of an existing chunk is returned. If no
 * such gap exists the result is the end of the last chunk visited (or 0xc00000
 * when there are no chunks at all). */
static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) {
    UINT64 candidate = 0xc00000;
    LIST_ENTRY* entry;

    for (entry = Vcb->chunks.Flink; entry != &Vcb->chunks; entry = entry->Flink) {
        chunk* existing = CONTAINING_RECORD(entry, chunk, list_entry);

        // enough room between the candidate address and this chunk
        if (existing->offset >= candidate + size)
            break;

        candidate = existing->offset + existing->chunk_item->size;
    }

    return candidate;
}
/* Picks the device and the free-space entries for both copies of a DUP chunk.
 *
 * First pass: among writable, non-relocating devices that have a device object,
 * prefer the one with the lowest usage ratio that has either two holes of at
 * least max_stripe_size, or a single hole of at least twice that size.
 *
 * Second pass (only when full_size is FALSE and the first pass found nothing):
 * accept holes smaller than max_stripe_size and keep the device offering the
 * largest usable per-copy size.
 *
 * On success stripes[0] and stripes[1] are filled in (both may refer to the same
 * hole, in which case the caller splits it) and TRUE is returned. */
static BOOL find_new_dup_stripes(device_extension* Vcb, stripe* stripes, UINT64 max_stripe_size, BOOL full_size) {
    UINT64 best_usage = 0xffffffffffffffff;
    space *first_hole = NULL, *second_hole = NULL;
    device* chosen = NULL;
    LIST_ENTRY* dev_le;

    for (dev_le = Vcb->devices.Flink; dev_le != &Vcb->devices; dev_le = dev_le->Flink) {
        device* dev = CONTAINING_RECORD(dev_le, device, list_entry);
        space *dh1 = NULL, *dh2 = NULL;
        LIST_ENTRY* hole_le;
        UINT64 usage;

        if (dev->readonly || dev->reloc || !dev->devobj)
            continue;

        usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;

        // favour devices which have been used the least
        if (usage >= best_usage)
            continue;

        for (hole_le = dev->space.Flink; hole_le != &dev->space; hole_le = hole_le->Flink) {
            space* dh = CONTAINING_RECORD(hole_le, space, list_entry);

            if (dh->size < max_stripe_size)
                continue;

            if (!dh1 || !dh2 || dh->size < dh1->size) {
                dh2 = dh1;
                dh1 = dh;
            }
        }

        if (!dh1)
            continue;

        // a single hole is only good enough if it can hold both copies
        if (!dh2 && dh1->size < 2 * max_stripe_size)
            continue;

        chosen = dev;
        best_usage = usage;
        first_hole = dh1;
        second_hole = dh2 ? dh2 : dh1;
    }

    if (!first_hole) {
        UINT64 best_size = 0;

        // Can't find hole of at least max_stripe_size; look for the largest one we can find

        if (full_size)
            return FALSE;

        for (dev_le = Vcb->devices.Flink; dev_le != &Vcb->devices; dev_le = dev_le->Flink) {
            device* dev = CONTAINING_RECORD(dev_le, device, list_entry);
            space *dh1 = NULL, *dh2 = NULL;
            LIST_ENTRY* hole_le;
            UINT64 devsize;

            if (dev->readonly || dev->reloc)
                continue;

            for (hole_le = dev->space.Flink; hole_le != &dev->space; hole_le = hole_le->Flink) {
                space* dh = CONTAINING_RECORD(hole_le, space, list_entry);

                if (!dh1 || !dh2 || dh->size < dh1->size) {
                    dh2 = dh1;
                    dh1 = dh;
                }
            }

            if (!dh1)
                continue;

            // per-copy size this device could offer: half of one hole, or two separate holes
            if (dh2)
                devsize = max(dh1->size / 2, min(dh1->size, dh2->size));
            else
                devsize = dh1->size / 2;

            if (devsize <= best_size)
                continue;

            chosen = dev;
            first_hole = dh1;

            if (dh2 && min(dh1->size, dh2->size) > dh1->size / 2)
                second_hole = dh2;
            else
                second_hole = dh1;

            best_size = devsize;
        }

        if (!first_hole)
            return FALSE;
    }

    stripes[0].device = stripes[1].device = chosen;
    stripes[0].dh = first_hole;
    stripes[1].dh = second_hole;

    return TRUE;
}
static BOOL find_new_stripe(device_extension* Vcb, stripe* stripes, UINT16 i, UINT64 max_stripe_size, BOOL allow_missing, BOOL full_size) {
UINT64 k, devusage = 0xffffffffffffffff;
space* devdh = NULL;
LIST_ENTRY* le;
device* dev2 = NULL;
le = Vcb->devices.Flink;
while (le != &Vcb->devices) {
device* dev = CONTAINING_RECORD(le, device, list_entry);
UINT64 usage;
BOOL skip = FALSE;
if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
le = le->Flink;
continue;
}
// skip this device if it already has a stripe
if (i > 0) {
for (k = 0; k < i; k++) {
if (stripes[k].device == dev) {
skip = TRUE;
break;
}
}
}
if (!skip) {
usage = (dev->devitem.bytes_used * 4096) / dev->devitem.num_bytes;
// favour devices which have been used the least
if (usage < devusage) {
if (!IsListEmpty(&dev->space)) {
LIST_ENTRY* le2;
le2 = dev->space.Flink;
while (le2 != &dev->space) {
space* dh = CONTAINING_RECORD(le2, space, list_entry);
if ((dev2 != dev && dh->size >= max_stripe_size) ||
(dev2 == dev && dh->size >= max_stripe_size && dh->size < devdh->size)
) {
devdh = dh;
dev2 = dev;
devusage = usage;
}
le2 = le2->Flink;
}
}
}
}
le = le->Flink;
}
if (!devdh) {
// Can't find hole of at least max_stripe_size; look for the largest one we can find
if (full_size)
return FALSE;
le = Vcb->devices.Flink;
while (le != &Vcb->devices) {
device* dev = CONTAINING_RECORD(le, device, list_entry);
BOOL skip = FALSE;
if (dev->readonly || dev->reloc || (!dev->devobj && !allow_missing)) {
le = le->Flink;
continue;
}
// skip this device if it already has a stripe
if (i > 0) {
for (k = 0; k < i; k++) {
if (stripes[k].device == dev) {
skip = TRUE;
break;
}
}
}
if (!skip) {
if (!IsListEmpty(&dev->space)) {
LIST_ENTRY* le2;
le2 = dev->space.Flink;
while (le2 != &dev->space) {
space* dh = CONTAINING_RECORD(le2, space, list_entry);
if (!devdh || devdh->size < dh->size) {
devdh = dh;
dev2 = dev;
}
le2 = le2->Flink;
}
}
}
le = le->Flink;
}
if (!devdh)
return FALSE;
}
stripes[i].dh = devdh;
stripes[i].device = dev2;
return TRUE;
}
/* Allocates a new chunk with the block-group type and RAID profile given by `flags`.
 *
 * Steps:
 *  - sum the size of all devices and derive the maximum stripe / chunk size for
 *    the block type (DATA is checked first so that mixed chunks get DATA sizes),
 *    capping the chunk at 10% of the total size;
 *  - derive the stripe-count limits from the RAID profile;
 *  - pick a device and free-space entry for every stripe (optionally using
 *    missing devices when Vcb->options.allow_degraded is set and the profile
 *    tolerates it);
 *  - build the CHUNK_ITEM, pick a logical address, initialise the chunk and give
 *    it a single free-space entry covering its whole range;
 *  - charge the space to each device and insert the chunk into Vcb->chunks,
 *    keeping the list ordered by offset.
 *
 * Vcb        volume the chunk is created on
 * flags      BLOCK_FLAG_* type and profile bits, stored verbatim in the chunk item
 * pc         receives the new chunk on success; untouched on failure
 * full_size  if TRUE, fail rather than fall back to holes smaller than the
 *            maximum stripe size
 *
 * Returns STATUS_SUCCESS, STATUS_DISK_FULL (not enough stripes / space),
 * STATUS_INSUFFICIENT_RESOURCES (allocation failure) or STATUS_INTERNAL_ERROR
 * (flags name no known block type).
 *
 * NOTE(review): Vcb->chunks and the device space lists are modified without any
 * lock being taken here - presumably the caller holds the relevant locks; confirm. */
NTSTATUS alloc_chunk(device_extension* Vcb, UINT64 flags, chunk** pc, BOOL full_size) {
    NTSTATUS Status;
    UINT64 max_stripe_size, max_chunk_size, stripe_size, stripe_length, factor;
    UINT64 total_size = 0, logaddr;
    UINT16 i, type, num_stripes, sub_stripes, max_stripes, min_stripes, allowed_missing;
    stripe* stripes = NULL;
    UINT16 cisize;
    CHUNK_ITEM_STRIPE* cis;
    chunk* c = NULL;
    space* s = NULL;
    LIST_ENTRY* le;

    le = Vcb->devices.Flink;
    while (le != &Vcb->devices) {
        device* dev = CONTAINING_RECORD(le, device, list_entry);

        total_size += dev->devitem.num_bytes;

        le = le->Flink;
    }

    TRACE("total_size = %llx\n", total_size);

    // We purposely check for DATA first - mixed blocks have the same size
    // as DATA ones.
    if (flags & BLOCK_FLAG_DATA) {
        max_stripe_size = 0x40000000; // 1 GB
        max_chunk_size = 10 * max_stripe_size;
    } else if (flags & BLOCK_FLAG_METADATA) {
        if (total_size > 0xC80000000) // 50 GB
            max_stripe_size = 0x40000000; // 1 GB
        else
            max_stripe_size = 0x10000000; // 256 MB

        max_chunk_size = max_stripe_size;
    } else if (flags & BLOCK_FLAG_SYSTEM) {
        max_stripe_size = 0x2000000; // 32 MB
        max_chunk_size = 2 * max_stripe_size;
    } else {
        ERR("unknown chunk type\n");
        return STATUS_INTERNAL_ERROR;
    }

    if (flags & BLOCK_FLAG_DUPLICATE) {
        min_stripes = 2;
        max_stripes = 2;
        sub_stripes = 0;
        type = BLOCK_FLAG_DUPLICATE;
        allowed_missing = 0;
    } else if (flags & BLOCK_FLAG_RAID0) {
        min_stripes = 2;
        max_stripes = (UINT16)min(0xffff, Vcb->superblock.num_devices);
        sub_stripes = 0;
        type = BLOCK_FLAG_RAID0;
        allowed_missing = 0;
    } else if (flags & BLOCK_FLAG_RAID1) {
        min_stripes = 2;
        max_stripes = 2;
        sub_stripes = 1;
        type = BLOCK_FLAG_RAID1;
        allowed_missing = 1;
    } else if (flags & BLOCK_FLAG_RAID10) {
        min_stripes = 4;
        max_stripes = (UINT16)min(0xffff, Vcb->superblock.num_devices);
        sub_stripes = 2;
        type = BLOCK_FLAG_RAID10;
        allowed_missing = 1;
    } else if (flags & BLOCK_FLAG_RAID5) {
        min_stripes = 3;
        max_stripes = (UINT16)min(0xffff, Vcb->superblock.num_devices);
        sub_stripes = 1;
        type = BLOCK_FLAG_RAID5;
        allowed_missing = 1;
    } else if (flags & BLOCK_FLAG_RAID6) {
        min_stripes = 4;
        max_stripes = 257;
        sub_stripes = 1;
        type = BLOCK_FLAG_RAID6;
        allowed_missing = 2;
    } else { // SINGLE
        min_stripes = 1;
        max_stripes = 1;
        sub_stripes = 1;
        type = 0;
        allowed_missing = 0;
    }

    if (max_chunk_size > total_size / 10) { // cap at 10%
        max_chunk_size = total_size / 10;
        max_stripe_size = max_chunk_size / min_stripes;
    }

    TRACE("would allocate a new chunk of %llx bytes and stripe %llx\n", max_chunk_size, max_stripe_size);

    stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * max_stripes, ALLOC_TAG);
    if (!stripes) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto end;
    }

    num_stripes = 0;

    if (type == BLOCK_FLAG_DUPLICATE) {
        if (!find_new_dup_stripes(Vcb, stripes, max_stripe_size, full_size)) {
            Status = STATUS_DISK_FULL;
            goto end;
        }
        else
            num_stripes = max_stripes;
    } else {
        for (i = 0; i < max_stripes; i++) {
            if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, FALSE, full_size))
                break;
            else
                num_stripes++;
        }
    }

    // not enough present devices: top up with missing ones if the profile and options allow it
    if (num_stripes < min_stripes && Vcb->options.allow_degraded && allowed_missing > 0) {
        UINT16 added_missing = 0;

        for (i = num_stripes; i < max_stripes; i++) {
            if (!find_new_stripe(Vcb, stripes, i, max_stripe_size, TRUE, full_size))
                break;
            else {
                added_missing++;
                if (added_missing >= allowed_missing)
                    break;
            }
        }

        num_stripes += added_missing;
    }

    // for RAID10, round down to an even number of stripes
    if (type == BLOCK_FLAG_RAID10 && (num_stripes % sub_stripes) != 0) {
        num_stripes -= num_stripes % sub_stripes;
    }

    if (num_stripes < min_stripes) {
        WARN("found %u stripes, needed at least %u\n", num_stripes, min_stripes);
        Status = STATUS_DISK_FULL;
        goto end;
    }

    c = ExAllocatePoolWithTag(NonPagedPool, sizeof(chunk), ALLOC_TAG);
    if (!c) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto end;
    }

    c->devices = NULL;

    cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
    c->chunk_item = ExAllocatePoolWithTag(NonPagedPool, cisize, ALLOC_TAG);
    if (!c->chunk_item) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto end;
    }

    stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel

    if (type == BLOCK_FLAG_DUPLICATE && stripes[1].dh == stripes[0].dh)
        stripe_size = min(stripes[0].dh->size / 2, max_stripe_size);
    else {
        // the smallest chosen hole limits the stripe size
        stripe_size = max_stripe_size;
        for (i = 0; i < num_stripes; i++) {
            if (stripes[i].dh->size < stripe_size)
                stripe_size = stripes[i].dh->size;
        }
    }

    // factor = number of stripes that contribute usable (non-redundant) space
    if (type == 0 || type == BLOCK_FLAG_DUPLICATE || type == BLOCK_FLAG_RAID1)
        factor = 1;
    else if (type == BLOCK_FLAG_RAID0)
        factor = num_stripes;
    else if (type == BLOCK_FLAG_RAID10)
        factor = num_stripes / sub_stripes;
    else if (type == BLOCK_FLAG_RAID5)
        factor = num_stripes - 1;
    else // BLOCK_FLAG_RAID6 - the only remaining value `type` can have been given above
        factor = num_stripes - 2;

    if (stripe_size * factor > max_chunk_size)
        stripe_size = max_chunk_size / factor;

    // round down to a whole number of stripe lengths
    if (stripe_size % stripe_length > 0)
        stripe_size -= stripe_size % stripe_length;

    if (stripe_size == 0) {
        ERR("not enough free space found (stripe_size == 0)\n");
        Status = STATUS_DISK_FULL;
        goto end;
    }

    c->chunk_item->size = stripe_size * factor;
    c->chunk_item->root_id = Vcb->extent_root->id;
    c->chunk_item->stripe_length = stripe_length;
    c->chunk_item->type = flags;
    c->chunk_item->opt_io_alignment = (UINT32)c->chunk_item->stripe_length;
    c->chunk_item->opt_io_width = (UINT32)c->chunk_item->stripe_length;
    c->chunk_item->sector_size = stripes[0].device->devitem.minimal_io_size;
    c->chunk_item->num_stripes = num_stripes;
    c->chunk_item->sub_stripes = sub_stripes;

    c->devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
    if (!c->devices) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto end;
    }

    // Allocate the chunk's initial free-space entry now, while a failure can still
    // be handled by simply freeing memory. Once the ERESOURCEs below have been
    // initialised, the chunk must not be freed without ExDeleteResourceLite, so no
    // failure path may follow them.
    s = ExAllocatePoolWithTag(NonPagedPool, sizeof(space), ALLOC_TAG);
    if (!s) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto end;
    }

    // the stripe array follows the CHUNK_ITEM header in the same allocation
    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];

    for (i = 0; i < num_stripes; i++) {
        cis[i].dev_id = stripes[i].device->devitem.dev_id;

        // second DUP copy sharing the first copy's hole goes directly after it
        if (type == BLOCK_FLAG_DUPLICATE && i == 1 && stripes[i].dh == stripes[0].dh)
            cis[i].offset = stripes[0].dh->address + stripe_size;
        else
            cis[i].offset = stripes[i].dh->address;

        cis[i].dev_uuid = stripes[i].device->devitem.device_uuid;

        c->devices[i] = stripes[i].device;
    }

    logaddr = find_new_chunk_address(Vcb, c->chunk_item->size);

    Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;

    c->size = cisize;
    c->offset = logaddr;
    c->used = c->oldused = 0;
    c->cache = c->old_cache = NULL;
    c->readonly = FALSE;
    c->reloc = FALSE;
    c->last_alloc_set = FALSE;
    c->last_stripe = 0;
    c->cache_loaded = TRUE;
    c->changed = FALSE;
    c->space_changed = FALSE;
    c->balance_num = 0;

    InitializeListHead(&c->space);
    InitializeListHead(&c->space_size);
    InitializeListHead(&c->deleting);
    InitializeListHead(&c->changed_extents);

    InitializeListHead(&c->range_locks);
    ExInitializeResourceLite(&c->range_locks_lock);
    KeInitializeEvent(&c->range_locks_event, NotificationEvent, FALSE);

    InitializeListHead(&c->partial_stripes);
    ExInitializeResourceLite(&c->partial_stripes_lock);

    ExInitializeResourceLite(&c->lock);
    ExInitializeResourceLite(&c->changed_extents_lock);

    // the whole chunk starts out as one free extent
    s->address = c->offset;
    s->size = c->chunk_item->size;
    InsertTailList(&c->space, &s->list_entry);
    InsertTailList(&c->space_size, &s->list_entry_size);

    protect_superblocks(c);

    for (i = 0; i < num_stripes; i++) {
        stripes[i].device->devitem.bytes_used += stripe_size;

        space_list_subtract2(&stripes[i].device->space, NULL, cis[i].offset, stripe_size, NULL, NULL);
    }

    Status = STATUS_SUCCESS;

    if (flags & BLOCK_FLAG_RAID5 || flags & BLOCK_FLAG_RAID6)
        Vcb->superblock.incompat_flags |= BTRFS_INCOMPAT_FLAGS_RAID56;

end:
    if (stripes)
        ExFreePool(stripes);

    if (!NT_SUCCESS(Status)) {
        // every failure path is taken before any ERESOURCE is initialised,
        // so freeing the memory is all the cleanup that is needed
        if (c) {
            if (c->devices)
                ExFreePool(c->devices);

            if (c->chunk_item)
                ExFreePool(c->chunk_item);

            ExFreePool(c);
        }

        if (s) ExFreePool(s);
    } else {
        BOOL done = FALSE;

        // keep Vcb->chunks ordered by logical offset
        le = Vcb->chunks.Flink;
        while (le != &Vcb->chunks) {
            chunk* c2 = CONTAINING_RECORD(le, chunk, list_entry);

            if (c2->offset > c->offset) {
                InsertHeadList(le->Blink, &c->list_entry);
                done = TRUE;
                break;
            }

            le = le->Flink;
        }

        if (!done)
            InsertTailList(&Vcb->chunks, &c->list_entry);

        c->created = TRUE;
        c->changed = TRUE;
        c->space_changed = TRUE;
        c->list_entry_balance.Flink = NULL;

        *pc = c;
    }

    return Status;
}
/* Builds the per-stripe MDLs for a RAID0 write of `length` bytes at logical
 * address `address` within chunk `c`.
 *
 * The source pages come from one of three places:
 *  - the IRP's own MDL, when Irp has one whose ByteOffset is 0 (pages are taken
 *    starting at irp_offset);
 *  - a non-paged copy of `data` (wtc->scratch) when `data` is not page-aligned;
 *  - `data` itself, probed and locked, otherwise.
 * In the last two cases the master MDL is stored in wtc->mdl.
 *
 * For every stripe that receives data, stripes[i].start / .end are set and an
 * MDL of end - start bytes is allocated; its page-frame array is then filled by
 * copying the relevant entries from the master MDL, one stripe_length at a time,
 * round-robin across the stripes.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES, or the exception code
 * raised by MmProbeAndLockPages. The temporary offset table is released on
 * every path.
 * NOTE(review): on failure, wtc->scratch, wtc->mdl and any stripe MDLs already
 * allocated are left in place - presumably the caller tears down wtc and
 * stripes; confirm. */
static NTSTATUS prepare_raid0_write(_Pre_satisfies_(_Curr_->chunk_item->num_stripes>0) _In_ chunk* c, _In_ UINT64 address, _In_reads_bytes_(length) void* data,
                                    _In_ UINT32 length, _In_ write_stripe* stripes, _In_ PIRP Irp, _In_ UINT64 irp_offset, _In_ write_data_context* wtc) {
    UINT64 startoff, endoff;
    UINT16 startoffstripe, endoffstripe, stripenum;
    UINT64 pos, *stripeoff;
    UINT32 i;
    BOOL file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
    PMDL master_mdl;
    PFN_NUMBER* pfns;
    NTSTATUS Status;

    // bytes already mapped into each stripe's MDL
    stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG);
    if (!stripeoff) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &startoff, &startoffstripe);
    get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes, &endoff, &endoffstripe);

    if (file_write) {
        master_mdl = Irp->MdlAddress;

        // the page-frame array follows the MDL header
        pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
        pfns = &pfns[irp_offset >> PAGE_SHIFT];
    } else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
        // unaligned buffer: write from a non-paged copy instead
        wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
        if (!wtc->scratch) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto end;
        }

        RtlCopyMemory(wtc->scratch, data, length);

        master_mdl = IoAllocateMdl(wtc->scratch, length, FALSE, FALSE, NULL);
        if (!master_mdl) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto end;
        }

        MmBuildMdlForNonPagedPool(master_mdl);

        wtc->mdl = master_mdl;

        pfns = (PFN_NUMBER*)(master_mdl + 1);
    } else {
        Status = STATUS_SUCCESS;

        master_mdl = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
        if (!master_mdl) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto end;
        }

        _SEH2_TRY {
            MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
        } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
            Status = _SEH2_GetExceptionCode();
        } _SEH2_END;

        if (!NT_SUCCESS(Status)) {
            ERR("MmProbeAndLockPages threw exception %08x\n", Status);
            IoFreeMdl(master_mdl);
            goto end;
        }

        wtc->mdl = master_mdl;

        pfns = (PFN_NUMBER*)(master_mdl + 1);
    }

    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        // stripes before the starting one begin at the next stripe_length boundary,
        // stripes after it begin at the current one
        if (startoffstripe > i)
            stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
        else if (startoffstripe == i)
            stripes[i].start = startoff;
        else
            stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);

        if (endoffstripe > i)
            stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
        else if (endoffstripe == i)
            stripes[i].end = endoff + 1;
        else
            stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);

        if (stripes[i].start != stripes[i].end) {
            stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), FALSE, FALSE, NULL);

            if (!stripes[i].mdl) {
                ERR("IoAllocateMdl failed\n");
                Status = STATUS_INSUFFICIENT_RESOURCES;
                goto end;
            }
        }
    }

    pos = 0;
    RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes);

    stripenum = startoffstripe;

    while (pos < length) {
        PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum].mdl + 1);

        if (pos == 0) {
            // first piece: may start part-way through a stripe_length unit
            UINT32 writelen = (UINT32)min(stripes[stripenum].end - stripes[stripenum].start,
                                          c->chunk_item->stripe_length - (stripes[stripenum].start % c->chunk_item->stripe_length));

            RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);

            stripeoff[stripenum] += writelen;
            pos += writelen;
        } else if (length - pos < c->chunk_item->stripe_length) {
            // final, partial piece
            RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
            break;
        } else {
            RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));

            stripeoff[stripenum] += c->chunk_item->stripe_length;
            pos += c->chunk_item->stripe_length;
        }

        stripenum = (stripenum + 1) % c->chunk_item->num_stripes;
    }

    Status = STATUS_SUCCESS;

end:
    ExFreePool(stripeoff);

    return Status;
}
// Builds the per-stripe MDLs for a RAID10 write of `length` bytes at logical
// address `address` within chunk `c`.
//
// The chunk is treated as num_stripes / sub_stripes striped groups; every member
// of a group gets the same start, end, data and mdl, so each group shares one MDL.
// The source pages come from the IRP's MDL (when it has one with ByteOffset 0),
// from a non-paged copy of `data` stored in wtc->scratch (when `data` is not
// page-aligned), or from `data` itself after probing and locking it. In the last
// two cases the master MDL is stored in wtc->mdl.
//
// Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES, or the exception code
// raised by MmProbeAndLockPages.
// NOTE(review): on failure, wtc->scratch, wtc->mdl and stripe MDLs already
// allocated are not released here - presumably the caller cleans up; confirm.
static NTSTATUS prepare_raid10_write(_Pre_satisfies_(_Curr_->chunk_item->sub_stripes>0&&_Curr_->chunk_item->num_stripes>=_Curr_->chunk_item->sub_stripes) _In_ chunk* c,
_In_ UINT64 address, _In_reads_bytes_(length) void* data, _In_ UINT32 length, _In_ write_stripe* stripes,
_In_ PIRP Irp, _In_ UINT64 irp_offset, _In_ write_data_context* wtc) {
UINT64 startoff, endoff;
UINT16 startoffstripe, endoffstripe, stripenum;
UINT64 pos, *stripeoff;
UINT32 i;
BOOL file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
PMDL master_mdl;
PFN_NUMBER* pfns;
// offsets are computed per striped group, not per physical stripe
get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &startoff, &startoffstripe);
get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, c->chunk_item->num_stripes / c->chunk_item->sub_stripes, &endoff, &endoffstripe);
// stripenum keeps the group index; startoffstripe/endoffstripe become indices
// of the first physical stripe of their group
stripenum = startoffstripe;
startoffstripe *= c->chunk_item->sub_stripes;
endoffstripe *= c->chunk_item->sub_stripes;
if (file_write) {
master_mdl = Irp->MdlAddress;
// the page-frame array follows the MDL header
pfns = (PFN_NUMBER*)(Irp->MdlAddress + 1);
pfns = &pfns[irp_offset >> PAGE_SHIFT];
} else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
// unaligned buffer: write from a non-paged copy instead
wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
if (!wtc->scratch) {
ERR("out of memory\n");
return STATUS_INSUFFICIENT_RESOURCES;
}
RtlCopyMemory(wtc->scratch, data, length);
master_mdl = IoAllocateMdl(wtc->scratch, length, FALSE, FALSE, NULL);
if (!master_mdl) {
ERR("out of memory\n");
return STATUS_INSUFFICIENT_RESOURCES;
}
MmBuildMdlForNonPagedPool(master_mdl);
wtc->mdl = master_mdl;
pfns = (PFN_NUMBER*)(master_mdl + 1);
} else {
NTSTATUS Status = STATUS_SUCCESS;
master_mdl = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
if (!master_mdl) {
ERR("out of memory\n");
return STATUS_INSUFFICIENT_RESOURCES;
}
_SEH2_TRY {
MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
Status = _SEH2_GetExceptionCode();
} _SEH2_END;
if (!NT_SUCCESS(Status)) {
ERR("MmProbeAndLockPages threw exception %08x\n", Status);
IoFreeMdl(master_mdl);
return Status;
}
wtc->mdl = master_mdl;
pfns = (PFN_NUMBER*)(master_mdl + 1);
}
// one iteration per striped group; i indexes the group's first physical stripe
for (i = 0; i < c->chunk_item->num_stripes; i += c->chunk_item->sub_stripes) {
UINT16 j;
if (startoffstripe > i)
stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
else if (startoffstripe == i)
stripes[i].start = startoff;
else
stripes[i].start = startoff - (startoff % c->chunk_item->stripe_length);
if (endoffstripe > i)
stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
else if (endoffstripe == i)
stripes[i].end = endoff + 1;
else
stripes[i].end = endoff - (endoff % c->chunk_item->stripe_length);
// NOTE(review): unlike prepare_raid0_write, an MDL is allocated even when start == end
stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(stripes[i].end - stripes[i].start), FALSE, FALSE, NULL);
if (!stripes[i].mdl) {
ERR("IoAllocateMdl failed\n");
return STATUS_INSUFFICIENT_RESOURCES;
}
// the remaining members of the group share the first member's range and MDL
for (j = 1; j < c->chunk_item->sub_stripes; j++) {
stripes[i+j].start = stripes[i].start;
stripes[i+j].end = stripes[i].end;
stripes[i+j].data = stripes[i].data;
stripes[i+j].mdl = stripes[i].mdl;
}
}
pos = 0;
// bytes already mapped into each group's MDL
stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes, ALLOC_TAG);
if (!stripeoff) {
ERR("out of memory\n");
return STATUS_INSUFFICIENT_RESOURCES;
}
RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
// copy page-frame numbers from the master MDL into the group MDLs,
// one stripe_length at a time, round-robin across the groups
while (pos < length) {
PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripenum * c->chunk_item->sub_stripes].mdl + 1);
if (pos == 0) {
// first piece: may start part-way through a stripe_length unit
UINT32 writelen = (UINT32)min(stripes[stripenum * c->chunk_item->sub_stripes].end - stripes[stripenum * c->chunk_item->sub_stripes].start,
c->chunk_item->stripe_length - (stripes[stripenum * c->chunk_item->sub_stripes].start % c->chunk_item->stripe_length));
RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
stripeoff[stripenum] += writelen;
pos += writelen;
} else if (length - pos < c->chunk_item->stripe_length) {
// final, partial piece
RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)((length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
break;
} else {
RtlCopyMemory(&stripe_pfns[stripeoff[stripenum] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
stripeoff[stripenum] += c->chunk_item->stripe_length;
pos += c->chunk_item->stripe_length;
}
stripenum = (stripenum + 1) % (c->chunk_item->num_stripes / c->chunk_item->sub_stripes);
}
ExFreePool(stripeoff);
return STATUS_SUCCESS;
}
/* Buffers `length` bytes of `data`, destined for logical address `address`, in
 * the chunk's list of partially-written full stripes.
 *
 * A full stripe covers num_data_stripes * stripe_length bytes, where the number
 * of data stripes is num_stripes - 1 if the chunk type has BLOCK_FLAG_RAID5 set
 * and num_stripes - 2 otherwise. Each list entry holds the data for one full
 * stripe plus a bitmap with one bit per sector; a set bit marks a sector that
 * has not been supplied yet.
 *
 * If an entry for the stripe already exists the data is merged into it, and once
 * every bit is clear the entry is flushed with flush_partial_stripe and removed.
 * Otherwise a new entry is created and inserted so that the list stays ordered
 * by address.
 *
 * c->partial_stripes_lock is held exclusively for the whole operation.
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES, or the failure status
 * of flush_partial_stripe (in which case the entry stays on the list). */
static NTSTATUS add_partial_stripe(device_extension* Vcb, chunk *c, UINT64 address, UINT32 length, void* data) {
    NTSTATUS Status;
    LIST_ENTRY* le;
    partial_stripe* ps;
    UINT64 stripe_addr, offset_in_stripe;
    UINT16 num_data_stripes;
    ULONG bmplen;

    if (c->chunk_item->type & BLOCK_FLAG_RAID5)
        num_data_stripes = c->chunk_item->num_stripes - 1;
    else
        num_data_stripes = c->chunk_item->num_stripes - 2;

    // start of the full stripe that contains `address`
    stripe_addr = address - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));
    offset_in_stripe = address - stripe_addr;

    ExAcquireResourceExclusiveLite(&c->partial_stripes_lock, TRUE);

    for (le = c->partial_stripes.Flink; le != &c->partial_stripes; le = le->Flink) {
        ps = CONTAINING_RECORD(le, partial_stripe, list_entry);

        // passed the insertion point: no entry exists for this stripe
        if (ps->address > stripe_addr)
            break;

        if (ps->address != stripe_addr)
            continue;

        // update existing entry

        RtlCopyMemory(ps->data + offset_in_stripe, data, length);
        RtlClearBits(&ps->bmp, (ULONG)(offset_in_stripe / Vcb->superblock.sector_size), length / Vcb->superblock.sector_size);

        // if now filled, flush

        if (RtlAreBitsClear(&ps->bmp, 0, (ULONG)((num_data_stripes * c->chunk_item->stripe_length) / Vcb->superblock.sector_size))) {
            Status = flush_partial_stripe(Vcb, c, ps);
            if (!NT_SUCCESS(Status)) {
                ERR("flush_partial_stripe returned %08x\n", Status);
                goto end;
            }

            RemoveEntryList(&ps->list_entry);

            if (ps->bmparr)
                ExFreePool(ps->bmparr);

            ExFreePool(ps);
        }

        Status = STATUS_SUCCESS;
        goto end;
    }

    // add new entry; `le` is the entry to insert in front of (or the list head)

    ps = ExAllocatePoolWithTag(NonPagedPool, offsetof(partial_stripe, data[0]) + (ULONG)(num_data_stripes * c->chunk_item->stripe_length), ALLOC_TAG);
    if (!ps) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto end;
    }

    bmplen = (ULONG)sector_align(((num_data_stripes * c->chunk_item->stripe_length) / (8 * Vcb->superblock.sector_size) + 1), sizeof(ULONG));

    ps->address = stripe_addr;
    ps->bmparr = ExAllocatePoolWithTag(NonPagedPool, bmplen, ALLOC_TAG);
    if (!ps->bmparr) {
        ERR("out of memory\n");
        ExFreePool(ps);
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto end;
    }

    // every sector starts out as "missing"; clear the bits for what was just supplied
    RtlInitializeBitMap(&ps->bmp, ps->bmparr, (ULONG)((num_data_stripes * c->chunk_item->stripe_length) / Vcb->superblock.sector_size));
    RtlSetAllBits(&ps->bmp);

    RtlCopyMemory(ps->data + offset_in_stripe, data, length);
    RtlClearBits(&ps->bmp, (ULONG)(offset_in_stripe / Vcb->superblock.sector_size), length / Vcb->superblock.sector_size);

    InsertHeadList(le->Blink, &ps->list_entry);

    Status = STATUS_SUCCESS;

end:
    ExReleaseResourceLite(&c->partial_stripes_lock);

    return Status;
}
// Pairs an MDL with a pointer to a page-frame-number array.
// NOTE(review): only the declaration of a log_stripe array is visible in this part of
// the file (in prepare_raid5_write) - presumably pfns points at the MDL's own PFN
// array; confirm against its users.
typedef struct {
PMDL mdl;
PFN_NUMBER* pfns;
} log_stripe;
/*
 * Prepares a write of `length` bytes at logical address `address` in the RAID5 chunk `c`.
 *
 * Steps, in order:
 *   1. Any tail and head of the write that does not cover a whole full stripe
 *      (num_data_stripes * stripe_length bytes) is handed to add_partial_stripe().
 *   2. For what remains, the [start, end) byte range to be written on each device
 *      is computed into stripes[].
 *   3. The parity buffer (wtc->parity1) and its MDL are allocated, and a "master" MDL
 *      describing the source data is chosen or built.
 *   4. One MDL per device is allocated and its PFN array is filled by copying PFNs
 *      from the master MDL (data) or from the parity MDL.
 *   5. Parity is computed by XORing together the logical data stripes.
 *
 * Parameters:
 *   Vcb        - volume, passed through to add_partial_stripe()
 *   c          - chunk being written to
 *   address    - logical start address of the write
 *   data       - source buffer; the bytes for this write start at data + irp_offset
 *   length     - number of bytes to write
 *   stripes    - array of c->chunk_item->num_stripes entries, filled in by this function
 *   Irp        - optional IRP; if it carries an MDL with ByteOffset == 0, that MDL is used
 *                as the source of page frame numbers
 *   irp_offset - offset of this write within data / the IRP's MDL
 *   priority   - page priority passed to MmGetSystemAddressForMdlSafe()
 *   wtc        - write context that receives parity1, parity1_mdl, scratch and mdl
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES, the status returned by
 * add_partial_stripe(), or the exception code raised by MmProbeAndLockPages().
 *
 * On failure only the function-local allocations (log_stripes, stripeoff) are released
 * here; everything stored in wtc and stripes[] is left for the caller to free
 * (write_data does this at its prepare_failed label).
 */
static NTSTATUS prepare_raid5_write(device_extension* Vcb, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes, PIRP Irp,
                                    UINT64 irp_offset, ULONG priority, write_data_context* wtc) {
    UINT64 startoff, endoff, parity_start, parity_end;
    UINT16 startoffstripe, endoffstripe, parity, num_data_stripes = c->chunk_item->num_stripes - 1;
    UINT64 pos, parity_pos, *stripeoff = NULL;
    UINT32 i;
    BOOL file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
    PMDL master_mdl;
    NTSTATUS Status;
    PFN_NUMBER *pfns, *parity_pfns;
    log_stripe* log_stripes = NULL;

    // Tail of the write does not end on a full-stripe boundary: defer it to the partial stripe list.
    if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
        UINT64 delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);

        // NOTE(review): delta is capped at irp_offset + length rather than at length - confirm this is intended.
        delta = min(irp_offset + length, delta);

        Status = add_partial_stripe(Vcb, c, address + length - delta, (UINT32)delta, (UINT8*)data + irp_offset + length - delta);
        if (!NT_SUCCESS(Status)) {
            ERR("add_partial_stripe returned %08x\n", Status);
            goto exit;
        }

        length -= (UINT32)delta;
    }

    // Head of the write does not start on a full-stripe boundary: defer that part as well.
    if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
        UINT64 delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));

        Status = add_partial_stripe(Vcb, c, address, (UINT32)delta, (UINT8*)data + irp_offset);
        if (!NT_SUCCESS(Status)) {
            ERR("add_partial_stripe returned %08x\n", Status);
            goto exit;
        }

        address += delta;
        length -= (UINT32)delta;
        irp_offset += delta;
    }

    // Everything went to the partial stripe list; nothing left to write directly.
    if (length == 0) {
        Status = STATUS_SUCCESS;
        goto exit;
    }

    get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
    get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);

    // First pass: work out the [start, end) range to be written on each device.
    pos = 0;
    while (pos < length) {
        // Index of the device holding parity for the full stripe containing pos; it changes with each full stripe.
        parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;

        if (pos == 0) {
            // First row: sets .start for every device.
            UINT16 stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
            ULONG skip, writelen;

            i = startoffstripe;
            while (stripe != parity) {
                if (i == startoffstripe) {
                    writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));

                    stripes[stripe].start = startoff;
                    stripes[stripe].end = startoff + writelen;

                    pos += writelen;

                    if (pos == length)
                        break;
                } else {
                    writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);

                    stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
                    stripes[stripe].end = stripes[stripe].start + writelen;

                    pos += writelen;

                    if (pos == length)
                        break;
                }

                i++;
                stripe = (stripe + 1) % c->chunk_item->num_stripes;
            }

            if (pos == length)
                break;

            // Devices before the starting data stripe, and the parity device, begin at the next row.
            for (i = 0; i < startoffstripe; i++) {
                stripe = (parity + i + 1) % c->chunk_item->num_stripes;

                stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
            }

            stripes[parity].start = stripes[parity].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;

            // Large write: extend every device's range by several rows in one step.
            if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
                skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);

                for (i = 0; i < c->chunk_item->num_stripes; i++) {
                    stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
                }

                pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
            }
        } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
            // A whole row: every device (parity included) grows by one stripe_length.
            for (i = 0; i < c->chunk_item->num_stripes; i++) {
                stripes[i].end += c->chunk_item->stripe_length;
            }

            pos += c->chunk_item->stripe_length * num_data_stripes;
        } else {
            // Final, incomplete row: set the end of each data device up to and including endoffstripe.
            UINT16 stripe = (parity + 1) % c->chunk_item->num_stripes;

            i = 0;
            while (stripe != parity) {
                if (endoffstripe == i) {
                    stripes[stripe].end = endoff + 1;
                    break;
                } else if (endoffstripe > i)
                    stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;

                i++;
                stripe = (stripe + 1) % c->chunk_item->num_stripes;
            }

            break;
        }
    }

    // The parity range spans the lowest start to the highest end of any device that was touched.
    parity_start = 0xffffffffffffffff;
    parity_end = 0;

    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        if (stripes[i].start != 0 || stripes[i].end != 0) {
            parity_start = min(stripes[i].start, parity_start);
            parity_end = max(stripes[i].end, parity_end);
        }
    }

    if (parity_end == parity_start) {
        Status = STATUS_SUCCESS;
        goto exit;
    }

    // Widen the parity device of the first row to start at parity_start, and that of the last row to end at parity_end.
    parity = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
    stripes[parity].start = parity_start;

    parity = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
    stripes[parity].end = parity_end;

    log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
    if (!log_stripes) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto exit;
    }

    // Zeroed so that the cleanup at exit can tell which MDLs were actually allocated.
    RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);

    for (i = 0; i < num_data_stripes; i++) {
        log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), FALSE, FALSE, NULL);
        if (!log_stripes[i].mdl) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }

        log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;

        // The PFN array sits directly after the MDL header.
        log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
    }

    wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
    if (!wtc->parity1) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto exit;
    }

    wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), FALSE, FALSE, NULL);
    if (!wtc->parity1_mdl) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto exit;
    }

    MmBuildMdlForNonPagedPool(wtc->parity1_mdl);

    // Choose the master MDL whose PFNs describe the source data.
    if (file_write)
        master_mdl = Irp->MdlAddress;
    else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
        // Source buffer is not page-aligned: copy it into a fresh non-paged allocation and describe that instead.
        wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
        if (!wtc->scratch) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }

        RtlCopyMemory(wtc->scratch, (UINT8*)data + irp_offset, length);

        master_mdl = IoAllocateMdl(wtc->scratch, length, FALSE, FALSE, NULL);
        if (!master_mdl) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }

        MmBuildMdlForNonPagedPool(master_mdl);

        wtc->mdl = master_mdl;
    } else {
        master_mdl = IoAllocateMdl((UINT8*)data + irp_offset, length, FALSE, FALSE, NULL);
        if (!master_mdl) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }

        Status = STATUS_SUCCESS;

        _SEH2_TRY {
            MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
        } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
            Status = _SEH2_GetExceptionCode();
        } _SEH2_END;

        if (!NT_SUCCESS(Status)) {
            ERR("MmProbeAndLockPages threw exception %08x\n", Status);
            IoFreeMdl(master_mdl);
            // Leave through exit so that log_stripes and its MDLs are released (same as prepare_raid6_write).
            goto exit;
        }

        wtc->mdl = master_mdl;
    }

    pfns = (PFN_NUMBER*)(master_mdl + 1);
    parity_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);

    // The IRP's MDL covers the whole request, so skip ahead to the pages of this write.
    if (file_write)
        pfns = &pfns[irp_offset >> PAGE_SHIFT];

    // One MDL per device that has something to write; the PFN arrays are filled in below.
    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        if (stripes[i].start != stripes[i].end) {
            stripes[i].mdl = IoAllocateMdl((UINT8*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), FALSE, FALSE, NULL);
            if (!stripes[i].mdl) {
                ERR("IoAllocateMdl failed\n");
                Status = STATUS_INSUFFICIENT_RESOURCES;
                goto exit;
            }
        }
    }

    // stripeoff[n] tracks how many bytes' worth of PFNs have been written into device n's MDL so far.
    stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG);
    if (!stripeoff) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto exit;
    }

    RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes);

    // Second pass: walk the write again, copying PFNs into the per-device MDLs and the logical-stripe MDLs.
    pos = 0;
    parity_pos = 0;

    while (pos < length) {
        PFN_NUMBER* stripe_pfns;

        parity = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;

        if (pos == 0) {
            // First row.
            UINT16 stripe = (parity + startoffstripe + 1) % c->chunk_item->num_stripes;
            UINT32 writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
                                                            c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
            UINT32 maxwritelen = writelen;

            stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);

            RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);

            RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
            log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;

            stripeoff[stripe] = writelen;
            pos += writelen;

            stripe = (stripe + 1) % c->chunk_item->num_stripes;
            i = startoffstripe + 1;

            while (stripe != parity) {
                stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
                writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));

                if (writelen == 0)
                    break;

                if (writelen > maxwritelen)
                    maxwritelen = writelen;

                RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);

                RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
                log_stripes[i].pfns += writelen >> PAGE_SHIFT;

                stripeoff[stripe] = writelen;
                pos += writelen;

                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                i++;
            }

            // The parity device covers as many bytes as the longest data write in this row.
            stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);

            RtlCopyMemory(stripe_pfns, parity_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
            stripeoff[parity] = maxwritelen;
            parity_pos = maxwritelen;
        } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
            // A whole row.
            UINT16 stripe = (parity + 1) % c->chunk_item->num_stripes;

            i = 0;
            while (stripe != parity) {
                stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);

                RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));

                RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
                log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;

                stripeoff[stripe] += c->chunk_item->stripe_length;
                pos += c->chunk_item->stripe_length;

                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                i++;
            }

            stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);

            RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
            stripeoff[parity] += c->chunk_item->stripe_length;
            parity_pos += c->chunk_item->stripe_length;
        } else {
            // Final, incomplete row.
            UINT16 stripe = (parity + 1) % c->chunk_item->num_stripes;
            UINT32 writelen, maxwritelen = 0;

            i = 0;
            while (pos < length) {
                stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
                writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));

                if (writelen == 0)
                    break;

                if (writelen > maxwritelen)
                    maxwritelen = writelen;

                RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);

                RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
                log_stripes[i].pfns += writelen >> PAGE_SHIFT;

                stripeoff[stripe] += writelen;
                pos += writelen;

                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                i++;
            }

            stripe_pfns = (PFN_NUMBER*)(stripes[parity].mdl + 1);

            RtlCopyMemory(&stripe_pfns[stripeoff[parity] >> PAGE_SHIFT], &parity_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
        }
    }

    // Parity is the XOR of all logical data stripes: seed it with the first, then XOR in the rest.
    for (i = 0; i < num_data_stripes; i++) {
        UINT8* ss = MmGetSystemAddressForMdlSafe(log_stripes[i].mdl, priority);

        // The mapping can fail when system resources are low; fail the write rather than dereference NULL.
        if (!ss) {
            ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }

        if (i == 0)
            RtlCopyMemory(wtc->parity1, ss, (UINT32)(parity_end - parity_start));
        else
            do_xor(wtc->parity1, ss, (UINT32)(parity_end - parity_start));
    }

    Status = STATUS_SUCCESS;

exit:
    // Only the function-local allocations are released here.
    if (log_stripes) {
        for (i = 0; i < num_data_stripes; i++) {
            if (log_stripes[i].mdl)
                IoFreeMdl(log_stripes[i].mdl);
        }

        ExFreePool(log_stripes);
    }

    if (stripeoff)
        ExFreePool(stripeoff);

    return Status;
}
/*
 * Prepares a write of `length` bytes at logical address `address` in the RAID6 chunk `c`.
 *
 * The layout differs from prepare_raid5_write in that each full stripe has two parity
 * devices: parity1, and parity2 = (parity1 + 1) % num_stripes. Data stripes start at
 * (parity1 + 2) % num_stripes.
 *
 * Steps, in order:
 *   1. Any tail and head of the write that does not cover a whole full stripe
 *      (num_data_stripes * stripe_length bytes) is handed to add_partial_stripe().
 *   2. For what remains, the [start, end) byte range to be written on each device
 *      is computed into stripes[].
 *   3. Both parity buffers (wtc->parity1, wtc->parity2) and their MDLs are allocated,
 *      and a "master" MDL describing the source data is chosen or built.
 *   4. One MDL per device is allocated and its PFN array is filled by copying PFNs
 *      from the master MDL (data) or from the parity MDLs.
 *   5. Both parities are computed from the logical data stripes.
 *
 * Parameters:
 *   Vcb        - volume, passed through to add_partial_stripe()
 *   c          - chunk being written to
 *   address    - logical start address of the write
 *   data       - source buffer; the bytes for this write start at data + irp_offset
 *   length     - number of bytes to write
 *   stripes    - array of c->chunk_item->num_stripes entries, filled in by this function
 *   Irp        - optional IRP; if it carries an MDL with ByteOffset == 0, that MDL is used
 *                as the source of page frame numbers
 *   irp_offset - offset of this write within data / the IRP's MDL
 *   priority   - page priority passed to MmGetSystemAddressForMdlSafe()
 *   wtc        - write context that receives parity1/2, parity1/2_mdl, scratch and mdl
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES, the status returned by
 * add_partial_stripe(), or the exception code raised by MmProbeAndLockPages().
 *
 * On failure only the function-local allocations (log_stripes, stripeoff) are released
 * here; everything stored in wtc and stripes[] is left for the caller to free
 * (write_data does this at its prepare_failed label).
 */
static NTSTATUS prepare_raid6_write(device_extension* Vcb, chunk* c, UINT64 address, void* data, UINT32 length, write_stripe* stripes, PIRP Irp,
                                    UINT64 irp_offset, ULONG priority, write_data_context* wtc) {
    UINT64 startoff, endoff, parity_start, parity_end;
    UINT16 startoffstripe, endoffstripe, parity1, num_data_stripes = c->chunk_item->num_stripes - 2;
    UINT64 pos, parity_pos, *stripeoff = NULL;
    UINT32 i;
    BOOL file_write = Irp && Irp->MdlAddress && (Irp->MdlAddress->ByteOffset == 0);
    PMDL master_mdl;
    NTSTATUS Status;
    PFN_NUMBER *pfns, *parity1_pfns, *parity2_pfns;
    log_stripe* log_stripes = NULL;

    // Tail of the write does not end on a full-stripe boundary: defer it to the partial stripe list.
    if ((address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
        UINT64 delta = (address + length - c->offset) % (num_data_stripes * c->chunk_item->stripe_length);

        // NOTE(review): delta is capped at irp_offset + length rather than at length - confirm this is intended.
        delta = min(irp_offset + length, delta);

        Status = add_partial_stripe(Vcb, c, address + length - delta, (UINT32)delta, (UINT8*)data + irp_offset + length - delta);
        if (!NT_SUCCESS(Status)) {
            ERR("add_partial_stripe returned %08x\n", Status);
            goto exit;
        }

        length -= (UINT32)delta;
    }

    // Head of the write does not start on a full-stripe boundary: defer that part as well.
    if (length > 0 && (address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length) > 0) {
        UINT64 delta = (num_data_stripes * c->chunk_item->stripe_length) - ((address - c->offset) % (num_data_stripes * c->chunk_item->stripe_length));

        Status = add_partial_stripe(Vcb, c, address, (UINT32)delta, (UINT8*)data + irp_offset);
        if (!NT_SUCCESS(Status)) {
            ERR("add_partial_stripe returned %08x\n", Status);
            goto exit;
        }

        address += delta;
        length -= (UINT32)delta;
        irp_offset += delta;
    }

    // Everything went to the partial stripe list; nothing left to write directly.
    if (length == 0) {
        Status = STATUS_SUCCESS;
        goto exit;
    }

    get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, num_data_stripes, &startoff, &startoffstripe);
    get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, num_data_stripes, &endoff, &endoffstripe);

    // First pass: work out the [start, end) range to be written on each device.
    pos = 0;
    while (pos < length) {
        // Index of the first parity device for the full stripe containing pos; it changes with each full stripe.
        parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;

        if (pos == 0) {
            // First row: sets .start for every device.
            UINT16 stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes;
            UINT16 parity2 = (parity1 + 1) % c->chunk_item->num_stripes;
            ULONG skip, writelen;

            i = startoffstripe;
            while (stripe != parity1) {
                if (i == startoffstripe) {
                    writelen = (ULONG)min(length, c->chunk_item->stripe_length - (startoff % c->chunk_item->stripe_length));

                    stripes[stripe].start = startoff;
                    stripes[stripe].end = startoff + writelen;

                    pos += writelen;

                    if (pos == length)
                        break;
                } else {
                    writelen = (ULONG)min(length - pos, c->chunk_item->stripe_length);

                    stripes[stripe].start = startoff - (startoff % c->chunk_item->stripe_length);
                    stripes[stripe].end = stripes[stripe].start + writelen;

                    pos += writelen;

                    if (pos == length)
                        break;
                }

                i++;
                stripe = (stripe + 1) % c->chunk_item->num_stripes;
            }

            if (pos == length)
                break;

            // Devices before the starting data stripe, and both parity devices, begin at the next row.
            for (i = 0; i < startoffstripe; i++) {
                stripe = (parity1 + i + 2) % c->chunk_item->num_stripes;

                stripes[stripe].start = stripes[stripe].end = startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;
            }

            stripes[parity1].start = stripes[parity1].end = stripes[parity2].start = stripes[parity2].end =
                startoff - (startoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;

            // Large write: extend every device's range by several rows in one step.
            if (length - pos > c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length) {
                skip = (ULONG)(((length - pos) / (c->chunk_item->num_stripes * num_data_stripes * c->chunk_item->stripe_length)) - 1);

                for (i = 0; i < c->chunk_item->num_stripes; i++) {
                    stripes[i].end += skip * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
                }

                pos += skip * num_data_stripes * c->chunk_item->num_stripes * c->chunk_item->stripe_length;
            }
        } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
            // A whole row: every device (parities included) grows by one stripe_length.
            for (i = 0; i < c->chunk_item->num_stripes; i++) {
                stripes[i].end += c->chunk_item->stripe_length;
            }

            pos += c->chunk_item->stripe_length * num_data_stripes;
        } else {
            // Final, incomplete row: set the end of each data device up to and including endoffstripe.
            UINT16 stripe = (parity1 + 2) % c->chunk_item->num_stripes;

            i = 0;
            while (stripe != parity1) {
                if (endoffstripe == i) {
                    stripes[stripe].end = endoff + 1;
                    break;
                } else if (endoffstripe > i)
                    stripes[stripe].end = endoff - (endoff % c->chunk_item->stripe_length) + c->chunk_item->stripe_length;

                i++;
                stripe = (stripe + 1) % c->chunk_item->num_stripes;
            }

            break;
        }
    }

    // The parity range spans the lowest start to the highest end of any device that was touched.
    parity_start = 0xffffffffffffffff;
    parity_end = 0;

    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        if (stripes[i].start != 0 || stripes[i].end != 0) {
            parity_start = min(stripes[i].start, parity_start);
            parity_end = max(stripes[i].end, parity_end);
        }
    }

    if (parity_end == parity_start) {
        Status = STATUS_SUCCESS;
        goto exit;
    }

    // Widen both parity devices of the first row to start at parity_start, and those of the last row to end at parity_end.
    parity1 = (((address - c->offset) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
    stripes[parity1].start = stripes[(parity1 + 1) % c->chunk_item->num_stripes].start = parity_start;

    parity1 = (((address - c->offset + length - 1) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;
    stripes[parity1].end = stripes[(parity1 + 1) % c->chunk_item->num_stripes].end = parity_end;

    log_stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(log_stripe) * num_data_stripes, ALLOC_TAG);
    if (!log_stripes) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto exit;
    }

    // Zeroed so that the cleanup at exit can tell which MDLs were actually allocated.
    RtlZeroMemory(log_stripes, sizeof(log_stripe) * num_data_stripes);

    for (i = 0; i < num_data_stripes; i++) {
        log_stripes[i].mdl = IoAllocateMdl(NULL, (ULONG)(parity_end - parity_start), FALSE, FALSE, NULL);
        if (!log_stripes[i].mdl) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }

        log_stripes[i].mdl->MdlFlags |= MDL_PARTIAL;

        // The PFN array sits directly after the MDL header.
        log_stripes[i].pfns = (PFN_NUMBER*)(log_stripes[i].mdl + 1);
    }

    wtc->parity1 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
    if (!wtc->parity1) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto exit;
    }

    wtc->parity2 = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(parity_end - parity_start), ALLOC_TAG);
    if (!wtc->parity2) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto exit;
    }

    wtc->parity1_mdl = IoAllocateMdl(wtc->parity1, (ULONG)(parity_end - parity_start), FALSE, FALSE, NULL);
    if (!wtc->parity1_mdl) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto exit;
    }

    MmBuildMdlForNonPagedPool(wtc->parity1_mdl);

    wtc->parity2_mdl = IoAllocateMdl(wtc->parity2, (ULONG)(parity_end - parity_start), FALSE, FALSE, NULL);
    if (!wtc->parity2_mdl) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto exit;
    }

    MmBuildMdlForNonPagedPool(wtc->parity2_mdl);

    // Choose the master MDL whose PFNs describe the source data.
    if (file_write)
        master_mdl = Irp->MdlAddress;
    else if (((ULONG_PTR)data % PAGE_SIZE) != 0) {
        // Source buffer is not page-aligned: copy it into a fresh non-paged allocation and describe that instead.
        wtc->scratch = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
        if (!wtc->scratch) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }

        RtlCopyMemory(wtc->scratch, (UINT8*)data + irp_offset, length);

        master_mdl = IoAllocateMdl(wtc->scratch, length, FALSE, FALSE, NULL);
        if (!master_mdl) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }

        MmBuildMdlForNonPagedPool(master_mdl);

        wtc->mdl = master_mdl;
    } else {
        master_mdl = IoAllocateMdl((UINT8*)data + irp_offset, length, FALSE, FALSE, NULL);
        if (!master_mdl) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }

        Status = STATUS_SUCCESS;

        _SEH2_TRY {
            MmProbeAndLockPages(master_mdl, KernelMode, IoReadAccess);
        } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
            Status = _SEH2_GetExceptionCode();
        } _SEH2_END;

        if (!NT_SUCCESS(Status)) {
            ERR("MmProbeAndLockPages threw exception %08x\n", Status);
            IoFreeMdl(master_mdl);
            goto exit;
        }

        wtc->mdl = master_mdl;
    }

    pfns = (PFN_NUMBER*)(master_mdl + 1);
    parity1_pfns = (PFN_NUMBER*)(wtc->parity1_mdl + 1);
    parity2_pfns = (PFN_NUMBER*)(wtc->parity2_mdl + 1);

    // The IRP's MDL covers the whole request, so skip ahead to the pages of this write.
    if (file_write)
        pfns = &pfns[irp_offset >> PAGE_SHIFT];

    // One MDL per device that has something to write; the PFN arrays are filled in below.
    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        if (stripes[i].start != stripes[i].end) {
            stripes[i].mdl = IoAllocateMdl((UINT8*)MmGetMdlVirtualAddress(master_mdl) + irp_offset, (ULONG)(stripes[i].end - stripes[i].start), FALSE, FALSE, NULL);
            if (!stripes[i].mdl) {
                ERR("IoAllocateMdl failed\n");
                Status = STATUS_INSUFFICIENT_RESOURCES;
                goto exit;
            }
        }
    }

    // stripeoff[n] tracks how many bytes' worth of PFNs have been written into device n's MDL so far.
    stripeoff = ExAllocatePoolWithTag(PagedPool, sizeof(UINT64) * c->chunk_item->num_stripes, ALLOC_TAG);
    if (!stripeoff) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto exit;
    }

    RtlZeroMemory(stripeoff, sizeof(UINT64) * c->chunk_item->num_stripes);

    // Second pass: walk the write again, copying PFNs into the per-device MDLs and the logical-stripe MDLs.
    pos = 0;
    parity_pos = 0;

    while (pos < length) {
        PFN_NUMBER* stripe_pfns;

        parity1 = (((address - c->offset + pos) / (num_data_stripes * c->chunk_item->stripe_length)) + num_data_stripes) % c->chunk_item->num_stripes;

        if (pos == 0) {
            // First row.
            UINT16 stripe = (parity1 + startoffstripe + 2) % c->chunk_item->num_stripes, parity2;
            UINT32 writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start,
                                                            c->chunk_item->stripe_length - (stripes[stripe].start % c->chunk_item->stripe_length)));
            UINT32 maxwritelen = writelen;

            stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);

            RtlCopyMemory(stripe_pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);

            RtlCopyMemory(log_stripes[startoffstripe].pfns, pfns, writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
            log_stripes[startoffstripe].pfns += writelen >> PAGE_SHIFT;

            stripeoff[stripe] = writelen;
            pos += writelen;

            stripe = (stripe + 1) % c->chunk_item->num_stripes;
            i = startoffstripe + 1;

            while (stripe != parity1) {
                stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
                writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));

                if (writelen == 0)
                    break;

                if (writelen > maxwritelen)
                    maxwritelen = writelen;

                RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);

                RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
                log_stripes[i].pfns += writelen >> PAGE_SHIFT;

                stripeoff[stripe] = writelen;
                pos += writelen;

                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                i++;
            }

            // Both parity devices cover as many bytes as the longest data write in this row.
            stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);

            RtlCopyMemory(stripe_pfns, parity1_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
            stripeoff[parity1] = maxwritelen;

            parity2 = (parity1 + 1) % c->chunk_item->num_stripes;

            stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);

            RtlCopyMemory(stripe_pfns, parity2_pfns, maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
            stripeoff[parity2] = maxwritelen;

            parity_pos = maxwritelen;
        } else if (length - pos >= c->chunk_item->stripe_length * num_data_stripes) {
            // A whole row.
            UINT16 stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;

            i = 0;
            while (stripe != parity1) {
                stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);

                RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));

                RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
                log_stripes[i].pfns += c->chunk_item->stripe_length >> PAGE_SHIFT;

                stripeoff[stripe] += c->chunk_item->stripe_length;
                pos += c->chunk_item->stripe_length;

                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                i++;
            }

            stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);

            RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
            stripeoff[parity1] += c->chunk_item->stripe_length;

            parity2 = (parity1 + 1) % c->chunk_item->num_stripes;

            stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);

            RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], (ULONG)(c->chunk_item->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
            stripeoff[parity2] += c->chunk_item->stripe_length;

            parity_pos += c->chunk_item->stripe_length;
        } else {
            // Final, incomplete row.
            UINT16 stripe = (parity1 + 2) % c->chunk_item->num_stripes, parity2;
            UINT32 writelen, maxwritelen = 0;

            i = 0;
            while (pos < length) {
                stripe_pfns = (PFN_NUMBER*)(stripes[stripe].mdl + 1);
                writelen = (UINT32)min(length - pos, min(stripes[stripe].end - stripes[stripe].start, c->chunk_item->stripe_length));

                if (writelen == 0)
                    break;

                if (writelen > maxwritelen)
                    maxwritelen = writelen;

                RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);

                RtlCopyMemory(log_stripes[i].pfns, &pfns[pos >> PAGE_SHIFT], writelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
                log_stripes[i].pfns += writelen >> PAGE_SHIFT;

                stripeoff[stripe] += writelen;
                pos += writelen;

                stripe = (stripe + 1) % c->chunk_item->num_stripes;
                i++;
            }

            stripe_pfns = (PFN_NUMBER*)(stripes[parity1].mdl + 1);

            RtlCopyMemory(&stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT], &parity1_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);

            parity2 = (parity1 + 1) % c->chunk_item->num_stripes;

            stripe_pfns = (PFN_NUMBER*)(stripes[parity2].mdl + 1);

            RtlCopyMemory(&stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT], &parity2_pfns[parity_pos >> PAGE_SHIFT], maxwritelen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
        }
    }

    // Compute both parities, visiting the logical data stripes from last to first
    // (num_stripes - 3 - i is num_data_stripes - 1 - i).
    for (i = 0; i < num_data_stripes; i++) {
        UINT8* ss = MmGetSystemAddressForMdlSafe(log_stripes[c->chunk_item->num_stripes - 3 - i].mdl, priority);

        // The mapping can fail when system resources are low; fail the write rather than dereference NULL.
        if (!ss) {
            ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }

        if (i == 0) {
            // Seed both parities with the first stripe visited.
            RtlCopyMemory(wtc->parity1, ss, (ULONG)(parity_end - parity_start));
            RtlCopyMemory(wtc->parity2, ss, (ULONG)(parity_end - parity_start));
        } else {
            // parity1 is a plain XOR; parity2 is passed through galois_double before each XOR
            // (presumably the GF(2^8) multiply-by-two of the RAID6 Q syndrome - confirm against galois_double).
            do_xor(wtc->parity1, ss, (UINT32)(parity_end - parity_start));

            galois_double(wtc->parity2, (UINT32)(parity_end - parity_start));
            do_xor(wtc->parity2, ss, (UINT32)(parity_end - parity_start));
        }
    }

    Status = STATUS_SUCCESS;

exit:
    // Only the function-local allocations are released here.
    if (log_stripes) {
        for (i = 0; i < num_data_stripes; i++) {
            if (log_stripes[i].mdl)
                IoFreeMdl(log_stripes[i].mdl);
        }

        ExFreePool(log_stripes);
    }

    if (stripeoff)
        ExFreePool(stripeoff);

    return Status;
}
/*
 * Sets up the per-device write requests for writing `length` bytes of `data` at
 * logical address `address`.
 *
 * The function picks a layout routine from the chunk's type flags (RAID0, RAID10, RAID5,
 * RAID6), or, for any other profile, arranges for the same data to be written to every
 * stripe. It then checks that no more devices are missing than the profile tolerates, and
 * finally builds one write_data_stripe (with an IRP where there is something to write)
 * per chunk stripe and appends it to wtc->stripes. Nothing in this function submits the
 * IRPs; it only prepares them.
 *
 * Parameters:
 *   Vcb        - volume
 *   address    - logical address to write to
 *   data       - source buffer
 *   length     - number of bytes to write
 *   wtc        - write context; receives the stripe list, stripes_left count and any
 *                parity/scratch buffers allocated by the RAID prepare routines
 *   Irp        - optional originating IRP; when non-NULL the per-device IRPs are created
 *                with IoMakeAssociatedIrp, otherwise with IoAllocateIrp
 *   c          - optional chunk; looked up from `address` when NULL
 *   file_write - when TRUE, Irp (and its MdlAddress) is used as the source of pages
 *   irp_offset - offset of this write within the IRP's buffer
 *   priority   - page priority passed to MmGetSystemAddressForMdlSafe()
 *
 * Returns STATUS_SUCCESS, STATUS_INTERNAL_ERROR if no chunk contains `address`,
 * STATUS_INSUFFICIENT_RESOURCES on allocation failure, STATUS_DEVICE_NOT_READY if too
 * many devices are missing, or the failure status of a prepare routine / MmProbeAndLockPages.
 */
NTSTATUS write_data(_In_ device_extension* Vcb, _In_ UINT64 address, _In_reads_bytes_(length) void* data, _In_ UINT32 length, _In_ write_data_context* wtc,
_In_opt_ PIRP Irp, _In_opt_ chunk* c, _In_ BOOL file_write, _In_ UINT64 irp_offset, _In_ ULONG priority) {
NTSTATUS Status;
UINT32 i;
CHUNK_ITEM_STRIPE* cis;
write_stripe* stripes = NULL;
UINT64 total_writing = 0;
ULONG allowed_missing, missing;
TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
if (!c) {
c = get_chunk_from_address(Vcb, address);
if (!c) {
ERR("could not get chunk for address %llx\n", address);
return STATUS_INTERNAL_ERROR;
}
}
// Temporary per-device layout; zeroed so that untouched entries have start == end and mdl == NULL.
stripes = ExAllocatePoolWithTag(PagedPool, sizeof(write_stripe) * c->chunk_item->num_stripes, ALLOC_TAG);
if (!stripes) {
ERR("out of memory\n");
return STATUS_INSUFFICIENT_RESOURCES;
}
RtlZeroMemory(stripes, sizeof(write_stripe) * c->chunk_item->num_stripes);
// The CHUNK_ITEM_STRIPE array follows the CHUNK_ITEM header directly.
cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
// Dispatch on the chunk's profile; allowed_missing is how many absent devices the write tolerates.
if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
Status = prepare_raid0_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
if (!NT_SUCCESS(Status)) {
ERR("prepare_raid0_write returned %08x\n", Status);
goto prepare_failed;
}
allowed_missing = 0;
} else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
Status = prepare_raid10_write(c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, wtc);
if (!NT_SUCCESS(Status)) {
ERR("prepare_raid10_write returned %08x\n", Status);
goto prepare_failed;
}
allowed_missing = 1;
} else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
Status = prepare_raid5_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
if (!NT_SUCCESS(Status)) {
ERR("prepare_raid5_write returned %08x\n", Status);
goto prepare_failed;
}
allowed_missing = 1;
} else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
Status = prepare_raid6_write(Vcb, c, address, data, length, stripes, file_write ? Irp : NULL, irp_offset, priority, wtc);
if (!NT_SUCCESS(Status)) {
ERR("prepare_raid6_write returned %08x\n", Status);
goto prepare_failed;
}
allowed_missing = 2;
} else { // write same data to every location - SINGLE, DUP, RAID1
for (i = 0; i < c->chunk_item->num_stripes; i++) {
stripes[i].start = address - c->offset;
stripes[i].end = stripes[i].start + length;
stripes[i].data = data;
stripes[i].irp_offset = irp_offset;
// MDLs are only built for devices that are actually present.
if (c->devices[i]->devobj) {
if (file_write) {
// Describe the relevant slice of the IRP's existing MDL rather than locking pages again.
UINT8* va;
ULONG writelen = (ULONG)(stripes[i].end - stripes[i].start);
va = (UINT8*)MmGetMdlVirtualAddress(Irp->MdlAddress) + stripes[i].irp_offset;
stripes[i].mdl = IoAllocateMdl(va, writelen, FALSE, FALSE, NULL);
if (!stripes[i].mdl) {
ERR("IoAllocateMdl failed\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
goto prepare_failed;
}
IoBuildPartialMdl(Irp->MdlAddress, stripes[i].mdl, va, writelen);
} else {
stripes[i].mdl = IoAllocateMdl(stripes[i].data, (ULONG)(stripes[i].end - stripes[i].start), FALSE, FALSE, NULL);
if (!stripes[i].mdl) {
ERR("IoAllocateMdl failed\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
goto prepare_failed;
}
Status = STATUS_SUCCESS;
// MmProbeAndLockPages reports failure by raising an exception, so convert it to a status.
_SEH2_TRY {
MmProbeAndLockPages(stripes[i].mdl, KernelMode, IoReadAccess);
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
Status = _SEH2_GetExceptionCode();
} _SEH2_END;
if (!NT_SUCCESS(Status)) {
ERR("MmProbeAndLockPages threw exception %08x\n", Status);
IoFreeMdl(stripes[i].mdl);
// Cleared so that prepare_failed does not free this MDL a second time.
stripes[i].mdl = NULL;
goto prepare_failed;
}
}
}
}
allowed_missing = c->chunk_item->num_stripes - 1;
}
// Count devices with no device object and refuse the write if the profile cannot tolerate that many.
missing = 0;
for (i = 0; i < c->chunk_item->num_stripes; i++) {
if (!c->devices[i]->devobj)
missing++;
}
if (missing > allowed_missing) {
ERR("cannot write as %u missing devices (maximum %u)\n", missing, allowed_missing);
Status = STATUS_DEVICE_NOT_READY;
goto prepare_failed;
}
// Build one write_data_stripe per chunk stripe and queue it on wtc->stripes.
// NOTE(review): on the `goto end` paths below, stripes[].mdl of entries not yet appended to
// wtc->stripes are not released by this function, and free_write_data_stripes only receives wtc -
// confirm whether these MDLs leak.
for (i = 0; i < c->chunk_item->num_stripes; i++) {
write_data_stripe* stripe;
PIO_STACK_LOCATION IrpSp;
stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_data_stripe), ALLOC_TAG);
if (!stripe) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
if (stripes[i].start == stripes[i].end || !c->devices[i]->devobj) {
// Nothing to write on this device, or the device is missing: queue a placeholder with no IRP.
stripe->status = WriteDataStatus_Ignore;
stripe->Irp = NULL;
stripe->buf = stripes[i].data;
stripe->mdl = NULL;
} else {
stripe->context = (struct _write_data_context*)wtc;
stripe->buf = stripes[i].data;
stripe->device = c->devices[i];
RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
stripe->status = WriteDataStatus_Pending;
stripe->mdl = stripes[i].mdl;
if (!Irp) {
stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
if (!stripe->Irp) {
ERR("IoAllocateIrp failed\n");
ExFreePool(stripe);
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
} else {
stripe->Irp = IoMakeAssociatedIrp(Irp, stripe->device->devobj->StackSize);
if (!stripe->Irp) {
ERR("IoMakeAssociatedIrp failed\n");
ExFreePool(stripe);
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
}
IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
IrpSp->MajorFunction = IRP_MJ_WRITE;
// Hand the buffer over in whichever form the target device's I/O method expects.
// NOTE(review): the MmGetSystemAddressForMdlSafe results below are not checked for NULL - confirm.
if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
stripe->Irp->AssociatedIrp.SystemBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
stripe->Irp->Flags = IRP_BUFFERED_IO;
} else if (stripe->device->devobj->Flags & DO_DIRECT_IO)
stripe->Irp->MdlAddress = stripe->mdl;
else
stripe->Irp->UserBuffer = MmGetSystemAddressForMdlSafe(stripes[i].mdl, priority);
#ifdef DEBUG_PARANOID
if (stripes[i].end < stripes[i].start) {
ERR("trying to write stripe with negative length (%llx < %llx)\n", stripes[i].end, stripes[i].start);
int3;
}
#endif
IrpSp->Parameters.Write.Length = (ULONG)(stripes[i].end - stripes[i].start);
// stripes[i].start is relative to the chunk; cis[i].offset is added to form the offset passed to the device.
IrpSp->Parameters.Write.ByteOffset.QuadPart = stripes[i].start + cis[i].offset;
total_writing += IrpSp->Parameters.Write.Length;
stripe->Irp->UserIosb = &stripe->iosb;
wtc->stripes_left++;
IoSetCompletionRoutine(stripe->Irp, write_data_completion, stripe, TRUE, TRUE, TRUE);
}
InsertTailList(&wtc->stripes, &stripe->list_entry);
}
if (diskacc)
fFsRtlUpdateDiskCounters(0, total_writing);
Status = STATUS_SUCCESS;
// Common exit once stripe construction has started: the temporary layout array is always freed,
// and on failure everything already attached to wtc is released via free_write_data_stripes.
end:
if (stripes) ExFreePool(stripes);
if (!NT_SUCCESS(Status))
free_write_data_stripes(wtc);
return Status;
// Failure before any write_data_stripe was queued: undo what the prepare step allocated.
prepare_failed:
for (i = 0; i < c->chunk_item->num_stripes; i++) {
// An MDL equal to the previous entry's is skipped so that it is freed only once.
if (stripes[i].mdl && (i == 0 || stripes[i].mdl != stripes[i-1].mdl)) {
if (stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
MmUnlockPages(stripes[i].mdl);
IoFreeMdl(stripes[i].mdl);
}
}
if (wtc->parity1_mdl) {
if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
MmUnlockPages(wtc->parity1_mdl);
IoFreeMdl(wtc->parity1_mdl);
wtc->parity1_mdl = NULL;
}
if (wtc->parity2_mdl) {
if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
MmUnlockPages(wtc->parity2_mdl);
IoFreeMdl(wtc->parity2_mdl);
wtc->parity2_mdl = NULL;
}
if (wtc->mdl) {
if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
MmUnlockPages(wtc->mdl);
IoFreeMdl(wtc->mdl);
wtc->mdl = NULL;
}
if (wtc->parity1) {
ExFreePool(wtc->parity1);
wtc->parity1 = NULL;
}
if (wtc->parity2) {
ExFreePool(wtc->parity2);
wtc->parity2 = NULL;
}
if (wtc->scratch) {
ExFreePool(wtc->scratch);
wtc->scratch = NULL;
}
ExFreePool(stripes);
return Status;
}
/* Works out which range of a RAID5/6 chunk has to be locked for a write of
 * `length` bytes at logical `address`.
 *
 * The range is widened so that it starts and ends on boundaries of
 * stripe_length * (number of data stripes). The result is returned through
 * `lockaddr` (logical address) and `locklen` (length in bytes). */
void get_raid56_lock_range(chunk* c, UINT64 address, UINT64 length, UINT64* lockaddr, UINT64* locklen) {
    UINT64 first_off, last_off;
    UINT16 first_stripe, last_stripe, data_stripes;

    // One stripe is subtracted for RAID5 chunks, two for anything else.
    if (c->chunk_item->type & BLOCK_FLAG_RAID5)
        data_stripes = c->chunk_item->num_stripes - 1;
    else
        data_stripes = c->chunk_item->num_stripes - 2;

    // Locate the first and the last byte of the write (stripe indices are not needed here).
    get_raid0_offset(address - c->offset, c->chunk_item->stripe_length, data_stripes, &first_off, &first_stripe);
    get_raid0_offset(address + length - c->offset - 1, c->chunk_item->stripe_length, data_stripes, &last_off, &last_stripe);

    // Round the start down and the end up to a multiple of the stripe length.
    first_off -= first_off % c->chunk_item->stripe_length;
    last_off = sector_align(last_off, c->chunk_item->stripe_length);

    *lockaddr = c->offset + (first_off * data_stripes);
    *locklen = (last_off - first_off) * data_stripes;
}
/* Synchronously writes `length` bytes from `data` to logical `address`.
 *
 * Prepares the per-device stripes with write_data, sends every non-ignored
 * stripe IRP to its device, waits for all of them to complete, and returns the
 * first failing stripe status (or the status from write_data).
 *
 * If `c` is NULL the chunk is looked up from `address`. For RAID5/6 chunks the
 * affected range is locked for the duration of the write. */
NTSTATUS write_data_complete(device_extension* Vcb, UINT64 address, void* data, UINT32 length, PIRP Irp, chunk* c, BOOL file_write, UINT64 irp_offset, ULONG priority) {
    write_data_context wtc;
    NTSTATUS Status;
    UINT64 lockaddr, locklen;
    LIST_ENTRY* le;
    BOOL raid56;
    BOOL launched = FALSE;

    KeInitializeEvent(&wtc.Event, NotificationEvent, FALSE);
    InitializeListHead(&wtc.stripes);
    wtc.stripes_left = 0;
    wtc.parity1 = NULL;
    wtc.parity2 = NULL;
    wtc.scratch = NULL;
    wtc.mdl = NULL;
    wtc.parity1_mdl = NULL;
    wtc.parity2_mdl = NULL;

    if (!c) {
        c = get_chunk_from_address(Vcb, address);

        if (!c) {
            ERR("could not get chunk for address %llx\n", address);
            return STATUS_INTERNAL_ERROR;
        }
    }

    raid56 = (c->chunk_item->type & (BLOCK_FLAG_RAID5 | BLOCK_FLAG_RAID6)) != 0;

    // lockaddr / locklen are only meaningful (and only used) for RAID5/6 chunks.
    if (raid56) {
        get_raid56_lock_range(c, address, length, &lockaddr, &locklen);
        chunk_lock_range(Vcb, c, lockaddr, locklen);
    }

    // write_data runs under SEH so that an exception becomes a failure status.
    _SEH2_TRY {
        Status = write_data(Vcb, address, data, length, &wtc, Irp, c, file_write, irp_offset, priority);
    } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
        Status = _SEH2_GetExceptionCode();
    } _SEH2_END;

    if (!NT_SUCCESS(Status)) {
        ERR("write_data returned %08x\n", Status);

        if (raid56)
            chunk_unlock_range(Vcb, c, lockaddr, locklen);

        free_write_data_stripes(&wtc);

        return Status;
    }

    if (!IsListEmpty(&wtc.stripes)) {
        // launch writes and wait
        for (le = wtc.stripes.Flink; le != &wtc.stripes; le = le->Flink) {
            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);

            if (stripe->status == WriteDataStatus_Ignore)
                continue;

            IoCallDriver(stripe->device->devobj, stripe->Irp);
            launched = TRUE;
        }

        // The event is set by the completion routine once the last stripe finishes.
        if (launched)
            KeWaitForSingleObject(&wtc.Event, Executive, KernelMode, FALSE, NULL);

        // Report the first stripe that failed, and count it against its device.
        for (le = wtc.stripes.Flink; le != &wtc.stripes; le = le->Flink) {
            write_data_stripe* stripe = CONTAINING_RECORD(le, write_data_stripe, list_entry);

            if (stripe->status == WriteDataStatus_Ignore || NT_SUCCESS(stripe->iosb.Status))
                continue;

            Status = stripe->iosb.Status;
            log_device_error(Vcb, stripe->device, BTRFS_DEV_STAT_WRITE_ERRORS);
            break;
        }

        free_write_data_stripes(&wtc);
    }

    if (raid56)
        chunk_unlock_range(Vcb, c, lockaddr, locklen);

    return Status;
}
/* I/O completion routine attached to every stripe IRP built by write_data.
 *
 * `conptr` is the write_data_stripe the IRP belongs to. Records the result of
 * the stripe, asks for the remaining pending stripes to be cancelled if this
 * one failed, and signals the context event when the last outstanding stripe
 * has completed. Always returns STATUS_MORE_PROCESSING_REQUIRED; the IRP is
 * released later by free_write_data_stripes. */
_Function_class_(IO_COMPLETION_ROUTINE)
#ifdef __REACTOS__
static NTSTATUS NTAPI write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
#else
static NTSTATUS write_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
#endif
    write_data_stripe* stripe = conptr;
    write_data_context* context = (write_data_context*)stripe->context;

    UNUSED(DeviceObject);

    // FIXME - we need a lock here

    if (stripe->status == WriteDataStatus_Cancelling) {
        // This stripe was cancelled because a sibling failed; its own result is not recorded.
        stripe->status = WriteDataStatus_Cancelled;
    } else {
        stripe->iosb = Irp->IoStatus;

        if (!NT_SUCCESS(Irp->IoStatus.Status)) {
            LIST_ENTRY* cur;

            stripe->status = WriteDataStatus_Error;

            // One failure dooms the whole write: cancel everything still in flight.
            for (cur = context->stripes.Flink; cur != &context->stripes; cur = cur->Flink) {
                write_data_stripe* other = CONTAINING_RECORD(cur, write_data_stripe, list_entry);

                if (other->status != WriteDataStatus_Pending)
                    continue;

                other->status = WriteDataStatus_Cancelling;
                IoCancelIrp(other->Irp);
            }
        } else {
            stripe->status = WriteDataStatus_Success;
        }
    }

    // Wake the waiter once the last stripe has reported in.
    if (InterlockedDecrement(&context->stripes_left) == 0)
        KeSetEvent(&context->Event, 0, FALSE);

    return STATUS_MORE_PROCESSING_REQUIRED;
}
/* Releases everything owned by a write_data_context: the parity and data MDLs
 * (unlocking their pages first if they are locked), the parity and scratch
 * buffers, and every stripe on the list together with its MDL and IRP.
 *
 * Each pointer is reset to NULL once it has been released and the stripe list
 * is left empty, so calling this function again on the same context is a
 * harmless no-op. This matters because write_data frees the context itself on
 * some failure paths and write_data_complete then frees the same context
 * again; without the reset the buffers and MDLs would be freed twice. */
void free_write_data_stripes(write_data_context* wtc) {
    PMDL last_mdl = NULL;

    if (wtc->parity1_mdl) {
        if (wtc->parity1_mdl->MdlFlags & MDL_PAGES_LOCKED)
            MmUnlockPages(wtc->parity1_mdl);

        IoFreeMdl(wtc->parity1_mdl);
        wtc->parity1_mdl = NULL;
    }

    if (wtc->parity2_mdl) {
        if (wtc->parity2_mdl->MdlFlags & MDL_PAGES_LOCKED)
            MmUnlockPages(wtc->parity2_mdl);

        IoFreeMdl(wtc->parity2_mdl);
        wtc->parity2_mdl = NULL;
    }

    if (wtc->mdl) {
        if (wtc->mdl->MdlFlags & MDL_PAGES_LOCKED)
            MmUnlockPages(wtc->mdl);

        IoFreeMdl(wtc->mdl);
        wtc->mdl = NULL;
    }

    if (wtc->parity1) {
        ExFreePool(wtc->parity1);
        wtc->parity1 = NULL;
    }

    if (wtc->parity2) {
        ExFreePool(wtc->parity2);
        wtc->parity2 = NULL;
    }

    if (wtc->scratch) {
        ExFreePool(wtc->scratch);
        wtc->scratch = NULL;
    }

    while (!IsListEmpty(&wtc->stripes)) {
        write_data_stripe* stripe = CONTAINING_RECORD(RemoveHeadList(&wtc->stripes), write_data_stripe, list_entry);

        // Consecutive stripes may point at the same MDL; release it only once.
        if (stripe->mdl && stripe->mdl != last_mdl) {
            if (stripe->mdl->MdlFlags & MDL_PAGES_LOCKED)
                MmUnlockPages(stripe->mdl);

            IoFreeMdl(stripe->mdl);
        }

        // Only the pointer value is compared afterwards; it is never dereferenced again.
        last_mdl = stripe->mdl;

        if (stripe->Irp)
            IoFreeIrp(stripe->Irp);

        ExFreePool(stripe);
    }
}
/* Links `newext` into fcb->extents, keeping the list ordered by offset.
 *
 * The search starts at the entry following `prevextle`; `newext` is placed
 * immediately before the first extent whose offset is not smaller than its
 * own, or at the tail of the list if there is none. */
void add_extent(_In_ fcb* fcb, _In_ LIST_ENTRY* prevextle, _In_ __drv_aliasesMem extent* newext) {
    LIST_ENTRY* cursor;

    for (cursor = prevextle->Flink; cursor != &fcb->extents; cursor = cursor->Flink) {
        extent* candidate = CONTAINING_RECORD(cursor, extent, list_entry);

        if (candidate->offset < newext->offset)
            continue;

        // Inserting after the predecessor puts newext right in front of candidate.
        InsertHeadList(cursor->Blink, &newext->list_entry);
        return;
    }

    InsertTailList(&fcb->extents, &newext->list_entry);
}
NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, PIRP Irp, LIST_ENTRY* rollback) {
NTSTATUS Status;
LIST_ENTRY* le;
le = fcb->extents.Flink;
while (le != &fcb->extents) {
LIST_ENTRY* le2 = le->Flink;
extent* ext = CONTAINING_RECORD(le, extent, list_entry);
EXTENT_DATA* ed = &ext->extent_data;
EXTENT_DATA2* ed2 = NULL;
UINT64 len;
if (!ext->ignore) {
if (ed->type != EXTENT_TYPE_INLINE)
ed2 = (EXTENT_DATA2*)ed->data;
len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
if (ext->offset < end_data && ext->offset + len > start_data) {
if (ed->type == EXTENT_TYPE_INLINE) {
if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
remove_fcb_extent(fcb, ext, rollback);
fcb->inode_item.st_blocks -= len;
fcb->inode_item_changed = TRUE;
} else {
ERR("trying to split inline extent\n");
#ifdef DEBUG_PARANOID
int3;
#endif
return STATUS_INTERNAL_ERROR;
}
} else if (ed->type != EXTENT_TYPE_INLINE) {
if (start_data <= ext->offset && end_data >= ext->offset + len) { // remove all
if (ed2->size != 0) {
chunk* c;
fcb->inode_item.st_blocks -= len;
fcb->inode_item_changed = TRUE;
c = get_chunk_from_address(Vcb, ed2->address);
if (!c) {
ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
} else {
Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, -1,
fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
ERR("update_changed_extent_ref returned %08x\n", Status);
goto end;
}
}
}
remove_fcb_extent(fcb, ext, rollback);
} else if (start_data <= ext->offset && end_data < ext->offset + len) { // remove beginning
EXTENT_DATA2* ned2;
extent* newext;
if (ed2->size != 0) {
fcb->inode_item.st_blocks -= end_data - ext->offset;
fcb->inode_item_changed = TRUE;
}
newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
if (!newext) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
ned2 = (EXTENT_DATA2*)newext->extent_data.data;
newext->extent_data.generation = Vcb->superblock.generation;
newext->extent_data.decoded_size = ed->decoded_size;
newext->extent_data.compression = ed->compression;
newext->extent_data.encryption = ed->encryption;
newext->extent_data.encoding = ed->encoding;
newext->extent_data.type = ed->type;
ned2->address = ed2->address;
ned2->size = ed2->size;
ned2->offset = ed2->offset + (end_data - ext->offset);
ned2->num_bytes = ed2->num_bytes - (end_data - ext->offset);
newext->offset = end_data;
newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
newext->unique = ext->unique;
newext->ignore = FALSE;
newext->inserted = TRUE;
if (ext->csum) {
if (ed->compression == BTRFS_COMPRESSION_NONE) {
newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG);
if (!newext->csum) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
ExFreePool(newext);
goto end;
}
RtlCopyMemory(newext->csum, &ext->csum[(end_data - ext->offset) / Vcb->superblock.sector_size],
(ULONG)(ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size));
} else {
newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG);
if (!newext->csum) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
ExFreePool(newext);
goto end;
}
RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size));
}
} else
newext->csum = NULL;
add_extent(fcb, &ext->list_entry, newext);
remove_fcb_extent(fcb, ext, rollback);
} else if (start_data > ext->offset && end_data >= ext->offset + len) { // remove end
EXTENT_DATA2* ned2;
extent* newext;
if (ed2->size != 0) {
fcb->inode_item.st_blocks -= ext->offset + len - start_data;
fcb->inode_item_changed = TRUE;
}
newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
if (!newext) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
ned2 = (EXTENT_DATA2*)newext->extent_data.data;
newext->extent_data.generation = Vcb->superblock.generation;
newext->extent_data.decoded_size = ed->decoded_size;
newext->extent_data.compression = ed->compression;
newext->extent_data.encryption = ed->encryption;
newext->extent_data.encoding = ed->encoding;
newext->extent_data.type = ed->type;
ned2->address = ed2->address;
ned2->size = ed2->size;
ned2->offset = ed2->offset;
ned2->num_bytes = start_data - ext->offset;
newext->offset = ext->offset;
newext->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
newext->unique = ext->unique;
newext->ignore = FALSE;
newext->inserted = TRUE;
if (ext->csum) {
if (ed->compression == BTRFS_COMPRESSION_NONE) {
newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG);
if (!newext->csum) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
ExFreePool(newext);
goto end;
}
RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ned2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size));
} else {
newext->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG);
if (!newext->csum) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
ExFreePool(newext);
goto end;
}
RtlCopyMemory(newext->csum, ext->csum, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size));
}
} else
newext->csum = NULL;
InsertHeadList(&ext->list_entry, &newext->list_entry);
remove_fcb_extent(fcb, ext, rollback);
} else if (start_data > ext->offset && end_data < ext->offset + len) { // remove middle
EXTENT_DATA2 *neda2, *nedb2;
extent *newext1, *newext2;
if (ed2->size != 0) {
chunk* c;
fcb->inode_item.st_blocks -= end_data - start_data;
fcb->inode_item_changed = TRUE;
c = get_chunk_from_address(Vcb, ed2->address);
if (!c) {
ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
} else {
Status = update_changed_extent_ref(Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);
if (!NT_SUCCESS(Status)) {
ERR("update_changed_extent_ref returned %08x\n", Status);
goto end;
}
}
}
newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
if (!newext1) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
if (!newext2) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
ExFreePool(newext1);
goto end;
}
neda2 = (EXTENT_DATA2*)newext1->extent_data.data;
newext1->extent_data.generation = Vcb->superblock.generation;
newext1->extent_data.decoded_size = ed->decoded_size;
newext1->extent_data.compression = ed->compression;
newext1->extent_data.encryption = ed->encryption;
newext1->extent_data.encoding = ed->encoding;
newext1->extent_data.type = ed->type;
neda2->address = ed2->address;
neda2->size = ed2->size;
neda2->offset = ed2->offset;
neda2->num_bytes = start_data - ext->offset;
nedb2 = (EXTENT_DATA2*)newext2->extent_data.data;
newext2->extent_data.generation = Vcb->superblock.generation;
newext2->extent_data.decoded_size = ed->decoded_size;
newext2->extent_data.compression = ed->compression;
newext2->extent_data.encryption = ed->encryption;
newext2->extent_data.encoding = ed->encoding;
newext2->extent_data.type = ed->type;
nedb2->address = ed2->address;
nedb2->size = ed2->size;
nedb2->offset = ed2->offset + (end_data - ext->offset);
nedb2->num_bytes = ext->offset + len - end_data;
newext1->offset = ext->offset;
newext1->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
newext1->unique = ext->unique;
newext1->ignore = FALSE;
newext1->inserted = TRUE;
newext2->offset = end_data;
newext2->datalen = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
newext2->unique = ext->unique;
newext2->ignore = FALSE;
newext2->inserted = TRUE;
if (ext->csum) {
if (ed->compression == BTRFS_COMPRESSION_NONE) {
newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(neda2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG);
if (!newext1->csum) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
ExFreePool(newext1);
ExFreePool(newext2);
goto end;
}
newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(nedb2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG);
if (!newext2->csum) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
ExFreePool(newext1->csum);
ExFreePool(newext1);
ExFreePool(newext2);
goto end;
}
RtlCopyMemory(newext1->csum, ext->csum, (ULONG)(neda2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size));
RtlCopyMemory(newext2->csum, &ext->csum[(end_data - ext->offset) / Vcb->superblock.sector_size],
(ULONG)(nedb2->num_bytes * sizeof(UINT32) / Vcb->superblock.sector_size));
} else {
newext1->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG);
if (!newext1->csum) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
ExFreePool(newext1);
ExFreePool(newext2);
goto end;
}
newext2->csum = ExAllocatePoolWithTag(PagedPool, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size), ALLOC_TAG);
if (!newext2->csum) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
ExFreePool(newext1->csum);
ExFreePool(newext1);
ExFreePool(newext2);
goto end;
}
RtlCopyMemory(newext1->csum, ext->csum, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size));
RtlCopyMemory(newext2->csum, ext->csum, (ULONG)(ed2->size * sizeof(UINT32) / Vcb->superblock.sector_size));
}
} else {
newext1->csum = NULL;
newext2->csum = NULL;
}
InsertHeadList(&ext->list_entry, &newext1->list_entry);
add_extent(fcb, &newext1->list_entry, newext2);
remove_fcb_extent(fcb, ext, rollback);
}
}
}
}
le = le2;
}
Status = STATUS_SUCCESS;
end:
fcb->extents_changed = TRUE;
mark_fcb_dirty(fcb);
return Status;
}
/* Queues a ROLLBACK_INSERT_EXTENT entry for `ext` of `fcb` on the `rollback`
 * list. If the entry cannot be allocated the failure is only logged and no
 * entry is queued. */
void add_insert_extent_rollback(LIST_ENTRY* rollback, fcb* fcb, extent* ext) {
    rollback_extent* entry = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);

    if (entry) {
        entry->fcb = fcb;
        entry->ext = ext;

        add_rollback(rollback, ROLLBACK_INSERT_EXTENT, entry);
    } else {
        ERR("out of memory\n");
    }
}
/* add_extent_to_fcb: creates a new extent for `fcb` at file offset `offset`
 * from a copy of the `edsize` bytes at `ed`, and links it into fcb->extents in
 * offset order (before the first extent whose offset is >= `offset`).
 *
 * `csum` is stored in the new extent as-is, not copied. A matching
 * insert-rollback entry is queued on `rollback`.
 *
 * Returns STATUS_INSUFFICIENT_RESOURCES if the extent cannot be allocated (in
 * which case `csum` is left untouched), STATUS_SUCCESS otherwise. */
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(suppress: 28194)
#endif
NTSTATUS add_extent_to_fcb(_In_ fcb* fcb, _In_ UINT64 offset, _In_reads_bytes_(edsize) EXTENT_DATA* ed, _In_ UINT16 edsize,
                           _In_ BOOL unique, _In_opt_ _When_(return >= 0, __drv_aliasesMem) UINT32* csum, _In_ LIST_ENTRY* rollback) {
    extent* newext;
    LIST_ENTRY* pos;

    newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + edsize, ALLOC_TAG);
    if (!newext) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    RtlCopyMemory(&newext->extent_data, ed, edsize);

    newext->offset = offset;
    newext->datalen = edsize;
    newext->unique = unique;
    newext->ignore = FALSE;
    newext->inserted = TRUE;
    newext->csum = csum;

    // Stop at the first extent that must come after the new one, or at the list head if none does.
    pos = fcb->extents.Flink;
    while (pos != &fcb->extents && CONTAINING_RECORD(pos, extent, list_entry)->offset < offset)
        pos = pos->Flink;

    // InsertTailList links the entry directly in front of `pos` (i.e. at the tail when pos is the head).
    InsertTailList(pos, &newext->list_entry);

    add_insert_extent_rollback(rollback, fcb, newext);

    return STATUS_SUCCESS;
}
#ifdef _MSC_VER
#pragma warning(pop)
#endif
/* Logically deletes `ext` by setting its ignore flag and queues a
 * ROLLBACK_DELETE_EXTENT entry on `rollback`. An extent that is already
 * ignored is left alone. If the rollback entry cannot be allocated the extent
 * stays ignored and the failure is only logged. */
static void remove_fcb_extent(fcb* fcb, extent* ext, LIST_ENTRY* rollback) {
    rollback_extent* entry;

    if (ext->ignore)
        return;

    ext->ignore = TRUE;

    entry = ExAllocatePoolWithTag(NonPagedPool, sizeof(rollback_extent), ALLOC_TAG);
    if (!entry) {
        ERR("out of memory\n");
        return;
    }

    entry->fcb = fcb;
    entry->ext = ext;

    add_rollback(rollback, ROLLBACK_DELETE_EXTENT, entry);
}
/* Computes one crc32c checksum per sector of `data` into `csum`.
 *
 * `data` holds `sectors` sectors of Vcb->superblock.sector_size bytes each.
 * Small jobs, and all jobs when fewer than two processors are active, are
 * calculated inline; larger ones are handed to add_calc_job and waited for.
 *
 * Returns STATUS_SUCCESS, or the failure status from add_calc_job. */
NTSTATUS calc_csum(_In_ device_extension* Vcb, _In_reads_bytes_(sectors*Vcb->superblock.sector_size) UINT8* data,
                   _In_ UINT32 sectors, _Out_writes_bytes_(sectors*sizeof(UINT32)) UINT32* csum) {
    NTSTATUS Status;
    calc_job* cj;

    // From experimenting, it seems that 40 sectors is roughly the crossover
    // point where offloading the crc32 calculation becomes worth it.
    if (sectors >= 40 && KeQueryActiveProcessorCount(NULL) >= 2) {
        Status = add_calc_job(Vcb, data, sectors, csum, &cj);
        if (!NT_SUCCESS(Status)) {
            ERR("add_calc_job returned %08x\n", Status);
            return Status;
        }

        // Block until the job signals completion, then release it.
        KeWaitForSingleObject(&cj->event, Executive, KernelMode, FALSE, NULL);
        free_calc_job(cj);
    } else {
        ULONG idx = 0;

        while (idx < sectors) {
            csum[idx] = ~calc_crc32c(0xffffffff, data + (idx * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
            idx++;
        }
    }

    return STATUS_SUCCESS;
}
/* Tries to place a new extent of `length` bytes for `fcb` inside chunk `c`.
 *
 * On success the extent (regular, or prealloc if `prealloc` is set) is added
 * to the fcb at file offset `start_data`, the space is taken out of the
 * chunk's free-space list, a reference to the on-disk extent is recorded, the
 * chunk lock is released, and - if `data` is given - the data is written out.
 * Checksums are calculated when data is written and the inode does not have
 * BTRFS_INODE_NODATASUM set.
 *
 * Returns TRUE on success (chunk lock released). Returns FALSE, with the chunk
 * lock still held by the caller, if no suitable address was found or an
 * allocation or checksum step failed. Note that a failure of the data write
 * itself is logged but still reported as TRUE. */
_Requires_lock_held_(c->lock)
_When_(return != 0, _Releases_lock_(c->lock))
BOOL insert_extent_chunk(_In_ device_extension* Vcb, _In_ fcb* fcb, _In_ chunk* c, _In_ UINT64 start_data, _In_ UINT64 length, _In_ BOOL prealloc, _In_opt_ void* data,
                         _In_opt_ PIRP Irp, _In_ LIST_ENTRY* rollback, _In_ UINT8 compression, _In_ UINT64 decoded_size, _In_ BOOL file_write, _In_ UINT64 irp_offset) {
    UINT64 disk_addr;
    NTSTATUS Status;
    EXTENT_DATA* item;
    EXTENT_DATA2* item_ext;
    UINT16 item_size = (UINT16)(offsetof(EXTENT_DATA, data[0]) + sizeof(EXTENT_DATA2));
    UINT32* sums = NULL;

    TRACE("(%p, (%llx, %llx), %llx, %llx, %llx, %u, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, prealloc, data, rollback);

    if (!find_data_address_in_chunk(Vcb, c, length, &disk_addr))
        return FALSE;

    // add extent data to inode
    item = ExAllocatePoolWithTag(PagedPool, item_size, ALLOC_TAG);
    if (!item) {
        ERR("out of memory\n");
        return FALSE;
    }

    item->generation = Vcb->superblock.generation;
    item->decoded_size = decoded_size;
    item->compression = compression;
    item->encryption = BTRFS_ENCRYPTION_NONE;
    item->encoding = BTRFS_ENCODING_NONE;
    item->type = prealloc ? EXTENT_TYPE_PREALLOC : EXTENT_TYPE_REGULAR;

    item_ext = (EXTENT_DATA2*)item->data;
    item_ext->address = disk_addr;
    item_ext->size = length;
    item_ext->offset = 0;
    item_ext->num_bytes = decoded_size;

    if (!prealloc && data && !(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
        ULONG sector_count = (ULONG)(length / Vcb->superblock.sector_size);

        sums = ExAllocatePoolWithTag(PagedPool, sector_count * sizeof(UINT32), ALLOC_TAG);
        if (!sums) {
            ERR("out of memory\n");
            goto fail;
        }

        Status = calc_csum(Vcb, data, sector_count, sums);
        if (!NT_SUCCESS(Status)) {
            ERR("calc_csum returned %08x\n", Status);
            goto fail;
        }
    }

    Status = add_extent_to_fcb(fcb, start_data, item, item_size, TRUE, sums, rollback);
    if (!NT_SUCCESS(Status)) {
        ERR("add_extent_to_fcb returned %08x\n", Status);
        goto fail;
    }

    // add_extent_to_fcb took its own copy of the item; the checksums now belong to the extent.
    ExFreePool(item);

    c->used += length;
    space_list_subtract(c, FALSE, disk_addr, length, rollback);

    fcb->inode_item.st_blocks += decoded_size;

    fcb->extents_changed = TRUE;
    fcb->inode_item_changed = TRUE;
    mark_fcb_dirty(fcb);

    ExAcquireResourceExclusiveLite(&c->changed_extents_lock, TRUE);

    add_changed_extent_ref(c, disk_addr, length, fcb->subvol->id, fcb->inode, start_data, 1, fcb->inode_item.flags & BTRFS_INODE_NODATASUM);

    ExReleaseResourceLite(&c->changed_extents_lock);

    // The chunk lock is dropped before the (potentially slow) write is issued.
    release_chunk_lock(c, Vcb);

    if (data) {
        ULONG priority = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority;

        Status = write_data_complete(Vcb, disk_addr, data, (UINT32)length, Irp, NULL, file_write, irp_offset, priority);
        if (!NT_SUCCESS(Status))
            ERR("write_data_complete returned %08x\n", Status);
    }

    return TRUE;

fail:
    if (sums)
        ExFreePool(sums);

    ExFreePool(item);

    return FALSE;
}
/* Tries to satisfy a write at `start_data` by allocating from the free space
 * that directly follows, on disk, the extent ending at `start_data`.
 *
 * The last live extent at or before `start_data` must be regular or prealloc,
 * must end exactly at `start_data`, and its chunk must be a writable data
 * chunk with free space starting right after the extent. If so, up to
 * min(free space, length, MAX_EXTENT_SIZE) bytes are inserted through
 * insert_extent_chunk and added to `*written`.
 *
 * Returns TRUE if an extent was inserted, FALSE otherwise. The chunk lock is
 * never held on return. */
static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
                            PIRP Irp, UINT64* written, BOOL file_write, UINT64 irp_offset, LIST_ENTRY* rollback) {
    BOOL success = FALSE;
    EXTENT_DATA* ed;
    EXTENT_DATA2* ed2;
    chunk* c;
    LIST_ENTRY* le;
    extent* ext = NULL;

    // Find the last live extent that starts at or before start_data.
    le = fcb->extents.Flink;

    while (le != &fcb->extents) {
        extent* nextext = CONTAINING_RECORD(le, extent, list_entry);

        if (!nextext->ignore) {
            if (nextext->offset == start_data) {
                ext = nextext;
                break;
            } else if (nextext->offset > start_data)
                break;

            ext = nextext;
        }

        le = le->Flink;
    }

    if (!ext)
        return FALSE;

    ed = &ext->extent_data;

    if (ed->type != EXTENT_TYPE_REGULAR && ed->type != EXTENT_TYPE_PREALLOC) {
        TRACE("not extending extent which is not regular or prealloc\n");
        return FALSE;
    }

    ed2 = (EXTENT_DATA2*)ed->data;

    if (ext->offset + ed2->num_bytes != start_data) {
        TRACE("last EXTENT_DATA does not run up to start_data (%llx + %llx != %llx)\n", ext->offset, ed2->num_bytes, start_data);
        return FALSE;
    }

    // An extent with no on-disk size has no backing chunk (excise_extents makes the
    // same distinction before looking up a chunk), so there is nothing to extend.
    if (ed2->size == 0) {
        TRACE("not extending extent which has no on-disk data\n");
        return FALSE;
    }

    c = get_chunk_from_address(Vcb, ed2->address);

    // The lookup can fail; the other callers in this file check for NULL as well.
    if (!c) {
        ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
        return FALSE;
    }

    if (c->reloc || c->readonly || c->chunk_item->type != Vcb->data_flags)
        return FALSE;

    acquire_chunk_lock(c, Vcb);

    if (length > c->chunk_item->size - c->used) {
        release_chunk_lock(c, Vcb);
        return FALSE;
    }

    if (!c->cache_loaded) {
        NTSTATUS Status = load_cache_chunk(Vcb, c, NULL);

        if (!NT_SUCCESS(Status)) {
            ERR("load_cache_chunk returned %08x\n", Status);
            release_chunk_lock(c, Vcb);
            return FALSE;
        }
    }

    // Look for a free-space entry that begins exactly where the extent ends on disk.
    le = c->space.Flink;

    while (le != &c->space) {
        space* s = CONTAINING_RECORD(le, space, list_entry);

        if (s->address == ed2->address + ed2->size) {
            UINT64 newlen = min(min(s->size, length), MAX_EXTENT_SIZE);

            success = insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset);

            // insert_extent_chunk releases the chunk lock only when it succeeds.
            if (success)
                *written += newlen;
            else
                release_chunk_lock(c, Vcb);

            return success;
        } else if (s->address > ed2->address + ed2->size)
            break;

        le = le->Flink;
    }

    release_chunk_lock(c, Vcb);

    return FALSE;
}
/* Last-resort allocator: spreads `length` bytes starting at file offset
 * `start` over whatever free space remains, one free-space entry at a time.
 *
 * First allocates new chunks until alloc_chunk reports STATUS_DISK_FULL, then
 * walks every writable chunk of the data type and inserts extents sized to its
 * free-space entries until the request is satisfied. `data` may be NULL (no
 * data is written); `prealloc` requests prealloc extents, except for paging
 * files.
 *
 * Returns STATUS_SUCCESS if everything was placed, STATUS_DISK_FULL if space
 * ran out, or the unexpected status from alloc_chunk. */
static NTSTATUS insert_chunk_fragmented(fcb* fcb, UINT64 start, UINT64 length, UINT8* data, BOOL prealloc, LIST_ENTRY* rollback) {
    LIST_ENTRY* le;
    UINT64 flags = fcb->Vcb->data_flags;
    BOOL page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;
    NTSTATUS Status;
    chunk* c;

    // NOTE(review): the other alloc_chunk callers in this file hold chunk_lock
    // exclusively; here it is only held shared - confirm whether that is intended.
    ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, TRUE);

    // first create as many chunks as we can
    do {
        Status = alloc_chunk(fcb->Vcb, flags, &c, FALSE);
    } while (NT_SUCCESS(Status));

    if (Status != STATUS_DISK_FULL) {
        ERR("alloc_chunk returned %08x\n", Status);
        ExReleaseResourceLite(&fcb->Vcb->chunk_lock);
        return Status;
    }

    le = fcb->Vcb->chunks.Flink;
    while (le != &fcb->Vcb->chunks) {
        c = CONTAINING_RECORD(le, chunk, list_entry);

        if (!c->readonly && !c->reloc) {
            acquire_chunk_lock(c, fcb->Vcb);

            if (c->chunk_item->type == flags) {
                while (!IsListEmpty(&c->space_size) && length > 0) {
                    space* s = CONTAINING_RECORD(c->space_size.Flink, space, list_entry_size);
                    UINT64 extlen = min(length, s->size);

                    if (insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, prealloc && !page_file, data, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, FALSE, 0)) {
                        start += extlen;
                        length -= extlen;
                        if (data) data += extlen;

                        // insert_extent_chunk dropped the chunk lock on success; take it again.
                        acquire_chunk_lock(c, fcb->Vcb);
                    } else {
                        // Nothing changed, so retrying the same entry would spin forever.
                        // The chunk lock is still held here; move on to the next chunk.
                        break;
                    }
                }
            }

            release_chunk_lock(c, fcb->Vcb);

            if (length == 0)
                break;
        }

        le = le->Flink;
    }

    ExReleaseResourceLite(&fcb->Vcb->chunk_lock);

    return length == 0 ? STATUS_SUCCESS : STATUS_DISK_FULL;
}
/* Reserves `length` bytes for `fcb` starting at file offset `start`, in pieces
 * of at most MAX_EXTENT_SIZE, without writing any data. The extents are
 * created as prealloc extents unless the file is a paging file.
 *
 * For each piece an existing writable data chunk with enough free space is
 * tried first, then a freshly allocated chunk. If even that fails, everything
 * still outstanding is handed to insert_chunk_fragmented and its status is
 * returned.
 *
 * Returns STATUS_SUCCESS when the whole range was reserved, otherwise the
 * failure status from alloc_chunk or insert_chunk_fragmented. */
static NTSTATUS insert_prealloc_extent(fcb* fcb, UINT64 start, UINT64 length, LIST_ENTRY* rollback) {
    NTSTATUS Status;
    UINT64 flags = fcb->Vcb->data_flags;
    BOOL page_file = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE;

    do {
        UINT64 extlen = min(MAX_EXTENT_SIZE, length);
        BOOL placed = FALSE;
        LIST_ENTRY* le;
        chunk* c;

        // Pass 1: look for room in a chunk that already exists.
        ExAcquireResourceSharedLite(&fcb->Vcb->chunk_lock, TRUE);

        for (le = fcb->Vcb->chunks.Flink; le != &fcb->Vcb->chunks; le = le->Flink) {
            c = CONTAINING_RECORD(le, chunk, list_entry);

            if (c->readonly || c->reloc)
                continue;

            acquire_chunk_lock(c, fcb->Vcb);

            if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen &&
                insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, FALSE, 0)) {
                // The chunk lock was released by insert_extent_chunk.
                placed = TRUE;
                break;
            }

            release_chunk_lock(c, fcb->Vcb);
        }

        ExReleaseResourceLite(&fcb->Vcb->chunk_lock);

        if (!placed) {
            // Pass 2: allocate a new chunk and try that.
            ExAcquireResourceExclusiveLite(&fcb->Vcb->chunk_lock, TRUE);

            Status = alloc_chunk(fcb->Vcb, flags, &c, FALSE);

            ExReleaseResourceLite(&fcb->Vcb->chunk_lock);

            if (!NT_SUCCESS(Status)) {
                ERR("alloc_chunk returned %08x\n", Status);
                return Status;
            }

            acquire_chunk_lock(c, fcb->Vcb);

            if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= extlen &&
                insert_extent_chunk(fcb->Vcb, fcb, c, start, extlen, !page_file, NULL, NULL, rollback, BTRFS_COMPRESSION_NONE, extlen, FALSE, 0)) {
                placed = TRUE;
            } else {
                release_chunk_lock(c, fcb->Vcb);

                // Pass 3: scatter the whole remainder over any free space left.
                Status = insert_chunk_fragmented(fcb, start, length, NULL, TRUE, rollback);
                if (!NT_SUCCESS(Status))
                    ERR("insert_chunk_fragmented returned %08x\n", Status);

                return Status;
            }
        }

        length -= extlen;
        start += extlen;
    } while (length > 0);

    return STATUS_SUCCESS;
}
/* Allocates extents for, and writes, `length` bytes of `data` at file offset
 * `start_data`.
 *
 * For a write that does not start at offset 0, the first attempt is to grow
 * into the free space following the preceding extent (try_extend_data). The
 * rest is placed in pieces of at most MAX_EXTENT_SIZE: in an existing writable
 * data chunk if one has room, otherwise in a newly allocated chunk, and as a
 * last resort through insert_chunk_fragmented, whose status is then returned.
 *
 * Returns STATUS_SUCCESS once everything has been placed, the failure status
 * of alloc_chunk / insert_chunk_fragmented, or STATUS_DISK_FULL if the
 * placement loop ends without having written the full length. */
static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
                              PIRP Irp, BOOL file_write, UINT64 irp_offset, LIST_ENTRY* rollback) {
    NTSTATUS Status;
    LIST_ENTRY* le;
    chunk* c;
    UINT64 flags, orig_length = length, written = 0;

    TRACE("(%p, (%llx, %llx), %llx, %llx, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data);

    if (start_data > 0) {
        try_extend_data(Vcb, fcb, start_data, length, data, Irp, &written, file_write, irp_offset, rollback);

        if (written == length)
            return STATUS_SUCCESS;

        if (written > 0) {
            start_data += written;
            irp_offset += written;
            length -= written;
            data = (UINT8*)data + written;
        }
    }

    flags = Vcb->data_flags;

    while (written < orig_length) {
        UINT64 newlen = min(length, MAX_EXTENT_SIZE);
        BOOL done = FALSE;

        // Rather than necessarily writing the whole extent at once, we deal with it in blocks of 128 MB.
        // First, see if we can write the extent part to an existing chunk.

        ExAcquireResourceSharedLite(&Vcb->chunk_lock, TRUE);

        for (le = Vcb->chunks.Flink; le != &Vcb->chunks; le = le->Flink) {
            c = CONTAINING_RECORD(le, chunk, list_entry);

            if (c->readonly || c->reloc)
                continue;

            acquire_chunk_lock(c, Vcb);

            if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
                insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset)) {
                // Chunk lock already released by insert_extent_chunk.
                done = TRUE;
                break;
            }

            release_chunk_lock(c, Vcb);
        }

        ExReleaseResourceLite(&Vcb->chunk_lock);

        if (!done) {
            // Otherwise, see if we can put it in a new chunk.

            ExAcquireResourceExclusiveLite(&Vcb->chunk_lock, TRUE);

            Status = alloc_chunk(Vcb, flags, &c, FALSE);

            ExReleaseResourceLite(&Vcb->chunk_lock);

            if (!NT_SUCCESS(Status)) {
                ERR("alloc_chunk returned %08x\n", Status);
                return Status;
            }

            if (c) {
                acquire_chunk_lock(c, Vcb);

                if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= newlen &&
                    insert_extent_chunk(Vcb, fcb, c, start_data, newlen, FALSE, data, Irp, rollback, BTRFS_COMPRESSION_NONE, newlen, file_write, irp_offset))
                    done = TRUE;
                else
                    release_chunk_lock(c, Vcb);
            }
        }

        if (!done) {
            // No single chunk can take this piece: fall back to fragmented allocation for the rest.
            Status = insert_chunk_fragmented(fcb, start_data, length, data, FALSE, rollback);

            if (!NT_SUCCESS(Status))
                ERR("insert_chunk_fragmented returned %08x\n", Status);

            return Status;
        }

        written += newlen;

        if (written == orig_length)
            return STATUS_SUCCESS;

        // Advance past the piece that has just been placed.
        start_data += newlen;
        irp_offset += newlen;
        length -= newlen;
        data = (UINT8*)data + newlen;
    }

    return STATUS_DISK_FULL;
}
/* Shrinks the data of `fcb` to `end` bytes.
 *
 * Files that are not stored inline (and any truncation to zero) have the
 * extents beyond the sector-aligned new end excised, after which st_size and
 * the cache-manager sizes in the FCB header are updated.
 *
 * An inline file truncated to a non-zero size is rewritten instead: the first
 * `end` bytes are read back, all extents are excised, and the data is stored
 * again - as a new inline extent if `end` is within options.max_inline,
 * otherwise through do_write_file padded with zeroes to a sector boundary.
 * This path does not touch st_size or the FCB header sizes.
 *
 * Returns STATUS_SUCCESS or the first failure status encountered. */
NTSTATUS truncate_file(fcb* fcb, UINT64 end, PIRP Irp, LIST_ENTRY* rollback) {
    NTSTATUS Status;
    UINT8* buf;
    BOOL make_inline;
    UINT64 aligned_end;

    // FIXME - convert into inline extent if short enough

    if (end == 0 || !fcb_is_inline(fcb)) {
        Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
                                sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), Irp, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("excise_extents returned %08x\n", Status);
            return Status;
        }

        fcb->inode_item.st_size = end;
        fcb->inode_item_changed = TRUE;
        TRACE("setting st_size to %llx\n", end);

        fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
        fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
        fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
        // FIXME - inform cache manager of this

        TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);

        return STATUS_SUCCESS;
    }

    make_inline = end <= fcb->Vcb->options.max_inline;
    aligned_end = sector_align(end, fcb->Vcb->superblock.sector_size);

    // An inline result needs room for the EXTENT_DATA header in front of the data;
    // otherwise the buffer is sized to whole sectors.
    buf = ExAllocatePoolWithTag(PagedPool, (ULONG)(make_inline ? (offsetof(EXTENT_DATA, data[0]) + end) : aligned_end), ALLOC_TAG);
    if (!buf) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    Status = read_file(fcb, make_inline ? (buf + offsetof(EXTENT_DATA, data[0])) : buf, 0, end, NULL, Irp);
    if (!NT_SUCCESS(Status)) {
        ERR("read_file returned %08x\n", Status);
        goto done;
    }

    Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
    if (!NT_SUCCESS(Status)) {
        ERR("excise_extents returned %08x\n", Status);
        goto done;
    }

    if (make_inline) {
        EXTENT_DATA* ed = (EXTENT_DATA*)buf;

        ed->generation = fcb->Vcb->superblock.generation;
        ed->decoded_size = end;
        ed->compression = BTRFS_COMPRESSION_NONE;
        ed->encryption = BTRFS_ENCRYPTION_NONE;
        ed->encoding = BTRFS_ENCODING_NONE;
        ed->type = EXTENT_TYPE_INLINE;

        Status = add_extent_to_fcb(fcb, 0, ed, (UINT16)(offsetof(EXTENT_DATA, data[0]) + end), FALSE, NULL, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("add_extent_to_fcb returned %08x\n", Status);
            goto done;
        }

        fcb->inode_item.st_blocks += end;
    } else {
        // Zero the slack between the new end of file and the sector boundary.
        RtlZeroMemory(buf + end, (ULONG)(aligned_end - end));

        Status = do_write_file(fcb, 0, aligned_end, buf, Irp, FALSE, 0, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("do_write_file returned %08x\n", Status);
            goto done;
        }
    }

    Status = STATUS_SUCCESS;

done:
    ExFreePool(buf);

    return Status;
}
/* Extends a file, or an alternate data stream, so that it ends at offset `end`.
 *
 * fcb      - control block of the file or stream being extended
 * fileref  - file reference; only handed on for the ADS case
 * end      - new end-of-file offset, in bytes
 * prealloc - if TRUE, the newly covered range of a non-inline file is backed
 *            with a prealloc extent via insert_prealloc_extent
 * Irp      - passed through to the read / write / excise helpers
 * rollback - list handed to the helpers that change the fcb's extents
 *
 * Returns STATUS_SUCCESS on success, STATUS_DISK_FULL if an ADS would grow past
 * 0xffff bytes, STATUS_INSUFFICIENT_RESOURCES if an allocation fails, or the
 * failing status of one of the helpers. */
NTSTATUS extend_file(fcb* fcb, file_ref* fileref, UINT64 end, BOOL prealloc, PIRP Irp, LIST_ENTRY* rollback) {
    UINT64 oldalloc, newalloc;
    BOOL cur_inline;
    NTSTATUS Status;

    // end is a UINT64, so it is printed with %llx like the other 64-bit values here
    TRACE("(%p, %p, %llx, %u)\n", fcb, fileref, end, prealloc);

    if (fcb->ads) {
        // the new length is handed on as a UINT16, so anything larger is refused
        if (end > 0xffff)
            return STATUS_DISK_FULL;

        return stream_set_end_of_file_information(fcb->Vcb, (UINT16)end, fcb, fileref, FALSE);
    } else {
        extent* ext = NULL;
        LIST_ENTRY* le;

        // walk backwards to find the last extent that isn't flagged as ignored
        le = fcb->extents.Blink;
        while (le != &fcb->extents) {
            extent* ext2 = CONTAINING_RECORD(le, extent, list_entry);

            if (!ext2->ignore) {
                ext = ext2;
                break;
            }

            le = le->Blink;
        }

        oldalloc = 0;

        if (ext) {
            EXTENT_DATA* ed = &ext->extent_data;
            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;

            // end of the space currently described by the last extent
            oldalloc = ext->offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
            cur_inline = ed->type == EXTENT_TYPE_INLINE;

            if (cur_inline && end > fcb->Vcb->options.max_inline) {
                // The file is inline but its new size exceeds max_inline: read the
                // existing contents, drop the extents, and write the data back out
                // (zero-padded to a sector boundary) through do_write_file.
                UINT64 origlength, length;
                UINT8* data;

                TRACE("giving inline file proper extents\n");

                origlength = ed->decoded_size;

                cur_inline = FALSE;

                length = sector_align(origlength, fcb->Vcb->superblock.sector_size);

                data = ExAllocatePoolWithTag(PagedPool, (ULONG)length, ALLOC_TAG);
                if (!data) {
                    ERR("could not allocate %llx bytes for data\n", length);
                    return STATUS_INSUFFICIENT_RESOURCES;
                }

                Status = read_file(fcb, data, 0, origlength, NULL, Irp);
                if (!NT_SUCCESS(Status)) {
                    ERR("read_file returned %08x\n", Status);
                    ExFreePool(data);
                    return Status;
                }

                RtlZeroMemory(data + origlength, (ULONG)(length - origlength));

                Status = excise_extents(fcb->Vcb, fcb, 0, fcb->inode_item.st_size, Irp, rollback);
                if (!NT_SUCCESS(Status)) {
                    ERR("excise_extents returned %08x\n", Status);
                    ExFreePool(data);
                    return Status;
                }

                Status = do_write_file(fcb, 0, length, data, Irp, FALSE, 0, rollback);
                if (!NT_SUCCESS(Status)) {
                    ERR("do_write_file returned %08x\n", Status);
                    ExFreePool(data);
                    return Status;
                }

                oldalloc = ext->offset + length;

                ExFreePool(data);
            }

            if (cur_inline) {
                UINT16 edsize;

                if (end > oldalloc) {
                    // Build a larger inline extent: old contents followed by zeroes.
                    edsize = (UINT16)(offsetof(EXTENT_DATA, data[0]) + end - ext->offset);
                    ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);

                    if (!ed) {
                        ERR("out of memory\n");
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    ed->generation = fcb->Vcb->superblock.generation;
                    ed->decoded_size = end - ext->offset;
                    ed->compression = BTRFS_COMPRESSION_NONE;
                    ed->encryption = BTRFS_ENCRYPTION_NONE;
                    ed->encoding = BTRFS_ENCODING_NONE;
                    ed->type = EXTENT_TYPE_INLINE;

                    Status = read_file(fcb, ed->data, ext->offset, oldalloc, NULL, Irp);
                    if (!NT_SUCCESS(Status)) {
                        ERR("read_file returned %08x\n", Status);
                        ExFreePool(ed);
                        return Status;
                    }

                    RtlZeroMemory(ed->data + oldalloc - ext->offset, (ULONG)(end - oldalloc));

                    remove_fcb_extent(fcb, ext, rollback);

                    Status = add_extent_to_fcb(fcb, ext->offset, ed, edsize, ext->unique, NULL, rollback);
                    if (!NT_SUCCESS(Status)) {
                        ERR("add_extent_to_fcb returned %08x\n", Status);
                        ExFreePool(ed);
                        return Status;
                    }

                    // the temporary buffer is released here on the success path too
                    ExFreePool(ed);

                    fcb->extents_changed = TRUE;
                    mark_fcb_dirty(fcb);
                }

                TRACE("extending inline file (oldalloc = %llx, end = %llx)\n", oldalloc, end);

                fcb->inode_item.st_size = end;
                TRACE("setting st_size to %llx\n", end);

                // for an inline file all the size fields are set to the byte length
                fcb->inode_item.st_blocks = end;

                fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
            } else {
                newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);

                if (newalloc > oldalloc) {
                    if (prealloc) {
                        // FIXME - try and extend previous extent first

                        Status = insert_prealloc_extent(fcb, oldalloc, newalloc - oldalloc, rollback);

                        if (!NT_SUCCESS(Status)) {
                            ERR("insert_prealloc_extent returned %08x\n", Status);
                            return Status;
                        }
                    }

                    fcb->extents_changed = TRUE;
                }

                fcb->inode_item.st_size = end;
                fcb->inode_item_changed = TRUE;
                mark_fcb_dirty(fcb);

                TRACE("setting st_size to %llx\n", end);

                TRACE("newalloc = %llx\n", newalloc);

                fcb->Header.AllocationSize.QuadPart = newalloc;
                fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
            }
        } else {
            // No usable extents at all.
            if (end > fcb->Vcb->options.max_inline) {
                newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);

                if (prealloc) {
                    Status = insert_prealloc_extent(fcb, 0, newalloc, rollback);

                    if (!NT_SUCCESS(Status)) {
                        ERR("insert_prealloc_extent returned %08x\n", Status);
                        return Status;
                    }
                }

                fcb->extents_changed = TRUE;
                fcb->inode_item_changed = TRUE;
                mark_fcb_dirty(fcb);

                fcb->inode_item.st_size = end;
                TRACE("setting st_size to %llx\n", end);

                TRACE("newalloc = %llx\n", newalloc);

                fcb->Header.AllocationSize.QuadPart = newalloc;
                fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
            } else {
                // Small enough to be inline: create a zero-filled inline extent.
                EXTENT_DATA* ed;
                UINT16 edsize;

                edsize = (UINT16)(offsetof(EXTENT_DATA, data[0]) + end);
                ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);

                if (!ed) {
                    ERR("out of memory\n");
                    return STATUS_INSUFFICIENT_RESOURCES;
                }

                ed->generation = fcb->Vcb->superblock.generation;
                ed->decoded_size = end;
                ed->compression = BTRFS_COMPRESSION_NONE;
                ed->encryption = BTRFS_ENCRYPTION_NONE;
                ed->encoding = BTRFS_ENCODING_NONE;
                ed->type = EXTENT_TYPE_INLINE;

                RtlZeroMemory(ed->data, (ULONG)end);

                Status = add_extent_to_fcb(fcb, 0, ed, edsize, FALSE, NULL, rollback);
                if (!NT_SUCCESS(Status)) {
                    ERR("add_extent_to_fcb returned %08x\n", Status);
                    ExFreePool(ed);
                    return Status;
                }

                ExFreePool(ed);

                fcb->extents_changed = TRUE;
                fcb->inode_item_changed = TRUE;
                mark_fcb_dirty(fcb);

                fcb->inode_item.st_size = end;
                TRACE("setting st_size to %llx\n", end);

                fcb->inode_item.st_blocks = end;

                fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
            }
        }
    }

    return STATUS_SUCCESS;
}
/* Writes data over the part of a preallocated extent that overlaps
 * [start_data, end_data) and turns that part into a regular extent.
 *
 * Depending on how the write range overlaps the extent, the original extent is
 * replaced by one, two or three new extent records (written part as
 * EXTENT_TYPE_REGULAR, untouched parts keeping the copied type), and the
 * original is removed with remove_fcb_extent.
 *
 * fcb         - file being written
 * ext         - the extent to write into (caller passes a prealloc extent)
 * start_data  - file offset at which `data` begins
 * end_data    - file offset at which the write ends
 * data        - buffer holding the bytes for start_data onwards
 * written     - receives the number of bytes written into this extent
 * Irp, file_write, irp_offset, priority - handed on to write_data_complete
 * rollback    - list on which the extent insertions / removal are recorded
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES on allocation failure,
 * or the failing status of write_data_complete, calc_csum or
 * update_changed_extent_ref. */
static NTSTATUS do_write_file_prealloc(fcb* fcb, extent* ext, UINT64 start_data, UINT64 end_data, void* data, UINT64* written,
                                       PIRP Irp, BOOL file_write, UINT64 irp_offset, ULONG priority, LIST_ENTRY* rollback) {
    EXTENT_DATA* ed = &ext->extent_data;
    EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
    NTSTATUS Status;
    chunk* c = NULL;

    if (start_data <= ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace all
        extent* newext;

        newext = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
        if (!newext) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        RtlCopyMemory(&newext->extent_data, &ext->extent_data, ext->datalen);

        newext->extent_data.type = EXTENT_TYPE_REGULAR;

        Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, (UINT32)ed2->num_bytes, Irp,
                                     NULL, file_write, irp_offset + ext->offset - start_data, priority);
        if (!NT_SUCCESS(Status)) {
            ERR("write_data_complete returned %08x\n", Status);
            // newext hasn't been linked into the extent list yet, so it must be freed here
            ExFreePool(newext);
            return Status;
        }

        if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
            // one checksum per sector written
            ULONG sl = (ULONG)(ed2->num_bytes / fcb->Vcb->superblock.sector_size);
            UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG);

            if (!csum) {
                ERR("out of memory\n");
                ExFreePool(newext);
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            Status = calc_csum(fcb->Vcb, (UINT8*)data + ext->offset - start_data, sl, csum);
            if (!NT_SUCCESS(Status)) {
                ERR("calc_csum returned %08x\n", Status);
                ExFreePool(csum);
                ExFreePool(newext);
                return Status;
            }

            newext->csum = csum;
        } else
            newext->csum = NULL;

        *written = ed2->num_bytes;

        newext->offset = ext->offset;
        newext->datalen = ext->datalen;
        newext->unique = ext->unique;
        newext->ignore = FALSE;
        newext->inserted = TRUE;
        InsertHeadList(&ext->list_entry, &newext->list_entry);

        add_insert_extent_rollback(rollback, fcb, newext);

        remove_fcb_extent(fcb, ext, rollback);

        c = get_chunk_from_address(fcb->Vcb, ed2->address);
    } else if (start_data <= ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace beginning
        EXTENT_DATA2* ned2;
        extent *newext1, *newext2;

        newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
        if (!newext1) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
        if (!newext2) {
            ERR("out of memory\n");
            ExFreePool(newext1);
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        // newext1: the written head, as a regular extent
        RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
        newext1->extent_data.type = EXTENT_TYPE_REGULAR;
        ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
        ned2->num_bytes = end_data - ext->offset;

        // newext2: the untouched tail, keeping the copied extent type
        RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
        ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
        ned2->offset += end_data - ext->offset;
        ned2->num_bytes -= end_data - ext->offset;

        Status = write_data_complete(fcb->Vcb, ed2->address + ed2->offset, (UINT8*)data + ext->offset - start_data, (UINT32)(end_data - ext->offset),
                                     Irp, NULL, file_write, irp_offset + ext->offset - start_data, priority);
        if (!NT_SUCCESS(Status)) {
            ERR("write_data_complete returned %08x\n", Status);
            ExFreePool(newext1);
            ExFreePool(newext2);
            return Status;
        }

        if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
            ULONG sl = (ULONG)((end_data - ext->offset) / fcb->Vcb->superblock.sector_size);
            UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG);

            if (!csum) {
                ERR("out of memory\n");
                ExFreePool(newext1);
                ExFreePool(newext2);
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            Status = calc_csum(fcb->Vcb, (UINT8*)data + ext->offset - start_data, sl, csum);
            if (!NT_SUCCESS(Status)) {
                ERR("calc_csum returned %08x\n", Status);
                ExFreePool(newext1);
                ExFreePool(newext2);
                ExFreePool(csum);
                return Status;
            }

            newext1->csum = csum;
        } else
            newext1->csum = NULL;

        *written = end_data - ext->offset;

        newext1->offset = ext->offset;
        newext1->datalen = ext->datalen;
        newext1->unique = ext->unique;
        newext1->ignore = FALSE;
        newext1->inserted = TRUE;
        InsertHeadList(&ext->list_entry, &newext1->list_entry);

        add_insert_extent_rollback(rollback, fcb, newext1);

        newext2->offset = end_data;
        newext2->datalen = ext->datalen;
        newext2->unique = ext->unique;
        newext2->ignore = FALSE;
        newext2->inserted = TRUE;
        newext2->csum = NULL;
        add_extent(fcb, &newext1->list_entry, newext2);

        add_insert_extent_rollback(rollback, fcb, newext2);

        c = get_chunk_from_address(fcb->Vcb, ed2->address);

        if (!c)
            ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
        else {
            // one extent record became two - presumably the 1 is the number of extra references
            Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
                                               fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);

            if (!NT_SUCCESS(Status)) {
                ERR("update_changed_extent_ref returned %08x\n", Status);
                return Status;
            }
        }

        remove_fcb_extent(fcb, ext, rollback);
    } else if (start_data > ext->offset && end_data >= ext->offset + ed2->num_bytes) { // replace end
        EXTENT_DATA2* ned2;
        extent *newext1, *newext2;

        newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
        if (!newext1) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
        if (!newext2) {
            ERR("out of memory\n");
            ExFreePool(newext1);
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        // newext1: the untouched head, keeping the copied extent type
        RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);

        ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
        ned2->num_bytes = start_data - ext->offset;

        // newext2: the written tail, as a regular extent
        RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);

        newext2->extent_data.type = EXTENT_TYPE_REGULAR;
        ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
        ned2->offset += start_data - ext->offset;
        ned2->num_bytes = ext->offset + ed2->num_bytes - start_data;

        Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (UINT32)ned2->num_bytes, Irp, NULL, file_write, irp_offset, priority);
        if (!NT_SUCCESS(Status)) {
            ERR("write_data_complete returned %08x\n", Status);
            ExFreePool(newext1);
            ExFreePool(newext2);
            return Status;
        }

        if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
            ULONG sl = (ULONG)(ned2->num_bytes / fcb->Vcb->superblock.sector_size);
            UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG);

            if (!csum) {
                ERR("out of memory\n");
                ExFreePool(newext1);
                ExFreePool(newext2);
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            Status = calc_csum(fcb->Vcb, data, sl, csum);
            if (!NT_SUCCESS(Status)) {
                ERR("calc_csum returned %08x\n", Status);
                ExFreePool(newext1);
                ExFreePool(newext2);
                ExFreePool(csum);
                return Status;
            }

            newext2->csum = csum;
        } else
            newext2->csum = NULL;

        *written = ned2->num_bytes;

        newext1->offset = ext->offset;
        newext1->datalen = ext->datalen;
        newext1->unique = ext->unique;
        newext1->ignore = FALSE;
        newext1->inserted = TRUE;
        newext1->csum = NULL;
        InsertHeadList(&ext->list_entry, &newext1->list_entry);

        add_insert_extent_rollback(rollback, fcb, newext1);

        newext2->offset = start_data;
        newext2->datalen = ext->datalen;
        newext2->unique = ext->unique;
        newext2->ignore = FALSE;
        newext2->inserted = TRUE;
        add_extent(fcb, &newext1->list_entry, newext2);

        add_insert_extent_rollback(rollback, fcb, newext2);

        c = get_chunk_from_address(fcb->Vcb, ed2->address);

        if (!c)
            ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
        else {
            Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 1,
                                               fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);

            if (!NT_SUCCESS(Status)) {
                ERR("update_changed_extent_ref returned %08x\n", Status);
                return Status;
            }
        }

        remove_fcb_extent(fcb, ext, rollback);
    } else if (start_data > ext->offset && end_data < ext->offset + ed2->num_bytes) { // replace middle
        EXTENT_DATA2* ned2;
        extent *newext1, *newext2, *newext3;

        newext1 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
        if (!newext1) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        newext2 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
        if (!newext2) {
            ERR("out of memory\n");
            ExFreePool(newext1);
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        newext3 = ExAllocatePoolWithTag(PagedPool, offsetof(extent, extent_data) + ext->datalen, ALLOC_TAG);
        if (!newext3) {
            ERR("out of memory\n");
            ExFreePool(newext1);
            ExFreePool(newext2);
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        RtlCopyMemory(&newext1->extent_data, &ext->extent_data, ext->datalen);
        RtlCopyMemory(&newext2->extent_data, &ext->extent_data, ext->datalen);
        RtlCopyMemory(&newext3->extent_data, &ext->extent_data, ext->datalen);

        // newext1: untouched head
        ned2 = (EXTENT_DATA2*)newext1->extent_data.data;
        ned2->num_bytes = start_data - ext->offset;

        // newext2: the written middle, as a regular extent
        newext2->extent_data.type = EXTENT_TYPE_REGULAR;
        ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
        ned2->offset += start_data - ext->offset;
        ned2->num_bytes = end_data - start_data;

        // newext3: untouched tail
        ned2 = (EXTENT_DATA2*)newext3->extent_data.data;
        ned2->offset += end_data - ext->offset;
        ned2->num_bytes -= end_data - ext->offset;

        // point ned2 back at the middle part for the write below
        ned2 = (EXTENT_DATA2*)newext2->extent_data.data;
        Status = write_data_complete(fcb->Vcb, ed2->address + ned2->offset, data, (UINT32)(end_data - start_data), Irp, NULL, file_write, irp_offset, priority);
        if (!NT_SUCCESS(Status)) {
            ERR("write_data_complete returned %08x\n", Status);
            ExFreePool(newext1);
            ExFreePool(newext2);
            ExFreePool(newext3);
            return Status;
        }

        if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
            ULONG sl = (ULONG)((end_data - start_data) / fcb->Vcb->superblock.sector_size);
            UINT32* csum = ExAllocatePoolWithTag(PagedPool, sl * sizeof(UINT32), ALLOC_TAG);

            if (!csum) {
                ERR("out of memory\n");
                ExFreePool(newext1);
                ExFreePool(newext2);
                ExFreePool(newext3);
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            Status = calc_csum(fcb->Vcb, data, sl, csum);
            if (!NT_SUCCESS(Status)) {
                ERR("calc_csum returned %08x\n", Status);
                ExFreePool(newext1);
                ExFreePool(newext2);
                ExFreePool(newext3);
                ExFreePool(csum);
                return Status;
            }

            newext2->csum = csum;
        } else
            newext2->csum = NULL;

        *written = end_data - start_data;

        newext1->offset = ext->offset;
        newext1->datalen = ext->datalen;
        newext1->unique = ext->unique;
        newext1->ignore = FALSE;
        newext1->inserted = TRUE;
        newext1->csum = NULL;
        InsertHeadList(&ext->list_entry, &newext1->list_entry);

        add_insert_extent_rollback(rollback, fcb, newext1);

        newext2->offset = start_data;
        newext2->datalen = ext->datalen;
        newext2->unique = ext->unique;
        newext2->ignore = FALSE;
        newext2->inserted = TRUE;
        add_extent(fcb, &newext1->list_entry, newext2);

        add_insert_extent_rollback(rollback, fcb, newext2);

        newext3->offset = end_data;
        newext3->datalen = ext->datalen;
        newext3->unique = ext->unique;
        newext3->ignore = FALSE;
        newext3->inserted = TRUE;
        newext3->csum = NULL;
        add_extent(fcb, &newext2->list_entry, newext3);

        add_insert_extent_rollback(rollback, fcb, newext3);

        c = get_chunk_from_address(fcb->Vcb, ed2->address);

        if (!c)
            ERR("get_chunk_from_address(%llx) failed\n", ed2->address);
        else {
            // one extent record became three - presumably the 2 is the number of extra references
            Status = update_changed_extent_ref(fcb->Vcb, c, ed2->address, ed2->size, fcb->subvol->id, fcb->inode, ext->offset - ed2->offset, 2,
                                               fcb->inode_item.flags & BTRFS_INODE_NODATASUM, FALSE, Irp);

            if (!NT_SUCCESS(Status)) {
                ERR("update_changed_extent_ref returned %08x\n", Status);
                return Status;
            }
        }

        remove_fcb_extent(fcb, ext, rollback);
    }

    // flag the chunk holding the extent as changed, if it was found
    if (c)
        c->changed = TRUE;

    return STATUS_SUCCESS;
}
/* Writes the bytes in `data` to the file range [start, end_data).
 *
 * Extents that can be overwritten in place (unique, uncompressed, and either
 * the inode is flagged NODATACOW or the extent is a prealloc extent) are written
 * directly; everything else in the range is excised and replaced with freshly
 * inserted extents.
 *
 * fcb        - file being written
 * start      - file offset of the first byte of `data`
 * end_data   - file offset at which the write ends
 * data       - buffer holding end_data - start bytes
 * Irp, file_write, irp_offset - handed on to the low-level write helpers
 * rollback   - list on which extent changes are recorded
 *
 * Returns STATUS_SUCCESS, or the failing status of a helper. */
NTSTATUS do_write_file(fcb* fcb, UINT64 start, UINT64 end_data, void* data, PIRP Irp, BOOL file_write, UINT32 irp_offset, LIST_ENTRY* rollback) {
    NTSTATUS Status;
    LIST_ENTRY *entry, *next_entry;
    UINT64 written = 0;
    UINT64 length = end_data - start;   // bytes still to be written
    UINT64 last_cow_start = 0;          // end of the last extent handled in place
    ULONG priority = (fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE) ? HighPagePriority : NormalPagePriority;
#ifdef DEBUG_PARANOID
    UINT64 prev_off;
#endif

    // The successor is captured before each extent is processed, because the
    // helpers called below insert into and remove from the list.
    for (entry = fcb->extents.Flink; entry != &fcb->extents; entry = next_entry) {
        extent* ext = CONTAINING_RECORD(entry, extent, list_entry);
        EXTENT_DATA* ed;
        EXTENT_DATA2* ed2;
        UINT64 len, gap_start;

        next_entry = entry->Flink;

        if (ext->ignore)
            continue;

        ed = &ext->extent_data;

        if (ed->type == EXTENT_TYPE_INLINE) {
            ed2 = NULL;
            len = ed->decoded_size;
        } else {
            ed2 = (EXTENT_DATA2*)ed->data;
            len = ed2->num_bytes;
        }

        // extent lies wholly before the write
        if (ext->offset + len <= start)
            continue;

        // extent lies beyond the end of the write - nothing further to look at
        if (ext->offset > start + written + length)
            break;

        // only unique, uncompressed nocow / prealloc extents are written in place
        if (!((fcb->inode_item.flags & BTRFS_INODE_NODATACOW) || ed->type == EXTENT_TYPE_PREALLOC) ||
            !ext->unique || ed->compression != BTRFS_COMPRESSION_NONE)
            continue;

        gap_start = max(last_cow_start, start + written);

        if (gap_start < ext->offset) {
            // the stretch before this extent can't be done in place: replace it
            Status = excise_extents(fcb->Vcb, fcb, gap_start, ext->offset, Irp, rollback);
            if (!NT_SUCCESS(Status)) {
                ERR("excise_extents returned %08x\n", Status);
                return Status;
            }

            Status = insert_extent(fcb->Vcb, fcb, gap_start, ext->offset - gap_start, (UINT8*)data + written, Irp, file_write, irp_offset + written, rollback);
            if (!NT_SUCCESS(Status)) {
                ERR("insert_extent returned %08x\n", Status);
                return Status;
            }

            written += ext->offset - gap_start;
            length -= ext->offset - gap_start;

            if (length == 0)
                break;
        }

        if (ed->type == EXTENT_TYPE_REGULAR) {
            UINT64 writeaddr = ed2->address + ed2->offset + start + written - ext->offset;
            // NOTE(review): this takes min(len, length) without subtracting any offset
            // into the extent - confirm callers can't start mid-extent and run past it.
            UINT64 write_len = min(len, length);
            chunk* c;

            TRACE("doing non-COW write to %llx\n", writeaddr);

            Status = write_data_complete(fcb->Vcb, writeaddr, (UINT8*)data + written, (UINT32)write_len, Irp, NULL, file_write, irp_offset + written, priority);
            if (!NT_SUCCESS(Status)) {
                ERR("write_data_complete returned %08x\n", Status);
                return Status;
            }

            c = get_chunk_from_address(fcb->Vcb, writeaddr);
            if (c)
                c->changed = TRUE;

            // This shouldn't ever get called - nocow files should always also be nosum.
            if (!(fcb->inode_item.flags & BTRFS_INODE_NODATASUM)) {
                calc_csum(fcb->Vcb, (UINT8*)data + written, (UINT32)(write_len / fcb->Vcb->superblock.sector_size),
                          &ext->csum[(start + written - ext->offset) / fcb->Vcb->superblock.sector_size]);

                ext->inserted = TRUE;
            }

            written += write_len;
            length -= write_len;

            if (length == 0)
                break;
        } else if (ed->type == EXTENT_TYPE_PREALLOC) {
            UINT64 write_len;

            Status = do_write_file_prealloc(fcb, ext, start + written, end_data, (UINT8*)data + written, &write_len,
                                            Irp, file_write, irp_offset + written, priority, rollback);
            if (!NT_SUCCESS(Status)) {
                ERR("do_write_file_prealloc returned %08x\n", Status);
                return Status;
            }

            written += write_len;
            length -= write_len;

            if (length == 0)
                break;
        }

        last_cow_start = ext->offset + len;
    }

    if (length > 0) {
        // whatever is left over gets new extents
        UINT64 start_write = max(last_cow_start, start + written);

        Status = excise_extents(fcb->Vcb, fcb, start_write, end_data, Irp, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("excise_extents returned %08x\n", Status);
            return Status;
        }

        Status = insert_extent(fcb->Vcb, fcb, start_write, end_data - start_write, (UINT8*)data + written, Irp, file_write, irp_offset + written, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("insert_extent returned %08x\n", Status);
            return Status;
        }
    }

#ifdef DEBUG_PARANOID
    // sanity check: live extents must have strictly increasing offsets
    prev_off = 0xffffffffffffffff;

    for (entry = fcb->extents.Flink; entry != &fcb->extents; entry = entry->Flink) {
        extent* ext = CONTAINING_RECORD(entry, extent, list_entry);

        if (ext->ignore)
            continue;

        if (ext->offset == prev_off) {
            ERR("offset %llx duplicated\n", ext->offset);
            int3;
        } else if (ext->offset < prev_off && prev_off != 0xffffffffffffffff) {
            ERR("offsets out of order\n");
            int3;
        }

        prev_off = ext->offset;
    }
#endif

    fcb->extents_changed = TRUE;
    mark_fcb_dirty(fcb);

    return STATUS_SUCCESS;
}
/* Writes [start_data, end_data) in pieces of at most COMPRESSED_EXTENT_SIZE
 * bytes, handing each piece to write_compressed_bit.
 *
 * If the piece covering exactly [0, COMPRESSED_EXTENT_SIZE) comes back
 * uncompressed and compress_force is not set, the inode is flagged
 * BTRFS_INODE_NOCOMPRESS and the rest of the data is written uncompressed
 * through do_write_file.
 *
 * Returns STATUS_SUCCESS, or the failing status of write_compressed_bit or
 * do_write_file. */
NTSTATUS write_compressed(fcb* fcb, UINT64 start_data, UINT64 end_data, void* data, PIRP Irp, LIST_ENTRY* rollback) {
    UINT64 num_parts = sector_align(end_data - start_data, COMPRESSED_EXTENT_SIZE) / COMPRESSED_EXTENT_SIZE;
    UINT64 part;

    for (part = 0; part < num_parts; part++) {
        UINT64 buf_off = part * COMPRESSED_EXTENT_SIZE;
        UINT64 part_start = start_data + buf_off;
        UINT64 part_end = min(part_start + COMPRESSED_EXTENT_SIZE, end_data);
        BOOL compressed;
        NTSTATUS Status;

        Status = write_compressed_bit(fcb, part_start, part_end, (UINT8*)data + buf_off, &compressed, Irp, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("write_compressed_bit returned %08x\n", Status);
            return Status;
        }

        if (part_start != 0 || part_end != COMPRESSED_EXTENT_SIZE || compressed || fcb->Vcb->options.compress_force)
            continue;

        // If the first 128 KB of a file is incompressible, we set the nocompress flag so we don't
        // bother with the rest of it.
        fcb->inode_item.flags |= BTRFS_INODE_NOCOMPRESS;
        fcb->inode_item_changed = TRUE;
        mark_fcb_dirty(fcb);

        // write subsequent data non-compressed
        if (part_end < end_data) {
            Status = do_write_file(fcb, part_end, end_data, (UINT8*)data + part_end, Irp, FALSE, 0, rollback);
            if (!NT_SUCCESS(Status)) {
                ERR("do_write_file returned %08x\n", Status);
                return Status;
            }
        }

        return STATUS_SUCCESS;
    }

    return STATUS_SUCCESS;
}
NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOLEAN paging_io, BOOLEAN no_cache,
BOOLEAN wait, BOOLEAN deferred_write, BOOLEAN write_irp, LIST_ENTRY* rollback) {
PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
PFILE_OBJECT FileObject = IrpSp->FileObject;
EXTENT_DATA* ed2;
UINT64 off64, newlength, start_data, end_data;
UINT32 bufhead;
BOOL make_inline;
UINT8* data;
INODE_ITEM* origii;
BOOL changed_length = FALSE;
NTSTATUS Status;
LARGE_INTEGER time;
BTRFS_TIME now;
fcb* fcb;
ccb* ccb;
file_ref* fileref;
BOOL paging_lock = FALSE, fcb_lock = FALSE, tree_lock = FALSE, pagefile;
ULONG filter = 0;
TRACE("(%p, %p, %llx, %p, %x, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
if (*length == 0) {
WARN("returning success for zero-length write\n");
return STATUS_SUCCESS;
}
if (!FileObject) {
ERR("error - FileObject was NULL\n");
return STATUS_ACCESS_DENIED;
}
fcb = FileObject->FsContext;
ccb = FileObject->FsContext2;
fileref = ccb ? ccb->fileref : NULL;
if (!fcb->ads && fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
WARN("tried to write to something other than a file or symlink (inode %llx, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
return STATUS_INVALID_DEVICE_REQUEST;
}
if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1)
offset = fcb->Header.FileSize;
off64 = offset.QuadPart;
TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
if (!no_cache && !CcCanIWrite(FileObject, *length, wait, deferred_write))
return STATUS_PENDING;
if (!wait && no_cache)
return STATUS_PENDING;
if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
IO_STATUS_BLOCK iosb;
ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, TRUE);
CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
if (!NT_SUCCESS(iosb.Status)) {
ExReleaseResourceLite(fcb->Header.PagingIoResource);
ERR("CcFlushCache returned %08x\n", iosb.Status);
return iosb.Status;
}
paging_lock = TRUE;
CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, FALSE);
}
if (paging_io) {
if (!ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, wait)) {
Status = STATUS_PENDING;
goto end;
} else
paging_lock = TRUE;
}
pagefile = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE && paging_io;
if (!pagefile && !ExIsResourceAcquiredExclusiveLite(&Vcb->tree_lock)) {
if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
Status = STATUS_PENDING;
goto end;
} else
tree_lock = TRUE;
}
if (no_cache) {
if (pagefile) {
if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
Status = STATUS_PENDING;
goto end;
} else
fcb_lock = TRUE;
} else if (!ExIsResourceAcquiredExclusiveLite(fcb->Header.Resource)) {
if (!ExAcquireResourceExclusiveLite(fcb->Header.Resource, wait)) {
Status = STATUS_PENDING;
goto end;
} else
fcb_lock = TRUE;
}
}
newlength = fcb->ads ? fcb->adsdata.Length : fcb->inode_item.st_size;
if (fcb->deleted)
newlength = 0;
TRACE("newlength = %llx\n", newlength);
if (off64 + *length > newlength) {
if (paging_io) {
if (off64 >= newlength) {
TRACE("paging IO tried to write beyond end of file (file size = %llx, offset = %llx, length = %x)\n", newlength, off64, *length);
TRACE("filename %S\n", file_desc(FileObject));
TRACE("FileObject: AllocationSize = %llx, FileSize = %llx, ValidDataLength = %llx\n",
fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
Status = STATUS_SUCCESS;
goto end;
}
*length = (ULONG)(newlength - off64);
} else {
newlength = off64 + *length;
changed_length = TRUE;
TRACE("extending length to %llx\n", newlength);
}
}
if (fcb->ads)
make_inline = FALSE;
else if (fcb->type == BTRFS_TYPE_SYMLINK)
make_inline = newlength <= (Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node) - offsetof(EXTENT_DATA, data[0]));
else
make_inline = newlength <= fcb->Vcb->options.max_inline;
if (changed_length) {
if (newlength > (UINT64)fcb->Header.AllocationSize.QuadPart) {
if (!tree_lock) {
// We need to acquire the tree lock if we don't have it already -
// we can't give an inline file proper extents at the same time as we're
// doing a flush.
if (!ExAcquireResourceSharedLite(&Vcb->tree_lock, wait)) {
Status = STATUS_PENDING;
goto end;
} else
tree_lock = TRUE;
}
Status = extend_file(fcb, fileref, newlength, FALSE, Irp, rollback);
if (!NT_SUCCESS(Status)) {
ERR("extend_file returned %08x\n", Status);
goto end;
}
} else if (!fcb->ads)
fcb->inode_item.st_size = newlength;
fcb->Header.FileSize.QuadPart = newlength;
fcb->Header.ValidDataLength.QuadPart = newlength;
TRACE("AllocationSize = %llx\n", fcb->Header.AllocationSize.QuadPart);
TRACE("FileSize = %llx\n", fcb->Header.FileSize.QuadPart);
TRACE("ValidDataLength = %llx\n", fcb->Header.ValidDataLength.QuadPart);
}
if (!no_cache) {
Status = STATUS_SUCCESS;
_SEH2_TRY {
if (!FileObject->PrivateCacheMap || changed_length) {
CC_FILE_SIZES ccfs;
ccfs.AllocationSize = fcb->Header.AllocationSize;
ccfs.FileSize = fcb->Header.FileSize;
ccfs.ValidDataLength = fcb->Header.ValidDataLength;
if (!FileObject->PrivateCacheMap)
init_file_cache(FileObject, &ccfs);
CcSetFileSizes(FileObject, &ccfs);
}
if (IrpSp->MinorFunction & IRP_MN_MDL) {
CcPrepareMdlWrite(FileObject, &offset, *length, &Irp->MdlAddress, &Irp->IoStatus);
Status = Irp->IoStatus.Status;
goto end;
} else {
if (fCcCopyWriteEx) {
TRACE("CcCopyWriteEx(%p, %llx, %x, %u, %p, %p)\n", FileObject, off64, *length, wait, buf, Irp->Tail.Overlay.Thread);
if (!fCcCopyWriteEx(FileObject, &offset, *length, wait, buf, Irp->Tail.Overlay.Thread)) {
Status = STATUS_PENDING;
goto end;
}
TRACE("CcCopyWriteEx finished\n");
} else {
TRACE("CcCopyWrite(%p, %llx, %x, %u, %p)\n", FileObject, off64, *length, wait, buf);
if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) {
Status = STATUS_PENDING;
goto end;
}
TRACE("CcCopyWrite finished\n");
}
}
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
Status = _SEH2_GetExceptionCode();
} _SEH2_END;
if (changed_length) {
send_notification_fcb(fcb->ads ? fileref->parent : fileref, fcb->ads ? FILE_NOTIFY_CHANGE_STREAM_SIZE : FILE_NOTIFY_CHANGE_SIZE,
fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED, fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
}
goto end;
}
if (fcb->ads) {
if (changed_length) {
char* data2;
if (newlength > fcb->adsmaxlen) {
ERR("error - xattr too long (%llu > %u)\n", newlength, fcb->adsmaxlen);
Status = STATUS_DISK_FULL;
goto end;
}
data2 = ExAllocatePoolWithTag(PagedPool, (ULONG)newlength, ALLOC_TAG);
if (!data2) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
if (fcb->adsdata.Buffer) {
RtlCopyMemory(data2, fcb->adsdata.Buffer, fcb->adsdata.Length);
ExFreePool(fcb->adsdata.Buffer);
}
if (newlength > fcb->adsdata.Length)
RtlZeroMemory(&data2[fcb->adsdata.Length], (ULONG)(newlength - fcb->adsdata.Length));
fcb->adsdata.Buffer = data2;
fcb->adsdata.Length = fcb->adsdata.MaximumLength = (USHORT)newlength;
fcb->Header.AllocationSize.QuadPart = newlength;
fcb->Header.FileSize.QuadPart = newlength;
fcb->Header.ValidDataLength.QuadPart = newlength;
}
if (*length > 0)
RtlCopyMemory(&fcb->adsdata.Buffer[off64], buf, *length);
fcb->Header.ValidDataLength.QuadPart = newlength;
mark_fcb_dirty(fcb);
if (fileref)
mark_fileref_dirty(fileref);
} else {
BOOL compress = write_fcb_compressed(fcb), no_buf = FALSE;
if (make_inline) {
start_data = 0;
end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
bufhead = sizeof(EXTENT_DATA) - 1;
} else if (compress) {
start_data = off64 & ~(UINT64)(COMPRESSED_EXTENT_SIZE - 1);
end_data = min(sector_align(off64 + *length, COMPRESSED_EXTENT_SIZE),
sector_align(newlength, fcb->Vcb->superblock.sector_size));
bufhead = 0;
} else {
start_data = off64 & ~(UINT64)(fcb->Vcb->superblock.sector_size - 1);
end_data = sector_align(off64 + *length, fcb->Vcb->superblock.sector_size);
bufhead = 0;
}
if (fcb_is_inline(fcb))
end_data = max(end_data, sector_align(fcb->inode_item.st_size, Vcb->superblock.sector_size));
fcb->Header.ValidDataLength.QuadPart = newlength;
TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
if (!make_inline && !compress && off64 == start_data && off64 + *length == end_data) {
data = buf;
no_buf = TRUE;
} else {
data = ExAllocatePoolWithTag(PagedPool, (ULONG)(end_data - start_data + bufhead), ALLOC_TAG);
if (!data) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
RtlZeroMemory(data + bufhead, (ULONG)(end_data - start_data));
TRACE("start_data = %llx\n", start_data);
TRACE("end_data = %llx\n", end_data);
if (off64 > start_data || off64 + *length < end_data) {
if (changed_length) {
if (fcb->inode_item.st_size > start_data)
Status = read_file(fcb, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL, Irp);
else
Status = STATUS_SUCCESS;
} else
Status = read_file(fcb, data + bufhead, start_data, end_data - start_data, NULL, Irp);
if (!NT_SUCCESS(Status)) {
ERR("read_file returned %08x\n", Status);
ExFreePool(data);
goto end;
}
}
RtlCopyMemory(data + bufhead + off64 - start_data, buf, *length);
}
if (make_inline) {
Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, Irp, rollback);
if (!NT_SUCCESS(Status)) {
ERR("error - excise_extents returned %08x\n", Status);
ExFreePool(data);
goto end;
}
ed2 = (EXTENT_DATA*)data;
ed2->generation = fcb->Vcb->superblock.generation;
ed2->decoded_size = newlength;
ed2->compression = BTRFS_COMPRESSION_NONE;
ed2->encryption = BTRFS_ENCRYPTION_NONE;
ed2->encoding = BTRFS_ENCODING_NONE;
ed2->type = EXTENT_TYPE_INLINE;
Status = add_extent_to_fcb(fcb, 0, ed2, (UINT16)(offsetof(EXTENT_DATA, data[0]) + newlength), FALSE, NULL, rollback);
if (!NT_SUCCESS(Status)) {
ERR("add_extent_to_fcb returned %08x\n", Status);
ExFreePool(data);
goto end;
}
fcb->inode_item.st_blocks += newlength;
} else if (compress) {
Status = write_compressed(fcb, start_data, end_data, data, Irp, rollback);
if (!NT_SUCCESS(Status)) {
ERR("write_compressed returned %08x\n", Status);
ExFreePool(data);
goto end;
}
ExFreePool(data);
} else {
if (write_irp && Irp->MdlAddress && no_buf) {
BOOL locked = Irp->MdlAddress->MdlFlags & MDL_PAGES_LOCKED;
if (!locked) {
Status = STATUS_SUCCESS;
_SEH2_TRY {
MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoReadAccess);
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
Status = _SEH2_GetExceptionCode();
} _SEH2_END;
if (!NT_SUCCESS(Status)) {
ERR("MmProbeAndLockPages threw exception %08x\n", Status);
goto end;
}
}
_SEH2_TRY {
Status = do_write_file(fcb, start_data, end_data, data, Irp, TRUE, 0, rollback);
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
Status = _SEH2_GetExceptionCode();
} _SEH2_END;
if (!locked)
MmUnlockPages(Irp->MdlAddress);
} else {
_SEH2_TRY {
Status = do_write_file(fcb, start_data, end_data, data, Irp, FALSE, 0, rollback);
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
Status = _SEH2_GetExceptionCode();
} _SEH2_END;
}
if (!NT_SUCCESS(Status)) {
ERR("do_write_file returned %08x\n", Status);
if (!no_buf) ExFreePool(data);
goto end;
}
if (!no_buf)
ExFreePool(data);
}
}
KeQuerySystemTime(&time);
win_time_to_unix(time, &now);
if (!pagefile) {
if (fcb->ads) {
if (fileref && fileref->parent)
origii = &fileref->parent->fcb->inode_item;
else {
ERR("no parent fcb found for stream\n");
Status = STATUS_INTERNAL_ERROR;
goto end;
}
} else
origii = &fcb->inode_item;
origii->transid = Vcb->superblock.generation;
origii->sequence++;
if (!ccb->user_set_change_time)
origii->st_ctime = now;
if (!fcb->ads) {
if (changed_length) {
TRACE("setting st_size to %llx\n", newlength);
origii->st_size = newlength;
filter |= FILE_NOTIFY_CHANGE_SIZE;
}
fcb->inode_item_changed = TRUE;
} else {
fileref->parent->fcb->inode_item_changed = TRUE;
if (changed_length)
filter |= FILE_NOTIFY_CHANGE_STREAM_SIZE;
filter |= FILE_NOTIFY_CHANGE_STREAM_WRITE;
}
if (!ccb->user_set_write_time) {
origii->st_mtime = now;
filter |= FILE_NOTIFY_CHANGE_LAST_WRITE;
}
mark_fcb_dirty(fcb->ads ? fileref->parent->fcb : fcb);
}
if (changed_length) {
CC_FILE_SIZES ccfs;
ccfs.AllocationSize = fcb->Header.AllocationSize;
ccfs.FileSize = fcb->Header.FileSize;
ccfs.ValidDataLength = fcb->Header.ValidDataLength;
_SEH2_TRY {
CcSetFileSizes(FileObject, &ccfs);
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
Status = _SEH2_GetExceptionCode();
goto end;
} _SEH2_END;
}
fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
fcb->subvol->root_item.ctime = now;
Status = STATUS_SUCCESS;
if (filter != 0)
send_notification_fcb(fcb->ads ? fileref->parent : fileref, filter, fcb->ads ? FILE_ACTION_MODIFIED_STREAM : FILE_ACTION_MODIFIED,
fcb->ads && fileref->dc ? &fileref->dc->name : NULL);
end:
if (NT_SUCCESS(Status) && FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
TRACE("CurrentByteOffset was: %llx\n", FileObject->CurrentByteOffset.QuadPart);
FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
TRACE("CurrentByteOffset now: %llx\n", FileObject->CurrentByteOffset.QuadPart);
}
if (fcb_lock)
ExReleaseResourceLite(fcb->Header.Resource);
if (tree_lock)
ExReleaseResourceLite(&Vcb->tree_lock);
if (paging_lock)
ExReleaseResourceLite(fcb->Header.PagingIoResource);
return Status;
}
/* Services a write IRP for a file on a mounted filesystem.
 *
 * Resolves the caller's data buffer (the system buffer if one is attached,
 * otherwise whatever map_user_buffer returns for the IRP), rejects writes
 * that FsRtlCheckLockForWriteAccess refuses, and then passes the request to
 * write_file2 together with a local rollback list.
 *
 * Vcb            - device extension of the volume being written to
 * Irp            - the write request; IoStatus.Information is reset to 0 on
 *                  entry and set to the write length once write_file2 succeeds
 * wait           - forwarded unchanged to write_file2
 * deferred_write - forwarded unchanged to write_file2
 *
 * Returns the status from write_file2, or STATUS_INSUFFICIENT_RESOURCES /
 * STATUS_FILE_LOCK_CONFLICT if the request is rejected before that call.
 * Before returning, the rollback list is undone with do_rollback when the
 * status is a failure and discarded with clear_rollback otherwise
 * (STATUS_PENDING is a success-class code, so it takes the latter route). */
NTSTATUS write_file(device_extension* Vcb, PIRP Irp, BOOLEAN wait, BOOLEAN deferred_write) {
    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
    PFILE_OBJECT FileObject = IrpSp->FileObject;
    LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
    fcb* fcb = FileObject ? FileObject->FsContext : NULL;
    LIST_ENTRY rollback;
    NTSTATUS Status;
    void* buf;

    InitializeListHead(&rollback);

    TRACE("write\n");

    Irp->IoStatus.Information = 0;

    TRACE("offset = %llx\n", offset.QuadPart);
    TRACE("length = %x\n", IrpSp->Parameters.Write.Length);

    if (Irp->AssociatedIrp.SystemBuffer) {
        buf = Irp->AssociatedIrp.SystemBuffer;
    } else {
        // Paging-file FCBs ask for the mapping at high priority.
        buf = map_user_buffer(Irp, (fcb && (fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE)) ? HighPagePriority : NormalPagePriority);

        // A NULL result is only an error when there was an MDL to map.
        if (!buf && Irp->MdlAddress) {
            ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }
    }

    TRACE("buf = %p\n", buf);

    // The byte-range lock check is skipped for paging I/O.
    if (fcb && !(Irp->Flags & IRP_PAGING_IO)) {
        if (!FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
            WARN("tried to write to locked region\n");
            Status = STATUS_FILE_LOCK_CONFLICT;
            goto exit;
        }
    }

    Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE,
                         wait, deferred_write, TRUE, &rollback);

    if (Status == STATUS_PENDING)
        goto exit;

    if (!NT_SUCCESS(Status)) {
        ERR("write_file2 returned %08x\n", Status);
        goto exit;
    }

    // From here on the write has completed successfully.
    Irp->IoStatus.Information = IrpSp->Parameters.Write.Length;

    if (diskacc && (Irp->Flags & IRP_NOCACHE)) {
        PETHREAD requestor = Irp->Tail.Overlay.Thread;
        PETHREAD thread = NULL;

        // Pick the thread to charge: the IRP's originating thread if it is
        // not a system thread; otherwise the current thread, provided it is
        // either not a system thread or this IRP is the top-level one.
        if (requestor && !IoIsSystemThread(requestor)) {
            thread = requestor;
        } else {
            PETHREAD current = PsGetCurrentThread();

            if (!IoIsSystemThread(current) || IoGetTopLevelIrp() == Irp)
                thread = current;
        }

        if (thread)
            fPsUpdateDiskCounters(PsGetThreadProcess(thread), 0, IrpSp->Parameters.Write.Length, 0, 1, 0);
    }

exit:
    if (!NT_SUCCESS(Status))
        do_rollback(Vcb, &rollback);
    else
        clear_rollback(&rollback);

    return Status;
}
/* IRP_MJ_WRITE dispatch routine.
 *
 * Routes the request according to the kind of device object it arrived on:
 *   - VCB_TYPE_VOLUME devices are passed to vol_write;
 *   - anything other than VCB_TYPE_FS is refused with STATUS_INVALID_PARAMETER;
 *   - writes through the volume FCB are forwarded to the underlying device,
 *     but only while the volume is locked by this same file object;
 *   - everything else goes through write_file (or CcMdlWriteComplete for
 *     IRP_MN_COMPLETE requests).
 *
 * DeviceObject - device the IRP was sent to; its DeviceExtension is the Vcb
 * Irp          - the write request
 *
 * Returns the final status of the request. If the write could not be done
 * without blocking (STATUS_PENDING), the IRP is marked pending and queued
 * with add_thread_job; if queueing fails, do_write_job is called directly.
 *
 * On the two paths that jump to "exit" (vol_write and the IoCallDriver
 * pass-through) the IRP has been handed on, so it is not completed here. */
_Dispatch_type_(IRP_MJ_WRITE)
_Function_class_(DRIVER_DISPATCH)
NTSTATUS NTAPI drv_write(IN PDEVICE_OBJECT DeviceObject, IN PIRP Irp) {
    NTSTATUS Status;
    BOOL top_level;
    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
    device_extension* Vcb = DeviceObject->DeviceExtension;
    PFILE_OBJECT FileObject = IrpSp->FileObject;
    fcb* fcb = FileObject ? FileObject->FsContext : NULL;
    ccb* ccb = FileObject ? FileObject->FsContext2 : NULL;
    BOOLEAN wait = FileObject ? IoIsOperationSynchronous(Irp) : TRUE;

    FsRtlEnterFileSystem();

    top_level = is_top_level(Irp);

    if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
        Status = vol_write(DeviceObject, Irp);
        goto exit;
    } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
        Status = STATUS_INVALID_PARAMETER;
        goto end;
    }

    if (!fcb) {
        ERR("fcb was NULL\n");
        Status = STATUS_INVALID_PARAMETER;
        goto end;
    }

    if (!ccb) {
        ERR("ccb was NULL\n");
        Status = STATUS_INVALID_PARAMETER;
        goto end;
    }

    // Only user-mode requestors are held to the access mask of the handle.
    if (Irp->RequestorMode == UserMode && !(ccb->access & (FILE_WRITE_DATA | FILE_APPEND_DATA))) {
        WARN("insufficient permissions\n");
        Status = STATUS_ACCESS_DENIED;
        goto end;
    }

    if (fcb == Vcb->volume_fcb) {
        if (!Vcb->locked || Vcb->locked_fileobj != FileObject) {
            ERR("trying to write to volume when not locked, or locked with another FileObject\n");
            Status = STATUS_ACCESS_DENIED;
            goto end;
        }

        TRACE("writing directly to volume\n");

        IoSkipCurrentIrpStackLocation(Irp);

        Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);
        goto exit;
    }

    if (is_subvol_readonly(fcb->subvol, Irp)) {
        Status = STATUS_ACCESS_DENIED;
        goto end;
    }

    if (Vcb->readonly) {
        Status = STATUS_MEDIA_WRITE_PROTECTED;
        goto end;
    }

    _SEH2_TRY {
        if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
            CcMdlWriteComplete(IrpSp->FileObject, &IrpSp->Parameters.Write.ByteOffset, Irp->MdlAddress);

            Irp->MdlAddress = NULL;
            Status = STATUS_SUCCESS;
        } else {
            // Don't offload jobs when doing paging IO - otherwise this can lead to
            // deadlocks in CcCopyWrite.
            if (Irp->Flags & IRP_PAGING_IO)
                wait = TRUE;

            Status = write_file(Vcb, Irp, wait, FALSE);
        }
    } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
        Status = _SEH2_GetExceptionCode();
    } _SEH2_END;

end:
    Irp->IoStatus.Status = Status;

    // A failed transfer reports zero bytes. The early-exit paths above never
    // write Information themselves, so without this the IRP would be completed
    // with whatever value it arrived with.
    if (!NT_SUCCESS(Status))
        Irp->IoStatus.Information = 0;

    // Information is pointer-sized; a write length always fits in a ULONG, so
    // narrow it explicitly to match the %u conversion.
    TRACE("wrote %u bytes\n", (ULONG)Irp->IoStatus.Information);

    if (Status != STATUS_PENDING)
        IoCompleteRequest(Irp, IO_NO_INCREMENT);
    else {
        IoMarkIrpPending(Irp);

        // Fall back to doing the write on this thread if it can't be queued.
        if (!add_thread_job(Vcb, Irp))
            do_write_job(Vcb, Irp);
    }

exit:
    if (top_level)
        IoSetTopLevelIrp(NULL);

    TRACE("returning %08x\n", Status);

    FsRtlExitFileSystem();

    return Status;
}