mirror of
https://github.com/reactos/reactos.git
synced 2025-07-07 21:27:53 +00:00
6711 lines
230 KiB
C
6711 lines
230 KiB
C
/* Copyright (c) Mark Harmstone 2016
|
|
*
|
|
* This file is part of WinBtrfs.
|
|
*
|
|
* WinBtrfs is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public Licence as published by
|
|
* the Free Software Foundation, either version 3 of the Licence, or
|
|
* (at your option) any later version.
|
|
*
|
|
* WinBtrfs is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Lesser General Public Licence for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public Licence
|
|
* along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
|
|
|
|
#include "btrfs_drv.h"
|
|
|
|
#define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
|
|
|
|
// BOOL did_split;
|
|
BOOL chunk_test = FALSE;
|
|
|
|
typedef struct {
|
|
KEVENT Event;
|
|
IO_STATUS_BLOCK iosb;
|
|
} write_context;
|
|
|
|
typedef struct {
|
|
EXTENT_ITEM ei;
|
|
UINT8 type;
|
|
EXTENT_DATA_REF edr;
|
|
} EXTENT_ITEM_DATA_REF;
|
|
|
|
typedef struct {
|
|
EXTENT_ITEM_TREE eit;
|
|
UINT8 type;
|
|
TREE_BLOCK_REF tbr;
|
|
} EXTENT_ITEM_TREE2;
|
|
|
|
typedef struct {
|
|
EXTENT_ITEM ei;
|
|
UINT8 type;
|
|
TREE_BLOCK_REF tbr;
|
|
} EXTENT_ITEM_SKINNY_METADATA;
|
|
|
|
typedef struct {
|
|
CHUNK_ITEM ci;
|
|
CHUNK_ITEM_STRIPE stripes[1];
|
|
} CHUNK_ITEM2;
|
|
|
|
typedef struct {
|
|
LIST_ENTRY list_entry;
|
|
UINT64 key;
|
|
} ordered_list;
|
|
|
|
typedef struct {
|
|
ordered_list ol;
|
|
ULONG length;
|
|
UINT32* checksums;
|
|
BOOL deleted;
|
|
} changed_sector;
|
|
|
|
static NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback);
|
|
static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len);
|
|
static NTSTATUS convert_shared_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback);
|
|
|
|
static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
|
|
write_context* context = conptr;
|
|
|
|
context->iosb = Irp->IoStatus;
|
|
KeSetEvent(&context->Event, 0, FALSE);
|
|
|
|
// return STATUS_SUCCESS;
|
|
return STATUS_MORE_PROCESSING_REQUIRED;
|
|
}
|
|
|
|
static NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
|
|
NTSTATUS Status;
|
|
LARGE_INTEGER offset;
|
|
PIRP Irp;
|
|
PIO_STACK_LOCATION IrpSp;
|
|
write_context* context = NULL;
|
|
|
|
TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);
|
|
|
|
context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
|
|
if (!context) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlZeroMemory(context, sizeof(write_context));
|
|
|
|
KeInitializeEvent(&context->Event, NotificationEvent, FALSE);
|
|
|
|
offset.QuadPart = address;
|
|
|
|
// Irp = IoBuildSynchronousFsdRequest(IRP_MJ_WRITE, Vcb->device, data, length, &offset, NULL, &context->iosb);
|
|
|
|
Irp = IoAllocateIrp(device->StackSize, FALSE);
|
|
|
|
if (!Irp) {
|
|
ERR("IoAllocateIrp failed\n");
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto exit2;
|
|
}
|
|
|
|
IrpSp = IoGetNextIrpStackLocation(Irp);
|
|
IrpSp->MajorFunction = IRP_MJ_WRITE;
|
|
|
|
if (device->Flags & DO_BUFFERED_IO) {
|
|
Irp->AssociatedIrp.SystemBuffer = data;
|
|
|
|
Irp->Flags = IRP_BUFFERED_IO;
|
|
} else if (device->Flags & DO_DIRECT_IO) {
|
|
Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
|
|
if (!Irp->MdlAddress) {
|
|
DbgPrint("IoAllocateMdl failed\n");
|
|
goto exit;
|
|
}
|
|
|
|
MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
|
|
} else {
|
|
Irp->UserBuffer = data;
|
|
}
|
|
|
|
IrpSp->Parameters.Write.Length = length;
|
|
IrpSp->Parameters.Write.ByteOffset = offset;
|
|
|
|
Irp->UserIosb = &context->iosb;
|
|
|
|
Irp->UserEvent = &context->Event;
|
|
|
|
IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);
|
|
|
|
// FIXME - support multiple devices
|
|
Status = IoCallDriver(device, Irp);
|
|
|
|
if (Status == STATUS_PENDING) {
|
|
KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
|
|
Status = context->iosb.Status;
|
|
}
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("IoCallDriver returned %08x\n", Status);
|
|
}
|
|
|
|
if (device->Flags & DO_DIRECT_IO) {
|
|
MmUnlockPages(Irp->MdlAddress);
|
|
IoFreeMdl(Irp->MdlAddress);
|
|
}
|
|
|
|
exit:
|
|
IoFreeIrp(Irp);
|
|
|
|
exit2:
|
|
if (context)
|
|
ExFreePool(context);
|
|
|
|
return Status;
|
|
}
|
|
|
|
static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
|
|
NTSTATUS Status;
|
|
unsigned int i = 0;
|
|
UINT32 crc32;
|
|
|
|
#ifdef __REACTOS__
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
#endif
|
|
|
|
// FIXME - work with RAID
|
|
|
|
// FIXME - only write one superblock if on SSD (?)
|
|
while (superblock_addrs[i] > 0 && Vcb->length >= superblock_addrs[i] + sizeof(superblock)) {
|
|
TRACE("writing superblock %u\n", i);
|
|
|
|
Vcb->superblock.sb_phys_addr = superblock_addrs[i];
|
|
RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));
|
|
|
|
crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
|
|
crc32 = ~crc32;
|
|
TRACE("crc32 is %08x\n", crc32);
|
|
RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));
|
|
|
|
Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
break;
|
|
|
|
i++;
|
|
}
|
|
|
|
return Status;
|
|
}
|
|
|
|
static BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
|
|
LIST_ENTRY* le;
|
|
space *s, *bestfit = NULL;
|
|
|
|
TRACE("(%p, %llx, %llx, %p)\n", Vcb, c->offset, length, address);
|
|
|
|
le = c->space.Flink;
|
|
while (le != &c->space) {
|
|
s = CONTAINING_RECORD(le, space, list_entry);
|
|
|
|
if (s->type == SPACE_TYPE_FREE) {
|
|
if (s->size == length) {
|
|
*address = s->offset;
|
|
TRACE("returning exact fit at %llx\n", s->offset);
|
|
return TRUE;
|
|
} else if (s->size > length && (!bestfit || bestfit->size > s->size)) {
|
|
bestfit = s;
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
if (bestfit) {
|
|
TRACE("returning best fit at %llx\n", bestfit->offset);
|
|
*address = bestfit->offset;
|
|
return TRUE;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
void add_to_space_list(chunk* c, UINT64 offset, UINT64 size, UINT8 type) {
|
|
LIST_ENTRY *le = c->space.Flink, *nextle, *insbef;
|
|
space *s, *s2, *s3;
|
|
#ifdef DEBUG_PARANOID
|
|
UINT64 lastaddr;
|
|
#endif
|
|
|
|
TRACE("(%p, %llx, %llx, %x)\n", c, offset, size, type);
|
|
|
|
#ifdef DEBUG_PARANOID
|
|
// TESTING
|
|
le = c->space.Flink;
|
|
while (le != &c->space) {
|
|
s = CONTAINING_RECORD(le, space, list_entry);
|
|
|
|
TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
|
|
|
|
le = le->Flink;
|
|
}
|
|
#endif
|
|
|
|
c->space_changed = TRUE;
|
|
|
|
le = c->space.Flink;
|
|
insbef = &c->space;
|
|
while (le != &c->space) {
|
|
s = CONTAINING_RECORD(le, space, list_entry);
|
|
nextle = le->Flink;
|
|
|
|
if (s->offset >= offset + size) {
|
|
insbef = le;
|
|
break;
|
|
}
|
|
|
|
if (s->offset >= offset && s->offset + s->size <= offset + size) { // delete entirely
|
|
if (s->offset + s->size == offset + size) {
|
|
insbef = s->list_entry.Flink;
|
|
RemoveEntryList(&s->list_entry);
|
|
ExFreePool(s);
|
|
break;
|
|
}
|
|
|
|
RemoveEntryList(&s->list_entry);
|
|
ExFreePool(s);
|
|
} else if (s->offset < offset && s->offset + s->size > offset + size) { // split in two
|
|
s3 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
|
|
if (!s3) {
|
|
ERR("out of memory\n");
|
|
return;
|
|
}
|
|
|
|
s3->offset = offset + size;
|
|
s3->size = s->size - size - offset + s->offset;
|
|
s3->type = s->type;
|
|
InsertHeadList(&s->list_entry, &s3->list_entry);
|
|
insbef = &s3->list_entry;
|
|
|
|
s->size = offset - s->offset;
|
|
break;
|
|
} else if (s->offset + s->size > offset && s->offset + s->size <= offset + size) { // truncate before
|
|
s->size = offset - s->offset;
|
|
} else if (s->offset < offset + size && s->offset + s->size > offset + size) { // truncate after
|
|
s->size -= s->offset - offset + size;
|
|
s->offset = offset + size;
|
|
|
|
insbef = le;
|
|
break;
|
|
}
|
|
|
|
le = nextle;
|
|
}
|
|
|
|
s2 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
|
|
if (!s2) {
|
|
ERR("out of memory\n");
|
|
return;
|
|
}
|
|
|
|
s2->offset = offset;
|
|
s2->size = size;
|
|
s2->type = type;
|
|
InsertTailList(insbef, &s2->list_entry);
|
|
|
|
// merge entries if same type
|
|
|
|
if (s2->list_entry.Blink != &c->space) {
|
|
s = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);
|
|
|
|
if (s->type == type) {
|
|
s->size += s2->size;
|
|
|
|
RemoveEntryList(&s2->list_entry);
|
|
ExFreePool(s2);
|
|
|
|
s2 = s;
|
|
}
|
|
}
|
|
|
|
if (s2->list_entry.Flink != &c->space) {
|
|
s = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);
|
|
|
|
if (s->type == type) {
|
|
s2->size += s->size;
|
|
|
|
RemoveEntryList(&s->list_entry);
|
|
ExFreePool(s);
|
|
}
|
|
}
|
|
|
|
le = c->space.Flink;
|
|
while (le != &c->space) {
|
|
s = CONTAINING_RECORD(le, space, list_entry);
|
|
|
|
TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
#ifdef DEBUG_PARANOID
|
|
// TESTING
|
|
lastaddr = c->offset;
|
|
le = c->space.Flink;
|
|
while (le != &c->space) {
|
|
s = CONTAINING_RECORD(le, space, list_entry);
|
|
|
|
if (s->offset != lastaddr) {
|
|
ERR("inconsistency detected!\n");
|
|
int3;
|
|
}
|
|
|
|
lastaddr = s->offset + s->size;
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
if (lastaddr != c->offset + c->chunk_item->size) {
|
|
ERR("inconsistency detected - space doesn't run all the way to end of chunk\n");
|
|
int3;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address) {
|
|
LIST_ENTRY* le2;
|
|
chunk* c;
|
|
|
|
le2 = Vcb->chunks.Flink;
|
|
while (le2 != &Vcb->chunks) {
|
|
c = CONTAINING_RECORD(le2, chunk, list_entry);
|
|
|
|
// TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
|
|
|
|
if (address >= c->offset && address < c->offset + c->chunk_item->size)
|
|
return c;
|
|
|
|
le2 = le2->Flink;
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
typedef struct {
|
|
disk_hole* dh;
|
|
device* device;
|
|
} stripe;
|
|
|
|
static void add_provisional_disk_hole(device_extension* Vcb, stripe* s, UINT64 max_stripe_size) {
|
|
// LIST_ENTRY* le = s->device->disk_holes.Flink;
|
|
// disk_hole* dh;
|
|
|
|
// ERR("old holes:\n");
|
|
// while (le != &s->device->disk_holes) {
|
|
// dh = CONTAINING_RECORD(le, disk_hole, listentry);
|
|
//
|
|
// ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
|
|
//
|
|
// le = le->Flink;
|
|
// }
|
|
|
|
if (s->dh->size <= max_stripe_size) {
|
|
s->dh->provisional = TRUE;
|
|
} else {
|
|
disk_hole* newdh = ExAllocatePoolWithTag(PagedPool, sizeof(disk_hole), ALLOC_TAG);
|
|
if (!newdh) {
|
|
ERR("out of memory\n");
|
|
return;
|
|
}
|
|
|
|
newdh->address = s->dh->address + max_stripe_size;
|
|
newdh->size = s->dh->size - max_stripe_size;
|
|
newdh->provisional = FALSE;
|
|
InsertTailList(&s->device->disk_holes, &newdh->listentry);
|
|
|
|
s->dh->size = max_stripe_size;
|
|
s->dh->provisional = TRUE;
|
|
}
|
|
|
|
// ERR("new holes:\n");
|
|
// le = s->device->disk_holes.Flink;
|
|
// while (le != &s->device->disk_holes) {
|
|
// dh = CONTAINING_RECORD(le, disk_hole, listentry);
|
|
//
|
|
// ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
|
|
//
|
|
// le = le->Flink;
|
|
// }
|
|
}
|
|
|
|
static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) {
|
|
KEY searchkey;
|
|
traverse_ptr tp, next_tp;
|
|
BOOL b;
|
|
UINT64 lastaddr;
|
|
NTSTATUS Status;
|
|
|
|
searchkey.obj_id = 0x100;
|
|
searchkey.obj_type = TYPE_CHUNK_ITEM;
|
|
searchkey.offset = 0;
|
|
|
|
Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return 0xffffffffffffffff;
|
|
}
|
|
|
|
lastaddr = 0;
|
|
|
|
do {
|
|
if (tp.item->key.obj_type == TYPE_CHUNK_ITEM) {
|
|
if (tp.item->size < sizeof(CHUNK_ITEM)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM));
|
|
} else {
|
|
CHUNK_ITEM* ci = (CHUNK_ITEM*)tp.item->data;
|
|
|
|
if (tp.item->key.offset >= lastaddr + size) {
|
|
free_traverse_ptr(&tp);
|
|
return lastaddr;
|
|
}
|
|
|
|
lastaddr = tp.item->key.offset + ci->size;
|
|
}
|
|
}
|
|
|
|
b = find_next_item(Vcb, &tp, &next_tp, FALSE);
|
|
if (b) {
|
|
free_traverse_ptr(&tp);
|
|
tp = next_tp;
|
|
|
|
if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
|
|
break;
|
|
}
|
|
} while (b);
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
return lastaddr;
|
|
}
|
|
|
|
static BOOL increase_dev_item_used(device_extension* Vcb, device* device, UINT64 size, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
DEV_ITEM* di;
|
|
NTSTATUS Status;
|
|
|
|
searchkey.obj_id = 1;
|
|
searchkey.obj_type = TYPE_DEV_ITEM;
|
|
searchkey.offset = device->devitem.dev_id;
|
|
|
|
Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return FALSE;
|
|
}
|
|
|
|
if (keycmp(&tp.item->key, &searchkey)) {
|
|
ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
|
|
free_traverse_ptr(&tp);
|
|
return FALSE;
|
|
}
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
device->devitem.bytes_used += size;
|
|
|
|
di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
|
|
if (!di) {
|
|
ERR("out of memory\n");
|
|
return FALSE;
|
|
}
|
|
|
|
RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
return FALSE;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static void reset_disk_holes(device* device, BOOL commit) {
|
|
LIST_ENTRY* le = device->disk_holes.Flink;
|
|
disk_hole* dh;
|
|
|
|
// ERR("old holes:\n");
|
|
// while (le != &device->disk_holes) {
|
|
// dh = CONTAINING_RECORD(le, disk_hole, listentry);
|
|
//
|
|
// ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
|
|
//
|
|
// le = le->Flink;
|
|
// }
|
|
|
|
le = device->disk_holes.Flink;
|
|
while (le != &device->disk_holes) {
|
|
LIST_ENTRY* le2 = le->Flink;
|
|
|
|
dh = CONTAINING_RECORD(le, disk_hole, listentry);
|
|
|
|
if (dh->provisional) {
|
|
if (commit) {
|
|
RemoveEntryList(le);
|
|
ExFreePool(dh);
|
|
} else {
|
|
dh->provisional = FALSE;
|
|
}
|
|
}
|
|
|
|
le = le2;
|
|
}
|
|
|
|
if (!commit) {
|
|
le = device->disk_holes.Flink;
|
|
while (le != &device->disk_holes) {
|
|
LIST_ENTRY* le2 = le->Flink;
|
|
|
|
dh = CONTAINING_RECORD(le, disk_hole, listentry);
|
|
|
|
while (le2 != &device->disk_holes) {
|
|
disk_hole* dh2 = CONTAINING_RECORD(le2, disk_hole, listentry);
|
|
|
|
if (dh2->address == dh->address + dh->size) {
|
|
LIST_ENTRY* le3 = le2->Flink;
|
|
dh->size += dh2->size;
|
|
|
|
RemoveEntryList(le2);
|
|
ExFreePool(dh2);
|
|
|
|
le2 = le3;
|
|
} else
|
|
break;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
}
|
|
|
|
// ERR("new holes:\n");
|
|
// le = device->disk_holes.Flink;
|
|
// while (le != &device->disk_holes) {
|
|
// dh = CONTAINING_RECORD(le, disk_hole, listentry);
|
|
//
|
|
// ERR("address %llx, size %llx, provisional %u\n", dh->address, dh->size, dh->provisional);
|
|
//
|
|
// le = le->Flink;
|
|
// }
|
|
}
|
|
|
|
static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
|
|
sys_chunk *sc, *sc2;
|
|
LIST_ENTRY* le;
|
|
USHORT i;
|
|
|
|
if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
|
|
ERR("error - bootstrap is full\n");
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
|
|
if (!sc) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
sc->key.obj_id = obj_id;
|
|
sc->key.obj_type = obj_type;
|
|
sc->key.offset = offset;
|
|
sc->size = size;
|
|
sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
|
|
if (!sc->data) {
|
|
ERR("out of memory\n");
|
|
ExFreePool(sc);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlCopyMemory(sc->data, data, sc->size);
|
|
|
|
le = Vcb->sys_chunks.Flink;
|
|
while (le != &Vcb->sys_chunks) {
|
|
sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
|
|
|
|
if (keycmp(&sc2->key, &sc->key) == 1)
|
|
break;
|
|
|
|
le = le->Flink;
|
|
}
|
|
InsertTailList(le, &sc->list_entry);
|
|
|
|
Vcb->superblock.n += sizeof(KEY) + size;
|
|
|
|
i = 0;
|
|
le = Vcb->sys_chunks.Flink;
|
|
while (le != &Vcb->sys_chunks) {
|
|
sc2 = CONTAINING_RECORD(le, sys_chunk, list_entry);
|
|
|
|
TRACE("%llx,%x,%llx\n", sc2->key.obj_id, sc2->key.obj_type, sc2->key.offset);
|
|
|
|
RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], &sc2->key, sizeof(KEY));
|
|
i += sizeof(KEY);
|
|
|
|
RtlCopyMemory(&Vcb->superblock.sys_chunk_array[i], sc2->data, sc2->size);
|
|
i += sc2->size;
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static chunk* alloc_chunk(device_extension* Vcb, UINT64 flags, LIST_ENTRY* rollback) {
|
|
UINT64 max_stripe_size, max_chunk_size, stripe_size;
|
|
UINT64 total_size = 0, i, j, logaddr;
|
|
int num_stripes;
|
|
disk_hole* dh;
|
|
stripe* stripes;
|
|
ULONG cisize;
|
|
CHUNK_ITEM* ci;
|
|
CHUNK_ITEM_STRIPE* cis;
|
|
chunk* c = NULL;
|
|
space* s = NULL;
|
|
BOOL success = FALSE;
|
|
BLOCK_GROUP_ITEM* bgi;
|
|
|
|
for (i = 0; i < Vcb->superblock.num_devices; i++) {
|
|
total_size += Vcb->devices[i].devitem.num_bytes;
|
|
}
|
|
TRACE("total_size = %llx\n", total_size);
|
|
|
|
if (flags & BLOCK_FLAG_DATA) {
|
|
max_stripe_size = 0x40000000; // 1 GB
|
|
max_chunk_size = 10 * max_stripe_size;
|
|
} else if (flags & BLOCK_FLAG_METADATA) {
|
|
if (total_size > 0xC80000000) // 50 GB
|
|
max_stripe_size = 0x40000000; // 1 GB
|
|
else
|
|
max_stripe_size = 0x10000000; // 256 MB
|
|
|
|
max_chunk_size = max_stripe_size;
|
|
} else if (flags & BLOCK_FLAG_SYSTEM) {
|
|
max_stripe_size = 0x2000000; // 32 MB
|
|
max_chunk_size = 2 * max_stripe_size;
|
|
}
|
|
|
|
// FIXME - make sure whole number of sectors?
|
|
max_chunk_size = min(max_chunk_size, total_size / 10); // cap at 10%
|
|
|
|
TRACE("would allocate a new chunk of %llx bytes and stripe %llx\n", max_chunk_size, max_stripe_size);
|
|
|
|
if (flags & BLOCK_FLAG_DUPLICATE) {
|
|
num_stripes = 2;
|
|
} else if (flags & BLOCK_FLAG_RAID0) {
|
|
FIXME("RAID0 not yet supported\n");
|
|
return NULL;
|
|
} else if (flags & BLOCK_FLAG_RAID1) {
|
|
FIXME("RAID1 not yet supported\n");
|
|
return NULL;
|
|
} else if (flags & BLOCK_FLAG_RAID10) {
|
|
FIXME("RAID10 not yet supported\n");
|
|
return NULL;
|
|
} else if (flags & BLOCK_FLAG_RAID5) {
|
|
FIXME("RAID5 not yet supported\n");
|
|
return NULL;
|
|
} else if (flags & BLOCK_FLAG_RAID6) {
|
|
FIXME("RAID6 not yet supported\n");
|
|
return NULL;
|
|
} else { // SINGLE
|
|
num_stripes = 1;
|
|
}
|
|
|
|
stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * num_stripes, ALLOC_TAG);
|
|
if (!stripes) {
|
|
ERR("out of memory\n");
|
|
return NULL;
|
|
}
|
|
|
|
for (i = 0; i < num_stripes; i++) {
|
|
stripes[i].dh = NULL;
|
|
|
|
for (j = 0; j < Vcb->superblock.num_devices; j++) {
|
|
LIST_ENTRY* le = Vcb->devices[j].disk_holes.Flink;
|
|
|
|
while (le != &Vcb->devices[j].disk_holes) {
|
|
dh = CONTAINING_RECORD(le, disk_hole, listentry);
|
|
|
|
if (!dh->provisional) {
|
|
if (!stripes[i].dh || dh->size > stripes[i].dh->size) {
|
|
stripes[i].dh = dh;
|
|
stripes[i].device = &Vcb->devices[j];
|
|
|
|
if (stripes[i].dh->size >= max_stripe_size)
|
|
break;
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
if (stripes[i].dh && stripes[i].dh->size >= max_stripe_size)
|
|
break;
|
|
}
|
|
|
|
if (stripes[i].dh) {
|
|
TRACE("good DH: device %llx, address %llx, size %llx\n", stripes[i].device->devitem.dev_id, stripes[i].dh->address, stripes[i].dh->size);
|
|
} else {
|
|
TRACE("good DH not found\n");
|
|
goto end;
|
|
}
|
|
|
|
add_provisional_disk_hole(Vcb, &stripes[i], max_stripe_size);
|
|
}
|
|
|
|
stripe_size = min(stripes[0].dh->size, max_stripe_size);
|
|
for (i = 1; i < num_stripes; i++) {
|
|
stripe_size = min(stripe_size, stripes[1].dh->size);
|
|
}
|
|
// FIXME - make sure stripe_size aligned properly
|
|
// FIXME - obey max_chunk_size
|
|
|
|
c = ExAllocatePoolWithTag(PagedPool, sizeof(chunk), ALLOC_TAG);
|
|
if (!c) {
|
|
ERR("out of memory\n");
|
|
goto end;
|
|
}
|
|
|
|
// add CHUNK_ITEM to tree 3
|
|
|
|
cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
|
|
ci = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
|
|
if (!ci) {
|
|
ERR("out of memory\n");
|
|
goto end;
|
|
}
|
|
|
|
ci->size = stripe_size; // FIXME for RAID
|
|
ci->root_id = Vcb->extent_root->id;
|
|
ci->stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
|
|
ci->type = flags;
|
|
ci->opt_io_alignment = ci->stripe_length;
|
|
ci->opt_io_width = ci->stripe_length;
|
|
ci->sector_size = stripes[0].device->devitem.minimal_io_size;
|
|
ci->num_stripes = num_stripes;
|
|
ci->sub_stripes = 1;
|
|
|
|
c->devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
|
|
if (!c->devices) {
|
|
ERR("out of memory\n");
|
|
ExFreePool(ci);
|
|
goto end;
|
|
}
|
|
|
|
for (i = 0; i < num_stripes; i++) {
|
|
if (i == 0)
|
|
cis = (CHUNK_ITEM_STRIPE*)&ci[1];
|
|
else
|
|
cis = &cis[1];
|
|
|
|
cis->dev_id = stripes[i].device->devitem.dev_id;
|
|
cis->offset = stripes[i].dh->address;
|
|
cis->dev_uuid = stripes[i].device->devitem.device_uuid;
|
|
|
|
c->devices[i] = stripes[i].device;
|
|
}
|
|
|
|
logaddr = find_new_chunk_address(Vcb, ci->size);
|
|
if (logaddr == 0xffffffffffffffff) {
|
|
ERR("find_new_chunk_address failed\n");
|
|
ExFreePool(ci);
|
|
goto end;
|
|
}
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(ci);
|
|
goto end;
|
|
}
|
|
|
|
if (flags & BLOCK_FLAG_SYSTEM) {
|
|
NTSTATUS Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("add_to_bootstrap returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
}
|
|
|
|
Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;
|
|
|
|
c->chunk_item = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
|
|
if (!c->chunk_item) {
|
|
ERR("out of memory\n");
|
|
goto end;
|
|
}
|
|
|
|
RtlCopyMemory(c->chunk_item, ci, cisize);
|
|
c->size = cisize;
|
|
c->offset = logaddr;
|
|
c->used = c->oldused = 0;
|
|
c->space_changed = FALSE;
|
|
InitializeListHead(&c->space);
|
|
|
|
s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
|
|
if (!s) {
|
|
ERR("out of memory\n");
|
|
goto end;
|
|
}
|
|
|
|
s->offset = c->offset;
|
|
s->size = c->chunk_item->size;
|
|
s->type = SPACE_TYPE_FREE;
|
|
InsertTailList(&c->space, &s->list_entry);
|
|
|
|
protect_superblocks(Vcb, c);
|
|
|
|
// add BLOCK_GROUP_ITEM to tree 2
|
|
|
|
bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
|
|
if (!bgi) {
|
|
ERR("out of memory\n");
|
|
goto end;
|
|
}
|
|
|
|
bgi->used = 0;
|
|
bgi->chunk_tree = 0x100;
|
|
bgi->flags = flags;
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, logaddr, TYPE_BLOCK_GROUP_ITEM, ci->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(bgi);
|
|
goto end;
|
|
}
|
|
|
|
// add DEV_EXTENTs to tree 4
|
|
|
|
for (i = 0; i < num_stripes; i++) {
|
|
DEV_EXTENT* de;
|
|
|
|
de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
|
|
if (!de) {
|
|
ERR("out of memory\n");
|
|
goto end;
|
|
}
|
|
|
|
de->chunktree = Vcb->chunk_root->id;
|
|
de->objid = 0x100;
|
|
de->address = logaddr;
|
|
de->length = ci->size;
|
|
de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->dev_root, stripes[i].device->devitem.dev_id, TYPE_DEV_EXTENT, stripes[i].dh->address, de, sizeof(DEV_EXTENT), NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(de);
|
|
goto end;
|
|
}
|
|
|
|
if (!increase_dev_item_used(Vcb, stripes[i].device, ci->size, rollback)) {
|
|
ERR("increase_dev_item_used failed\n");
|
|
goto end;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < num_stripes; i++) {
|
|
BOOL b = FALSE;
|
|
for (j = 0; j < i; j++) {
|
|
if (stripes[j].device == stripes[i].device)
|
|
b = TRUE;
|
|
}
|
|
|
|
if (!b)
|
|
reset_disk_holes(stripes[i].device, TRUE);
|
|
}
|
|
|
|
success = TRUE;
|
|
|
|
end:
|
|
ExFreePool(stripes);
|
|
|
|
if (!success) {
|
|
for (i = 0; i < num_stripes; i++) {
|
|
BOOL b = FALSE;
|
|
for (j = 0; j < i; j++) {
|
|
if (stripes[j].device == stripes[i].device)
|
|
b = TRUE;
|
|
}
|
|
|
|
if (!b)
|
|
reset_disk_holes(stripes[i].device, FALSE);
|
|
}
|
|
|
|
if (c) ExFreePool(c);
|
|
if (s) ExFreePool(s);
|
|
} else
|
|
InsertTailList(&Vcb->chunks, &c->list_entry);
|
|
|
|
return success ? c : NULL;
|
|
}
|
|
|
|
static void decrease_chunk_usage(chunk* c, UINT64 delta) {
|
|
c->used -= delta;
|
|
|
|
TRACE("decreasing size of chunk %llx by %llx\n", c->offset, delta);
|
|
}
|
|
|
|
static void increase_chunk_usage(chunk* c, UINT64 delta) {
|
|
c->used += delta;
|
|
|
|
TRACE("increasing size of chunk %llx by %llx\n", c->offset, delta);
|
|
}
|
|
|
|
static NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, UINT32 length) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
CHUNK_ITEM2* ci;
|
|
NTSTATUS Status;
|
|
UINT32 i;
|
|
|
|
TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);
|
|
|
|
// FIXME - use version cached in Vcb
|
|
|
|
searchkey.obj_id = 0x100; // fixed?
|
|
searchkey.obj_type = TYPE_CHUNK_ITEM;
|
|
searchkey.offset = address;
|
|
|
|
Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
|
|
ERR("error - unexpected item in chunk tree\n");
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
if (tp.item->size < sizeof(CHUNK_ITEM2)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM2));
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
ci = (CHUNK_ITEM2*)tp.item->data;
|
|
|
|
if (tp.item->key.offset > address || tp.item->key.offset + ci->ci.size < address) {
|
|
ERR("error - address %llx was out of chunk bounds\n", address);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
// FIXME - only do this for chunks marked DUPLICATE?
|
|
// FIXME - for multiple writes, if PENDING do waits at the end
|
|
// FIXME - work with RAID
|
|
for (i = 0; i < ci->ci.num_stripes; i++) {
|
|
Status = write_data_phys(Vcb->devices[0].devobj, address - tp.item->key.offset + ci->stripes[i].offset, data, length);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - write_data_phys failed\n");
|
|
goto end;
|
|
}
|
|
}
|
|
|
|
end:
|
|
free_traverse_ptr(&tp);
|
|
|
|
return Status;
|
|
}
|
|
|
|
static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
|
|
LIST_ENTRY *le, *nextle;
|
|
space *s, *s2;
|
|
|
|
// // TESTING
|
|
// le = c->space.Flink;
|
|
// while (le != &c->space) {
|
|
// s = CONTAINING_RECORD(le, space, list_entry);
|
|
//
|
|
// TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
|
|
//
|
|
// le = le->Flink;
|
|
// }
|
|
|
|
le = c->space.Flink;
|
|
while (le != &c->space) {
|
|
s = CONTAINING_RECORD(le, space, list_entry);
|
|
nextle = le->Flink;
|
|
|
|
if (s->type == SPACE_TYPE_DELETING)
|
|
s->type = SPACE_TYPE_FREE;
|
|
else if (s->type == SPACE_TYPE_WRITING)
|
|
s->type = SPACE_TYPE_USED;
|
|
|
|
if (le->Blink != &c->space) {
|
|
s2 = CONTAINING_RECORD(le->Blink, space, list_entry);
|
|
|
|
if (s2->type == s->type) { // do merge
|
|
s2->size += s->size;
|
|
|
|
RemoveEntryList(&s->list_entry);
|
|
ExFreePool(s);
|
|
}
|
|
}
|
|
|
|
le = nextle;
|
|
}
|
|
|
|
// le = c->space.Flink;
|
|
// while (le != &c->space) {
|
|
// s = CONTAINING_RECORD(le, space, list_entry);
|
|
//
|
|
// TRACE("%x,%x,%x\n", (UINT32)s->offset, (UINT32)s->size, s->type);
|
|
//
|
|
// le = le->Flink;
|
|
// }
|
|
}
|
|
|
|
static void clean_space_cache(device_extension* Vcb) {
|
|
LIST_ENTRY* le;
|
|
chunk* c;
|
|
|
|
TRACE("(%p)\n", Vcb);
|
|
|
|
le = Vcb->chunks.Flink;
|
|
while (le != &Vcb->chunks) {
|
|
c = CONTAINING_RECORD(le, chunk, list_entry);
|
|
|
|
if (c->space_changed) {
|
|
clean_space_cache_chunk(Vcb, c);
|
|
c->space_changed = FALSE;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
}
|
|
|
|
static BOOL trees_consistent(device_extension* Vcb) {
|
|
ULONG maxsize = Vcb->superblock.node_size - sizeof(tree_header);
|
|
LIST_ENTRY* le;
|
|
|
|
le = Vcb->tree_cache.Flink;
|
|
while (le != &Vcb->tree_cache) {
|
|
tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
|
|
if (tc2->write) {
|
|
if (tc2->tree->header.num_items == 0 && tc2->tree->parent)
|
|
return FALSE;
|
|
|
|
if (tc2->tree->size > maxsize)
|
|
return FALSE;
|
|
|
|
if (!tc2->tree->has_new_address)
|
|
return FALSE;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static NTSTATUS add_parents(device_extension* Vcb, LIST_ENTRY* rollback) {
|
|
LIST_ENTRY* le;
|
|
NTSTATUS Status;
|
|
|
|
le = Vcb->tree_cache.Flink;
|
|
while (le != &Vcb->tree_cache) {
|
|
tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
|
|
if (tc2->write) {
|
|
if (tc2->tree->parent)
|
|
add_to_tree_cache(Vcb, tc2->tree->parent, TRUE);
|
|
else if (tc2->tree->root != Vcb->chunk_root && tc2->tree->root != Vcb->root_root) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
|
|
searchkey.obj_id = tc2->tree->root->id;
|
|
searchkey.obj_type = TYPE_ROOT_ITEM;
|
|
searchkey.offset = 0xffffffffffffffff;
|
|
|
|
Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
|
|
ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
|
|
ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
|
|
if (!ri) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
if (tp.item->size > 0)
|
|
RtlCopyMemory(ri, tp.item->data, tp.item->size);
|
|
|
|
RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
} else {
|
|
add_to_tree_cache(Vcb, tp.tree, TRUE);
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
void print_trees(LIST_ENTRY* tc) {
|
|
LIST_ENTRY *le, *le2;
|
|
|
|
le = tc->Flink;
|
|
while (le != tc) {
|
|
KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
|
|
tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
UINT32 num_items = 0;
|
|
|
|
le2 = tc2->tree->itemlist.Flink;
|
|
while (le2 != &tc2->tree->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
|
|
if (!td->ignore) {
|
|
firstitem = td->key;
|
|
num_items++;
|
|
}
|
|
le2 = le2->Flink;
|
|
}
|
|
|
|
ERR("tree: root %llx, first key %llx,%x,%llx, level %x, num_items %x / %x\n",
|
|
tc2->tree->header.tree_id, firstitem.obj_id, firstitem.obj_type, firstitem.offset, tc2->tree->header.level, num_items, tc2->tree->header.num_items);
|
|
|
|
le = le->Flink;
|
|
}
|
|
}
|
|
|
|
static void add_parents_to_cache(device_extension* Vcb, tree* t) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
NTSTATUS Status;
|
|
|
|
while (t->parent) {
|
|
t = t->parent;
|
|
|
|
add_to_tree_cache(Vcb, t, TRUE);
|
|
}
|
|
|
|
if (t->root == Vcb->root_root || t->root == Vcb->chunk_root)
|
|
return;
|
|
|
|
searchkey.obj_id = t->root->id;
|
|
searchkey.obj_type = TYPE_ROOT_ITEM;
|
|
searchkey.offset = 0xffffffffffffffff;
|
|
|
|
Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return;
|
|
}
|
|
|
|
if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
|
|
ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
|
|
free_traverse_ptr(&tp);
|
|
return;
|
|
}
|
|
|
|
add_to_tree_cache(Vcb, tp.tree, TRUE);
|
|
|
|
free_traverse_ptr(&tp);
|
|
}
|
|
|
|
static BOOL insert_tree_extent_skinny(device_extension* Vcb, tree* t, chunk* c, UINT64 address, LIST_ENTRY* rollback) {
|
|
EXTENT_ITEM_SKINNY_METADATA* eism;
|
|
traverse_ptr insert_tp;
|
|
|
|
eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
|
|
if (!eism) {
|
|
ERR("out of memory\n");
|
|
return FALSE;
|
|
}
|
|
|
|
eism->ei.refcount = 1;
|
|
eism->ei.generation = Vcb->superblock.generation;
|
|
eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
|
|
eism->type = TYPE_TREE_BLOCK_REF;
|
|
eism->tbr.offset = t->header.tree_id;
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, t->header.level, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(eism);
|
|
return FALSE;
|
|
}
|
|
|
|
add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
|
|
|
|
// add_to_tree_cache(tc, insert_tp.tree, TRUE);
|
|
add_parents_to_cache(Vcb, insert_tp.tree);
|
|
|
|
free_traverse_ptr(&insert_tp);
|
|
|
|
t->new_address = address;
|
|
t->has_new_address = TRUE;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static BOOL insert_tree_extent(device_extension* Vcb, tree* t, chunk* c, LIST_ENTRY* rollback) {
|
|
UINT64 address;
|
|
EXTENT_ITEM_TREE2* eit2;
|
|
traverse_ptr insert_tp;
|
|
|
|
TRACE("(%p, %p, %p, %p)\n", Vcb, t, c, rollback);
|
|
|
|
if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
|
|
return FALSE;
|
|
|
|
if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)
|
|
return insert_tree_extent_skinny(Vcb, t, c, address, rollback);
|
|
|
|
eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
|
|
if (!eit2) {
|
|
ERR("out of memory\n");
|
|
return FALSE;
|
|
}
|
|
|
|
eit2->eit.extent_item.refcount = 1;
|
|
eit2->eit.extent_item.generation = Vcb->superblock.generation;
|
|
eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
|
|
// eit2->eit.firstitem = wt->firstitem;
|
|
eit2->eit.level = t->header.level;
|
|
eit2->type = TYPE_TREE_BLOCK_REF;
|
|
eit2->tbr.offset = t->header.tree_id;
|
|
|
|
// #ifdef DEBUG_PARANOID
|
|
// if (wt->firstitem.obj_type == 0xcc) { // TESTING
|
|
// ERR("error - firstitem not set (wt = %p, tree = %p, address = %x)\n", wt, wt->tree, (UINT32)address);
|
|
// ERR("num_items = %u, level = %u, root = %x, delete = %u\n", wt->tree->header.num_items, wt->tree->header.level, (UINT32)wt->tree->root->id, wt->delete);
|
|
// int3;
|
|
// }
|
|
// #endif
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(eit2);
|
|
return FALSE;
|
|
}
|
|
|
|
add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);
|
|
|
|
// add_to_tree_cache(tc, insert_tp.tree, TRUE);
|
|
add_parents_to_cache(Vcb, insert_tp.tree);
|
|
|
|
free_traverse_ptr(&insert_tp);
|
|
|
|
t->new_address = address;
|
|
t->has_new_address = TRUE;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
|
|
chunk *origchunk = NULL, *c;
|
|
LIST_ENTRY* le;
|
|
UINT64 flags = t->flags;
|
|
|
|
if (flags == 0)
|
|
flags = (t->root->id == BTRFS_ROOT_CHUNK ? BLOCK_FLAG_SYSTEM : BLOCK_FLAG_METADATA) | BLOCK_FLAG_DUPLICATE;
|
|
|
|
// TRACE("flags = %x\n", (UINT32)wt->flags);
|
|
|
|
// if (!chunk_test) { // TESTING
|
|
// if ((c = alloc_chunk(Vcb, flags))) {
|
|
// if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
|
|
// if (insert_tree_extent(Vcb, t, c)) {
|
|
// chunk_test = TRUE;
|
|
// return STATUS_SUCCESS;
|
|
// }
|
|
// }
|
|
// }
|
|
// }
|
|
|
|
if (t->has_address) {
|
|
origchunk = get_chunk_from_address(Vcb, t->header.address);
|
|
|
|
if (insert_tree_extent(Vcb, t, origchunk, rollback))
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
le = Vcb->chunks.Flink;
|
|
while (le != &Vcb->chunks) {
|
|
c = CONTAINING_RECORD(le, chunk, list_entry);
|
|
|
|
// FIXME - make sure to avoid superblocks
|
|
|
|
if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
|
|
if (insert_tree_extent(Vcb, t, c, rollback))
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
// allocate new chunk if necessary
|
|
if ((c = alloc_chunk(Vcb, flags, rollback))) {
|
|
if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
|
|
if (insert_tree_extent(Vcb, t, c, rollback))
|
|
return STATUS_SUCCESS;
|
|
}
|
|
}
|
|
|
|
ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);
|
|
|
|
return STATUS_DISK_FULL;
|
|
}
|
|
|
|
static BOOL reduce_tree_extent_skinny(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
chunk* c;
|
|
EXTENT_ITEM_SKINNY_METADATA* eism;
|
|
NTSTATUS Status;
|
|
|
|
searchkey.obj_id = address;
|
|
searchkey.obj_type = TYPE_METADATA_ITEM;
|
|
searchkey.offset = 0xffffffffffffffff;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return FALSE;
|
|
}
|
|
|
|
if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
|
|
TRACE("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
|
|
free_traverse_ptr(&tp);
|
|
return FALSE;
|
|
}
|
|
|
|
if (tp.item->size < sizeof(EXTENT_ITEM_SKINNY_METADATA)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_SKINNY_METADATA));
|
|
free_traverse_ptr(&tp);
|
|
return FALSE;
|
|
}
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
eism = (EXTENT_ITEM_SKINNY_METADATA*)tp.item->data;
|
|
if (t->header.level == 0 && eism->ei.flags & EXTENT_ITEM_SHARED_BACKREFS && eism->type == TYPE_TREE_BLOCK_REF) {
|
|
// convert shared data extents
|
|
|
|
LIST_ENTRY* le = t->itemlist.Flink;
|
|
while (le != &t->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
|
|
TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
|
|
|
|
if (!td->ignore && !td->inserted) {
|
|
if (td->key.obj_type == TYPE_EXTENT_DATA) {
|
|
EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
|
|
|
|
if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
|
|
EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
|
|
|
|
if (ed2->address != 0) {
|
|
TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
|
|
convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
|
|
}
|
|
|
|
c = get_chunk_from_address(Vcb, address);
|
|
|
|
if (c) {
|
|
decrease_chunk_usage(c, Vcb->superblock.node_size);
|
|
|
|
add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_DELETING);
|
|
} else
|
|
ERR("could not find chunk for address %llx\n", address);
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
// TESTING
|
|
// static void check_tree_num_items(tree* t) {
|
|
// LIST_ENTRY* le2;
|
|
// UINT32 ni;
|
|
//
|
|
// le2 = t->itemlist.Flink;
|
|
// ni = 0;
|
|
// while (le2 != &t->itemlist) {
|
|
// tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
|
|
// if (!td->ignore)
|
|
// ni++;
|
|
// le2 = le2->Flink;
|
|
// }
|
|
//
|
|
// if (t->header.num_items != ni) {
|
|
// ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
|
|
// int3;
|
|
// } else {
|
|
// ERR("tree %p okay\n", t);
|
|
// }
|
|
// }
|
|
//
|
|
// static void check_trees_num_items(LIST_ENTRY* tc) {
|
|
// LIST_ENTRY* le = tc->Flink;
|
|
// while (le != tc) {
|
|
// tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
//
|
|
// check_tree_num_items(tc2->tree);
|
|
//
|
|
// le = le->Flink;
|
|
// }
|
|
// }
|
|
|
|
static void convert_old_tree_extent(device_extension* Vcb, tree_data* td, tree* t, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp, tp2, insert_tp;
|
|
EXTENT_REF_V0* erv0;
|
|
NTSTATUS Status;
|
|
|
|
TRACE("(%p, %p, %p)\n", Vcb, td, t);
|
|
|
|
searchkey.obj_id = td->treeholder.address;
|
|
searchkey.obj_type = TYPE_EXTENT_REF_V0;
|
|
searchkey.offset = 0xffffffffffffffff;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return;
|
|
}
|
|
|
|
if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
|
|
TRACE("could not find EXTENT_REF_V0 for %llx\n", searchkey.obj_id);
|
|
free_traverse_ptr(&tp);
|
|
return;
|
|
}
|
|
|
|
searchkey.obj_id = td->treeholder.address;
|
|
searchkey.obj_type = TYPE_EXTENT_ITEM;
|
|
searchkey.offset = Vcb->superblock.node_size;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
free_traverse_ptr(&tp);
|
|
return;
|
|
}
|
|
|
|
if (keycmp(&searchkey, &tp2.item->key)) {
|
|
ERR("could not find %llx,%x,%llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
|
|
free_traverse_ptr(&tp2);
|
|
free_traverse_ptr(&tp);
|
|
return;
|
|
}
|
|
|
|
if (tp.item->size < sizeof(EXTENT_REF_V0)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
|
|
free_traverse_ptr(&tp2);
|
|
free_traverse_ptr(&tp);
|
|
return;
|
|
}
|
|
|
|
erv0 = (EXTENT_REF_V0*)tp.item->data;
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
delete_tree_item(Vcb, &tp2, rollback);
|
|
|
|
if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
|
|
EXTENT_ITEM_SKINNY_METADATA* eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
|
|
|
|
if (!eism) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp2);
|
|
free_traverse_ptr(&tp);
|
|
return;
|
|
}
|
|
|
|
eism->ei.refcount = 1;
|
|
eism->ei.generation = erv0->gen;
|
|
eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
|
|
eism->type = TYPE_TREE_BLOCK_REF;
|
|
eism->tbr.offset = t->header.tree_id;
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_METADATA_ITEM, t->header.level -1, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
free_traverse_ptr(&tp2);
|
|
free_traverse_ptr(&tp);
|
|
return;
|
|
}
|
|
} else {
|
|
EXTENT_ITEM_TREE2* eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
|
|
|
|
if (!eit2) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp2);
|
|
free_traverse_ptr(&tp);
|
|
return;
|
|
}
|
|
|
|
eit2->eit.extent_item.refcount = 1;
|
|
eit2->eit.extent_item.generation = erv0->gen;
|
|
eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
|
|
eit2->eit.firstitem = td->key;
|
|
eit2->eit.level = t->header.level - 1;
|
|
eit2->type = TYPE_TREE_BLOCK_REF;
|
|
eit2->tbr.offset = t->header.tree_id;
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
free_traverse_ptr(&tp2);
|
|
free_traverse_ptr(&tp);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// add_to_tree_cache(tc, insert_tp.tree, TRUE);
|
|
add_parents_to_cache(Vcb, insert_tp.tree);
|
|
add_parents_to_cache(Vcb, tp.tree);
|
|
add_parents_to_cache(Vcb, tp2.tree);
|
|
|
|
free_traverse_ptr(&insert_tp);
|
|
|
|
free_traverse_ptr(&tp2);
|
|
free_traverse_ptr(&tp);
|
|
}
|
|
|
|
static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
EXTENT_ITEM* ei;
|
|
EXTENT_ITEM_V0* eiv0;
|
|
chunk* c;
|
|
NTSTATUS Status;
|
|
|
|
// FIXME - deal with refcounts > 1
|
|
|
|
TRACE("(%p, %llx, %p)\n", Vcb, address, t);
|
|
|
|
if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
|
|
if (reduce_tree_extent_skinny(Vcb, address, t, rollback)) {
|
|
return STATUS_SUCCESS;
|
|
}
|
|
}
|
|
|
|
searchkey.obj_id = address;
|
|
searchkey.obj_type = TYPE_EXTENT_ITEM;
|
|
searchkey.offset = Vcb->superblock.node_size;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (keycmp(&tp.item->key, &searchkey)) {
|
|
ERR("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
|
|
int3;
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
|
|
eiv0 = (EXTENT_ITEM_V0*)tp.item->data;
|
|
|
|
if (eiv0->refcount > 1) {
|
|
FIXME("FIXME - cannot deal with refcounts larger than 1 at present (eiv0->refcount == %llx)\n", eiv0->refcount);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
} else {
|
|
if (tp.item->size < sizeof(EXTENT_ITEM)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
ei = (EXTENT_ITEM*)tp.item->data;
|
|
|
|
if (ei->refcount > 1) {
|
|
FIXME("FIXME - cannot deal with refcounts larger than 1 at present (ei->refcount == %llx)\n", ei->refcount);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
if (t->header.level == 0 && ei->flags & EXTENT_ITEM_SHARED_BACKREFS) {
|
|
// convert shared data extents
|
|
|
|
LIST_ENTRY* le = t->itemlist.Flink;
|
|
while (le != &t->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
|
|
TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
|
|
|
|
if (!td->ignore && !td->inserted) {
|
|
if (td->key.obj_type == TYPE_EXTENT_DATA) {
|
|
EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
|
|
|
|
if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
|
|
EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
|
|
|
|
if (ed2->address != 0) {
|
|
TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
|
|
convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
|
|
}
|
|
}
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
// if EXTENT_ITEM_V0, delete corresponding B4 item
|
|
if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
|
|
traverse_ptr tp2;
|
|
|
|
searchkey.obj_id = address;
|
|
searchkey.obj_type = TYPE_EXTENT_REF_V0;
|
|
searchkey.offset = 0xffffffffffffffff;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
free_traverse_ptr(&tp);
|
|
return Status;
|
|
}
|
|
|
|
if (tp2.item->key.obj_id == searchkey.obj_id && tp2.item->key.obj_type == searchkey.obj_type) {
|
|
delete_tree_item(Vcb, &tp2, rollback);
|
|
}
|
|
free_traverse_ptr(&tp2);
|
|
}
|
|
|
|
if (!(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
|
|
LIST_ENTRY* le;
|
|
|
|
// when writing old internal trees, convert related extents
|
|
|
|
le = t->itemlist.Flink;
|
|
while (le != &t->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
|
|
// ERR("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
|
|
|
|
if (!td->ignore && !td->inserted) {
|
|
if (t->header.level > 0) {
|
|
convert_old_tree_extent(Vcb, td, t, rollback);
|
|
} else if (td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA)) {
|
|
EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
|
|
|
|
if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
|
|
EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
|
|
|
|
if (ed2->address != 0) {
|
|
TRACE("trying to convert old data extent %llx,%llx\n", ed2->address, ed2->size);
|
|
convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
}
|
|
|
|
c = get_chunk_from_address(Vcb, address);
|
|
|
|
if (c) {
|
|
decrease_chunk_usage(c, tp.item->key.offset);
|
|
|
|
add_to_space_list(c, address, tp.item->key.offset, SPACE_TYPE_DELETING);
|
|
} else
|
|
ERR("could not find chunk for address %llx\n", address);
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static NTSTATUS allocate_tree_extents(device_extension* Vcb, LIST_ENTRY* rollback) {
|
|
LIST_ENTRY* le;
|
|
NTSTATUS Status;
|
|
|
|
TRACE("(%p)\n", Vcb);
|
|
|
|
le = Vcb->tree_cache.Flink;
|
|
while (le != &Vcb->tree_cache) {
|
|
tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
|
|
if (tc2->write && !tc2->tree->has_new_address) {
|
|
chunk* c;
|
|
|
|
Status = get_tree_new_address(Vcb, tc2->tree, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("get_tree_new_address returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
TRACE("allocated extent %llx\n", tc2->tree->new_address);
|
|
|
|
if (tc2->tree->has_address) {
|
|
Status = reduce_tree_extent(Vcb, tc2->tree->header.address, tc2->tree, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("reduce_tree_extent returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
}
|
|
|
|
c = get_chunk_from_address(Vcb, tc2->tree->new_address);
|
|
|
|
if (c) {
|
|
increase_chunk_usage(c, Vcb->superblock.node_size);
|
|
} else {
|
|
ERR("could not find chunk for address %llx\n", tc2->tree->new_address);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static NTSTATUS update_root_root(device_extension* Vcb, LIST_ENTRY* rollback) {
|
|
LIST_ENTRY* le;
|
|
NTSTATUS Status;
|
|
|
|
TRACE("(%p)\n", Vcb);
|
|
|
|
le = Vcb->tree_cache.Flink;
|
|
while (le != &Vcb->tree_cache) {
|
|
tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
|
|
if (tc2->write && !tc2->tree->parent) {
|
|
if (tc2->tree->root != Vcb->root_root && tc2->tree->root != Vcb->chunk_root) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
|
|
searchkey.obj_id = tc2->tree->root->id;
|
|
searchkey.obj_type = TYPE_ROOT_ITEM;
|
|
searchkey.offset = 0xffffffffffffffff;
|
|
|
|
Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
|
|
ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, tc2->tree->new_address);
|
|
|
|
tc2->tree->root->root_item.block_number = tc2->tree->new_address;
|
|
tc2->tree->root->root_item.root_level = tc2->tree->header.level;
|
|
tc2->tree->root->root_item.generation = Vcb->superblock.generation;
|
|
tc2->tree->root->root_item.generation2 = Vcb->superblock.generation;
|
|
|
|
if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
|
|
ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
|
|
|
|
if (!ri) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlCopyMemory(ri, &tc2->tree->root->root_item, sizeof(ROOT_ITEM));
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
} else
|
|
RtlCopyMemory(tp.item->data, &tc2->tree->root->root_item, sizeof(ROOT_ITEM));
|
|
|
|
free_traverse_ptr(&tp);
|
|
}
|
|
|
|
tc2->tree->root->treeholder.address = tc2->tree->new_address;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
enum write_tree_status {
|
|
WriteTreeStatus_Pending,
|
|
WriteTreeStatus_Success,
|
|
WriteTreeStatus_Error,
|
|
WriteTreeStatus_Cancelling,
|
|
WriteTreeStatus_Cancelled
|
|
};
|
|
|
|
struct write_tree_context;
|
|
|
|
typedef struct {
|
|
struct write_tree_context* context;
|
|
UINT8* buf;
|
|
device* device;
|
|
PIRP Irp;
|
|
IO_STATUS_BLOCK iosb;
|
|
enum write_tree_status status;
|
|
LIST_ENTRY list_entry;
|
|
} write_tree_stripe;
|
|
|
|
typedef struct {
|
|
KEVENT Event;
|
|
LIST_ENTRY stripes;
|
|
} write_tree_context;
|
|
|
|
static NTSTATUS STDCALL write_tree_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
|
|
write_tree_stripe* stripe = conptr;
|
|
write_tree_context* context = (write_tree_context*)stripe->context;
|
|
LIST_ENTRY* le;
|
|
BOOL complete;
|
|
|
|
if (stripe->status == WriteTreeStatus_Cancelling) {
|
|
stripe->status = WriteTreeStatus_Cancelled;
|
|
goto end;
|
|
}
|
|
|
|
stripe->iosb = Irp->IoStatus;
|
|
|
|
if (NT_SUCCESS(Irp->IoStatus.Status)) {
|
|
stripe->status = WriteTreeStatus_Success;
|
|
} else {
|
|
le = context->stripes.Flink;
|
|
|
|
stripe->status = WriteTreeStatus_Error;
|
|
|
|
while (le != &context->stripes) {
|
|
write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
|
|
|
|
if (s2->status == WriteTreeStatus_Pending) {
|
|
s2->status = WriteTreeStatus_Cancelling;
|
|
IoCancelIrp(s2->Irp);
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
}
|
|
|
|
end:
|
|
le = context->stripes.Flink;
|
|
complete = TRUE;
|
|
|
|
while (le != &context->stripes) {
|
|
write_tree_stripe* s2 = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
|
|
|
|
if (s2->status == WriteTreeStatus_Pending || s2->status == WriteTreeStatus_Cancelling) {
|
|
complete = FALSE;
|
|
break;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
if (complete)
|
|
KeSetEvent(&context->Event, 0, FALSE);
|
|
|
|
return STATUS_MORE_PROCESSING_REQUIRED;
|
|
}
|
|
|
|
static NTSTATUS write_tree(device_extension* Vcb, UINT64 addr, UINT8* data, write_tree_context* wtc) {
|
|
chunk* c;
|
|
CHUNK_ITEM_STRIPE* cis;
|
|
write_tree_stripe* stripe;
|
|
UINT64 i;
|
|
|
|
c = get_chunk_from_address(Vcb, addr);
|
|
|
|
if (!c) {
|
|
ERR("get_chunk_from_address failed\n");
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];
|
|
|
|
// FIXME - make this work with RAID
|
|
|
|
for (i = 0; i < c->chunk_item->num_stripes; i++) {
|
|
PIO_STACK_LOCATION IrpSp;
|
|
|
|
// FIXME - handle missing devices
|
|
|
|
stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_stripe), ALLOC_TAG);
|
|
if (!stripe) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
stripe->context = (struct write_tree_context*)wtc;
|
|
stripe->buf = data;
|
|
stripe->device = c->devices[i];
|
|
RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
|
|
stripe->status = WriteTreeStatus_Pending;
|
|
|
|
stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);
|
|
|
|
if (!stripe->Irp) {
|
|
ERR("IoAllocateIrp failed\n");
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
|
|
IrpSp->MajorFunction = IRP_MJ_WRITE;
|
|
|
|
if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
|
|
stripe->Irp->AssociatedIrp.SystemBuffer = data;
|
|
|
|
stripe->Irp->Flags = IRP_BUFFERED_IO;
|
|
} else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
|
|
stripe->Irp->MdlAddress = IoAllocateMdl(data, Vcb->superblock.node_size, FALSE, FALSE, NULL);
|
|
if (!stripe->Irp->MdlAddress) {
|
|
ERR("IoAllocateMdl failed\n");
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
|
|
} else {
|
|
stripe->Irp->UserBuffer = data;
|
|
}
|
|
|
|
IrpSp->Parameters.Write.Length = Vcb->superblock.node_size;
|
|
IrpSp->Parameters.Write.ByteOffset.QuadPart = addr - c->offset + cis[i].offset;
|
|
|
|
stripe->Irp->UserIosb = &stripe->iosb;
|
|
|
|
IoSetCompletionRoutine(stripe->Irp, write_tree_completion, stripe, TRUE, TRUE, TRUE);
|
|
|
|
InsertTailList(&wtc->stripes, &stripe->list_entry);
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static void free_stripes(write_tree_context* wtc) {
|
|
LIST_ENTRY *le, *le2, *nextle;
|
|
|
|
le = wtc->stripes.Flink;
|
|
while (le != &wtc->stripes) {
|
|
write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
|
|
|
|
if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
|
|
MmUnlockPages(stripe->Irp->MdlAddress);
|
|
IoFreeMdl(stripe->Irp->MdlAddress);
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
le = wtc->stripes.Flink;
|
|
while (le != &wtc->stripes) {
|
|
write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
|
|
|
|
nextle = le->Flink;
|
|
|
|
if (stripe->buf) {
|
|
ExFreePool(stripe->buf);
|
|
|
|
le2 = le->Flink;
|
|
while (le2 != &wtc->stripes) {
|
|
write_tree_stripe* s2 = CONTAINING_RECORD(le2, write_tree_stripe, list_entry);
|
|
|
|
if (s2->buf == stripe->buf)
|
|
s2->buf = NULL;
|
|
|
|
le2 = le2->Flink;
|
|
}
|
|
|
|
}
|
|
|
|
ExFreePool(stripe);
|
|
|
|
le = nextle;
|
|
}
|
|
}
|
|
|
|
static NTSTATUS write_trees(device_extension* Vcb) {
|
|
UINT8 level;
|
|
UINT8 *data, *body;
|
|
UINT32 crc32;
|
|
NTSTATUS Status;
|
|
LIST_ENTRY* le;
|
|
write_tree_context* wtc;
|
|
|
|
TRACE("(%p)\n", Vcb);
|
|
|
|
for (level = 0; level <= 255; level++) {
|
|
BOOL nothing_found = TRUE;
|
|
|
|
TRACE("level = %u\n", level);
|
|
|
|
le = Vcb->tree_cache.Flink;
|
|
while (le != &Vcb->tree_cache) {
|
|
tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
|
|
if (tc2->write && tc2->tree->header.level == level) {
|
|
KEY firstitem, searchkey;
|
|
LIST_ENTRY* le2;
|
|
traverse_ptr tp;
|
|
EXTENT_ITEM_TREE* eit;
|
|
|
|
if (!tc2->tree->has_new_address) {
|
|
ERR("error - tried to write tree with no new address\n");
|
|
int3;
|
|
}
|
|
|
|
le2 = tc2->tree->itemlist.Flink;
|
|
while (le2 != &tc2->tree->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
|
|
if (!td->ignore) {
|
|
firstitem = td->key;
|
|
break;
|
|
}
|
|
le2 = le2->Flink;
|
|
}
|
|
|
|
if (tc2->tree->parent) {
|
|
tc2->tree->paritem->key = firstitem;
|
|
tc2->tree->paritem->treeholder.address = tc2->tree->new_address;
|
|
tc2->tree->paritem->treeholder.generation = Vcb->superblock.generation;
|
|
}
|
|
|
|
if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
|
|
searchkey.obj_id = tc2->tree->new_address;
|
|
searchkey.obj_type = TYPE_EXTENT_ITEM;
|
|
searchkey.offset = Vcb->superblock.node_size;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (keycmp(&searchkey, &tp.item->key)) {
|
|
// traverse_ptr next_tp;
|
|
// BOOL b;
|
|
// tree_data* paritem;
|
|
|
|
ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
|
|
free_traverse_ptr(&tp);
|
|
|
|
// searchkey.obj_id = 0;
|
|
// searchkey.obj_type = 0;
|
|
// searchkey.offset = 0;
|
|
//
|
|
// find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
//
|
|
// paritem = NULL;
|
|
// do {
|
|
// if (tp.tree->paritem != paritem) {
|
|
// paritem = tp.tree->paritem;
|
|
// ERR("paritem: %llx,%x,%llx\n", paritem->key.obj_id, paritem->key.obj_type, paritem->key.offset);
|
|
// }
|
|
//
|
|
// ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
|
|
//
|
|
// b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
|
|
// if (b) {
|
|
// free_traverse_ptr(&tp);
|
|
// tp = next_tp;
|
|
// }
|
|
// } while (b);
|
|
//
|
|
// free_traverse_ptr(&tp);
|
|
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
eit = (EXTENT_ITEM_TREE*)tp.item->data;
|
|
eit->firstitem = firstitem;
|
|
|
|
free_traverse_ptr(&tp);
|
|
}
|
|
|
|
nothing_found = FALSE;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
if (nothing_found)
|
|
break;
|
|
}
|
|
|
|
TRACE("allocated tree extents\n");
|
|
|
|
wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_context), ALLOC_TAG);
|
|
if (!wtc) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
|
|
InitializeListHead(&wtc->stripes);
|
|
|
|
le = Vcb->tree_cache.Flink;
|
|
while (le != &Vcb->tree_cache) {
|
|
tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
#ifdef DEBUG_PARANOID
|
|
UINT32 num_items = 0, size = 0;
|
|
LIST_ENTRY* le2;
|
|
BOOL crash = FALSE;
|
|
#endif
|
|
|
|
if (tc2->write) {
|
|
#ifdef DEBUG_PARANOID
|
|
le2 = tc2->tree->itemlist.Flink;
|
|
while (le2 != &tc2->tree->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
|
|
if (!td->ignore) {
|
|
num_items++;
|
|
|
|
if (tc2->tree->header.level == 0)
|
|
size += td->size;
|
|
}
|
|
le2 = le2->Flink;
|
|
}
|
|
|
|
if (tc2->tree->header.level == 0)
|
|
size += num_items * sizeof(leaf_node);
|
|
else
|
|
size += num_items * sizeof(internal_node);
|
|
|
|
if (num_items != tc2->tree->header.num_items) {
|
|
ERR("tree %llx, level %x: num_items was %x, expected %x\n", tc2->tree->root->id, tc2->tree->header.level, num_items, tc2->tree->header.num_items);
|
|
crash = TRUE;
|
|
}
|
|
|
|
if (size != tc2->tree->size) {
|
|
ERR("tree %llx, level %x: size was %x, expected %x\n", tc2->tree->root->id, tc2->tree->header.level, size, tc2->tree->size);
|
|
crash = TRUE;
|
|
}
|
|
|
|
if (tc2->tree->header.num_items == 0 && tc2->tree->parent) {
|
|
ERR("tree %llx, level %x: tried to write empty tree with parent\n", tc2->tree->root->id, tc2->tree->header.level);
|
|
crash = TRUE;
|
|
}
|
|
|
|
if (tc2->tree->size > Vcb->superblock.node_size - sizeof(tree_header)) {
|
|
ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", tc2->tree->root->id, tc2->tree->header.level, tc2->tree->size, Vcb->superblock.node_size - sizeof(tree_header));
|
|
crash = TRUE;
|
|
}
|
|
|
|
if (crash) {
|
|
ERR("tree %p\n", tc2->tree);
|
|
le2 = tc2->tree->itemlist.Flink;
|
|
while (le2 != &tc2->tree->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
|
|
if (!td->ignore) {
|
|
ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
|
|
}
|
|
le2 = le2->Flink;
|
|
}
|
|
int3;
|
|
}
|
|
#endif
|
|
tc2->tree->header.address = tc2->tree->new_address;
|
|
tc2->tree->header.generation = Vcb->superblock.generation;
|
|
tc2->tree->header.flags |= HEADER_FLAG_MIXED_BACKREF;
|
|
tc2->tree->has_address = TRUE;
|
|
|
|
data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
|
|
if (!data) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
body = data + sizeof(tree_header);
|
|
|
|
RtlCopyMemory(data, &tc2->tree->header, sizeof(tree_header));
|
|
RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));
|
|
|
|
if (tc2->tree->header.level == 0) {
|
|
leaf_node* itemptr = (leaf_node*)body;
|
|
int i = 0;
|
|
LIST_ENTRY* le2;
|
|
UINT8* dataptr = data + Vcb->superblock.node_size;
|
|
|
|
le2 = tc2->tree->itemlist.Flink;
|
|
while (le2 != &tc2->tree->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
|
|
if (!td->ignore) {
|
|
dataptr = dataptr - td->size;
|
|
|
|
itemptr[i].key = td->key;
|
|
itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
|
|
itemptr[i].size = td->size;
|
|
i++;
|
|
|
|
if (td->size > 0)
|
|
RtlCopyMemory(dataptr, td->data, td->size);
|
|
}
|
|
|
|
le2 = le2->Flink;
|
|
}
|
|
} else {
|
|
internal_node* itemptr = (internal_node*)body;
|
|
int i = 0;
|
|
LIST_ENTRY* le2;
|
|
|
|
le2 = tc2->tree->itemlist.Flink;
|
|
while (le2 != &tc2->tree->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
|
|
if (!td->ignore) {
|
|
itemptr[i].key = td->key;
|
|
itemptr[i].address = td->treeholder.address;
|
|
itemptr[i].generation = td->treeholder.generation;
|
|
i++;
|
|
}
|
|
|
|
le2 = le2->Flink;
|
|
}
|
|
}
|
|
|
|
crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
|
|
crc32 = ~crc32;
|
|
*((UINT32*)data) = crc32;
|
|
TRACE("setting crc32 to %08x\n", crc32);
|
|
|
|
Status = write_tree(Vcb, tc2->tree->new_address, data, wtc);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("write_tree returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
if (wtc->stripes.Flink != &wtc->stripes) {
|
|
// launch writes and wait
|
|
le = wtc->stripes.Flink;
|
|
while (le != &wtc->stripes) {
|
|
write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
|
|
|
|
IoCallDriver(stripe->device->devobj, stripe->Irp);
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);
|
|
|
|
le = wtc->stripes.Flink;
|
|
while (le != &wtc->stripes) {
|
|
write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);
|
|
|
|
if (!NT_SUCCESS(stripe->iosb.Status)) {
|
|
Status = stripe->iosb.Status;
|
|
break;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
free_stripes(wtc);
|
|
}
|
|
|
|
end:
|
|
ExFreePool(wtc);
|
|
|
|
return Status;
|
|
}
|
|
|
|
static NTSTATUS write_superblocks(device_extension* Vcb) {
|
|
UINT64 i;
|
|
NTSTATUS Status;
|
|
LIST_ENTRY* le;
|
|
|
|
TRACE("(%p)\n", Vcb);
|
|
|
|
le = Vcb->tree_cache.Flink;
|
|
while (le != &Vcb->tree_cache) {
|
|
tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
|
|
if (tc2->write && !tc2->tree->parent) {
|
|
if (tc2->tree->root == Vcb->root_root) {
|
|
Vcb->superblock.root_tree_addr = tc2->tree->new_address;
|
|
Vcb->superblock.root_level = tc2->tree->header.level;
|
|
} else if (tc2->tree->root == Vcb->chunk_root) {
|
|
Vcb->superblock.chunk_tree_addr = tc2->tree->new_address;
|
|
Vcb->superblock.chunk_root_generation = tc2->tree->header.generation;
|
|
Vcb->superblock.chunk_root_level = tc2->tree->header.level;
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
for (i = 0; i < Vcb->superblock.num_devices; i++) {
|
|
if (Vcb->devices[i].devobj) {
|
|
Status = write_superblock(Vcb, &Vcb->devices[i]);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("write_superblock returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
}
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static NTSTATUS update_chunk_usage(device_extension* Vcb, LIST_ENTRY* rollback) {
|
|
LIST_ENTRY* le = Vcb->chunks.Flink;
|
|
chunk* c;
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
BLOCK_GROUP_ITEM* bgi;
|
|
NTSTATUS Status;
|
|
|
|
TRACE("(%p)\n", Vcb);
|
|
|
|
while (le != &Vcb->chunks) {
|
|
c = CONTAINING_RECORD(le, chunk, list_entry);
|
|
|
|
if (c->used != c->oldused) {
|
|
searchkey.obj_id = c->offset;
|
|
searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
|
|
searchkey.offset = c->chunk_item->size;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (keycmp(&searchkey, &tp.item->key)) {
|
|
ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
|
|
int3;
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
|
|
if (!bgi) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlCopyMemory(bgi, tp.item->data, tp.item->size);
|
|
bgi->used = c->used;
|
|
|
|
TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(bgi);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
|
|
TRACE("chunk_item type = %llx\n", c->chunk_item->type);
|
|
|
|
if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
|
|
FIXME("RAID0 not yet supported\n");
|
|
ExFreePool(bgi);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
} else if (c->chunk_item->type & BLOCK_FLAG_RAID1) {
|
|
FIXME("RAID1 not yet supported\n");
|
|
ExFreePool(bgi);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
} else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) {
|
|
Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + (2 * (c->used - c->oldused));
|
|
} else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
|
|
FIXME("RAID10 not yet supported\n");
|
|
ExFreePool(bgi);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
} else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
|
|
FIXME("RAID5 not yet supported\n");
|
|
ExFreePool(bgi);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
} else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
|
|
FIXME("RAID6 not yet supported\n");
|
|
ExFreePool(bgi);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
} else { // SINGLE
|
|
Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + c->used - c->oldused;
|
|
}
|
|
|
|
TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
c->oldused = c->used;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static void get_first_item(tree* t, KEY* key) {
|
|
LIST_ENTRY* le;
|
|
|
|
le = t->itemlist.Flink;
|
|
while (le != &t->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
|
|
*key = td->key;
|
|
return;
|
|
}
|
|
}
|
|
|
|
static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
|
|
tree *nt, *pt;
|
|
tree_data* td;
|
|
tree_data* oldlastitem;
|
|
// write_tree* wt2;
|
|
// // tree_data *firsttd, *lasttd;
|
|
// // LIST_ENTRY* le;
|
|
// #ifdef DEBUG_PARANOID
|
|
// KEY lastkey1, lastkey2;
|
|
// traverse_ptr tp, next_tp;
|
|
// ULONG numitems1, numitems2;
|
|
// #endif
|
|
|
|
TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);
|
|
|
|
// #ifdef DEBUG_PARANOID
|
|
// lastkey1.obj_id = 0xffffffffffffffff;
|
|
// lastkey1.obj_type = 0xff;
|
|
// lastkey1.offset = 0xffffffffffffffff;
|
|
//
|
|
// if (!find_item(Vcb, t->root, &tp, &lastkey1, NULL, FALSE))
|
|
// ERR("error - find_item failed\n");
|
|
// else {
|
|
// lastkey1 = tp.item->key;
|
|
// numitems1 = 0;
|
|
// while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
|
|
// free_traverse_ptr(&tp);
|
|
// tp = next_tp;
|
|
// numitems1++;
|
|
// }
|
|
// free_traverse_ptr(&tp);
|
|
// }
|
|
// #endif
|
|
|
|
nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
|
|
if (!nt) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
|
|
nt->header.address = 0;
|
|
nt->header.generation = Vcb->superblock.generation;
|
|
nt->header.num_items = t->header.num_items - numitems;
|
|
nt->header.flags = HEADER_FLAG_MIXED_BACKREF;
|
|
|
|
nt->refcount = 0;
|
|
nt->has_address = FALSE;
|
|
nt->Vcb = Vcb;
|
|
nt->parent = t->parent;
|
|
nt->root = t->root;
|
|
// nt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
|
|
nt->new_address = 0;
|
|
nt->has_new_address = FALSE;
|
|
nt->flags = t->flags;
|
|
InitializeListHead(&nt->itemlist);
|
|
|
|
// ExInitializeResourceLite(&nt->nonpaged->load_tree_lock);
|
|
|
|
oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);
|
|
|
|
// // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
|
|
// // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
|
|
// //
|
|
// // TRACE("old tree in %x was from (%x,%x,%x) to (%x,%x,%x)\n",
|
|
// // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
|
|
// // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
|
|
// //
|
|
// // le = wt->tree->itemlist.Flink;
|
|
// // while (le != &wt->tree->itemlist) {
|
|
// // td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
// // TRACE("old tree item was (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
|
|
// // le = le->Flink;
|
|
// // }
|
|
|
|
nt->itemlist.Flink = &newfirstitem->list_entry;
|
|
nt->itemlist.Blink = t->itemlist.Blink;
|
|
nt->itemlist.Flink->Blink = &nt->itemlist;
|
|
nt->itemlist.Blink->Flink = &nt->itemlist;
|
|
|
|
t->itemlist.Blink = &oldlastitem->list_entry;
|
|
t->itemlist.Blink->Flink = &t->itemlist;
|
|
|
|
// // le = wt->tree->itemlist.Flink;
|
|
// // while (le != &wt->tree->itemlist) {
|
|
// // td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
// // TRACE("old tree item now (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
|
|
// // le = le->Flink;
|
|
// // }
|
|
// //
|
|
// // firsttd = CONTAINING_RECORD(wt->tree->itemlist.Flink, tree_data, list_entry);
|
|
// // lasttd = CONTAINING_RECORD(wt->tree->itemlist.Blink, tree_data, list_entry);
|
|
// //
|
|
// // TRACE("old tree in %x is now from (%x,%x,%x) to (%x,%x,%x)\n",
|
|
// // (UINT32)wt->tree->root->id, (UINT32)firsttd->key.obj_id, firsttd->key.obj_type, (UINT32)firsttd->key.offset,
|
|
// // (UINT32)lasttd->key.obj_id, lasttd->key.obj_type, (UINT32)lasttd->key.offset);
|
|
|
|
nt->size = t->size - size;
|
|
t->size = size;
|
|
t->header.num_items = numitems;
|
|
add_to_tree_cache(Vcb, nt, TRUE);
|
|
|
|
InterlockedIncrement(&Vcb->open_trees);
|
|
#ifdef DEBUG_TREE_REFCOUNTS
|
|
TRACE("created new split tree %p\n", nt);
|
|
#endif
|
|
InsertTailList(&Vcb->trees, &nt->list_entry);
|
|
|
|
// // // TESTING
|
|
// // td = wt->tree->items;
|
|
// // while (td) {
|
|
// // if (!td->ignore) {
|
|
// // TRACE("old tree item: (%x,%x,%x)\n", (UINT32)td->key.obj_id, td->key.obj_type, (UINT32)td->key.offset);
|
|
// // }
|
|
// // td = td->next;
|
|
// // }
|
|
|
|
// // oldlastitem->next = NULL;
|
|
// // wt->tree->lastitem = oldlastitem;
|
|
|
|
// // TRACE("last item is now (%x,%x,%x)\n", (UINT32)oldlastitem->key.obj_id, oldlastitem->key.obj_type, (UINT32)oldlastitem->key.offset);
|
|
|
|
if (nt->header.level > 0) {
|
|
LIST_ENTRY* le = nt->itemlist.Flink;
|
|
|
|
while (le != &nt->itemlist) {
|
|
tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
|
|
if (td2->treeholder.tree) {
|
|
td2->treeholder.tree->parent = nt;
|
|
increase_tree_rc(nt);
|
|
free_tree(t);
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
}
|
|
|
|
if (nt->parent) {
|
|
increase_tree_rc(nt->parent);
|
|
|
|
td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
|
|
if (!td) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
td->key = newfirstitem->key;
|
|
|
|
InsertHeadList(&t->paritem->list_entry, &td->list_entry);
|
|
|
|
td->ignore = FALSE;
|
|
td->inserted = TRUE;
|
|
td->treeholder.tree = nt;
|
|
init_tree_holder(&td->treeholder);
|
|
// td->treeholder.nonpaged->status = tree_holder_loaded;
|
|
nt->paritem = td;
|
|
|
|
nt->parent->header.num_items++;
|
|
nt->parent->size += sizeof(internal_node);
|
|
|
|
goto end;
|
|
}
|
|
|
|
TRACE("adding new tree parent\n");
|
|
|
|
if (nt->header.level == 255) {
|
|
ERR("cannot add parent to tree at level 255\n");
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
|
|
if (!pt) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
|
|
pt->header.address = 0;
|
|
pt->header.num_items = 2;
|
|
pt->header.level = nt->header.level + 1;
|
|
pt->header.flags = HEADER_FLAG_MIXED_BACKREF;
|
|
|
|
pt->refcount = 2;
|
|
pt->has_address = FALSE;
|
|
pt->Vcb = Vcb;
|
|
pt->parent = NULL;
|
|
pt->paritem = NULL;
|
|
pt->root = t->root;
|
|
pt->new_address = 0;
|
|
pt->has_new_address = FALSE;
|
|
// pt->nonpaged = ExAllocatePoolWithTag(NonPagedPool, sizeof(tree_nonpaged), ALLOC_TAG);
|
|
pt->size = pt->header.num_items * sizeof(internal_node);
|
|
pt->flags = t->flags;
|
|
InitializeListHead(&pt->itemlist);
|
|
|
|
// ExInitializeResourceLite(&pt->nonpaged->load_tree_lock);
|
|
|
|
InterlockedIncrement(&Vcb->open_trees);
|
|
#ifdef DEBUG_TREE_REFCOUNTS
|
|
TRACE("created new parent tree %p\n", pt);
|
|
#endif
|
|
InsertTailList(&Vcb->trees, &pt->list_entry);
|
|
|
|
td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
|
|
if (!td) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
get_first_item(t, &td->key);
|
|
td->ignore = FALSE;
|
|
td->inserted = FALSE;
|
|
td->treeholder.address = 0;
|
|
td->treeholder.generation = Vcb->superblock.generation;
|
|
td->treeholder.tree = t;
|
|
init_tree_holder(&td->treeholder);
|
|
// td->treeholder.nonpaged->status = tree_holder_loaded;
|
|
InsertTailList(&pt->itemlist, &td->list_entry);
|
|
t->paritem = td;
|
|
|
|
td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
|
|
if (!td) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
td->key = newfirstitem->key;
|
|
td->ignore = FALSE;
|
|
td->inserted = FALSE;
|
|
td->treeholder.address = 0;
|
|
td->treeholder.generation = Vcb->superblock.generation;
|
|
td->treeholder.tree = nt;
|
|
init_tree_holder(&td->treeholder);
|
|
// td->treeholder.nonpaged->status = tree_holder_loaded;
|
|
InsertTailList(&pt->itemlist, &td->list_entry);
|
|
nt->paritem = td;
|
|
|
|
add_to_tree_cache(Vcb, pt, TRUE);
|
|
|
|
t->root->treeholder.tree = pt;
|
|
|
|
t->parent = pt;
|
|
nt->parent = pt;
|
|
|
|
end:
|
|
t->root->root_item.bytes_used += Vcb->superblock.node_size;
|
|
|
|
// #ifdef DEBUG_PARANOID
|
|
// lastkey2.obj_id = 0xffffffffffffffff;
|
|
// lastkey2.obj_type = 0xff;
|
|
// lastkey2.offset = 0xffffffffffffffff;
|
|
//
|
|
// if (!find_item(Vcb, wt->tree->root, &tp, &lastkey2, NULL, FALSE))
|
|
// ERR("error - find_item failed\n");
|
|
// else {
|
|
// lastkey2 = tp.item->key;
|
|
//
|
|
// numitems2 = 0;
|
|
// while (find_prev_item(Vcb, &tp, &next_tp, NULL, FALSE)) {
|
|
// free_traverse_ptr(&tp);
|
|
// tp = next_tp;
|
|
// numitems2++;
|
|
// }
|
|
// free_traverse_ptr(&tp);
|
|
// }
|
|
//
|
|
// ERR("lastkey1 = %llx,%x,%llx\n", lastkey1.obj_id, lastkey1.obj_type, lastkey1.offset);
|
|
// ERR("lastkey2 = %llx,%x,%llx\n", lastkey2.obj_id, lastkey2.obj_type, lastkey2.offset);
|
|
// ERR("numitems1 = %u\n", numitems1);
|
|
// ERR("numitems2 = %u\n", numitems2);
|
|
// #endif
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
|
|
LIST_ENTRY* le;
|
|
UINT32 size, ds, numitems;
|
|
|
|
size = 0;
|
|
numitems = 0;
|
|
|
|
// FIXME - naïve implementation: maximizes number of filled trees
|
|
|
|
le = t->itemlist.Flink;
|
|
while (le != &t->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
|
|
if (!td->ignore) {
|
|
if (t->header.level == 0)
|
|
ds = sizeof(leaf_node) + td->size;
|
|
else
|
|
ds = sizeof(internal_node);
|
|
|
|
// FIXME - move back if previous item was deleted item with same key
|
|
if (size + ds > Vcb->superblock.node_size - sizeof(tree_header))
|
|
return split_tree_at(Vcb, t, td, numitems, size);
|
|
|
|
size += ds;
|
|
numitems++;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
|
|
LIST_ENTRY* le;
|
|
tree_data* nextparitem = NULL;
|
|
NTSTATUS Status;
|
|
tree *next_tree, *par;
|
|
BOOL loaded;
|
|
|
|
TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);
|
|
|
|
// FIXME - doesn't capture everything, as it doesn't ascend
|
|
// FIXME - write proper function and put it in treefuncs.c
|
|
le = t->paritem->list_entry.Flink;
|
|
while (le != &t->parent->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
|
|
if (!td->ignore) {
|
|
nextparitem = td;
|
|
break;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
if (!nextparitem)
|
|
return STATUS_SUCCESS;
|
|
|
|
// FIXME - loop, and capture more than one tree if we can
|
|
|
|
TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);
|
|
// nextparitem = t->paritem;
|
|
|
|
// ExAcquireResourceExclusiveLite(&t->parent->nonpaged->load_tree_lock, TRUE);
|
|
|
|
Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("do_load_tree returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (loaded)
|
|
increase_tree_rc(t->parent);
|
|
|
|
// ExReleaseResourceLite(&t->parent->nonpaged->load_tree_lock);
|
|
|
|
next_tree = nextparitem->treeholder.tree;
|
|
|
|
if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
|
|
// merge two trees into one
|
|
|
|
t->header.num_items += next_tree->header.num_items;
|
|
t->size += next_tree->size;
|
|
|
|
if (next_tree->header.level > 0) {
|
|
le = next_tree->itemlist.Flink;
|
|
|
|
while (le != &next_tree->itemlist) {
|
|
tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
|
|
if (td2->treeholder.tree) {
|
|
td2->treeholder.tree->parent = t;
|
|
increase_tree_rc(t);
|
|
free_tree(next_tree);
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
}
|
|
|
|
t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
|
|
t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
|
|
t->itemlist.Blink = next_tree->itemlist.Blink;
|
|
t->itemlist.Blink->Flink = &t->itemlist;
|
|
|
|
// // TESTING
|
|
// le = t->itemlist.Flink;
|
|
// while (le != &t->itemlist) {
|
|
// tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
// if (!td->ignore) {
|
|
// ERR("key: %llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);
|
|
// }
|
|
// le = le->Flink;
|
|
// }
|
|
|
|
next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;
|
|
|
|
next_tree->header.num_items = 0;
|
|
next_tree->size = 0;
|
|
|
|
if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
|
|
Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("reduce_tree_extent returned %08x\n", Status);
|
|
free_tree(next_tree);
|
|
return Status;
|
|
}
|
|
} else if (next_tree->has_address) {
|
|
Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("reduce_tree_extent returned %08x\n", Status);
|
|
free_tree(next_tree);
|
|
return Status;
|
|
}
|
|
}
|
|
|
|
if (!nextparitem->ignore) {
|
|
nextparitem->ignore = TRUE;
|
|
next_tree->parent->header.num_items--;
|
|
next_tree->parent->size -= sizeof(internal_node);
|
|
}
|
|
|
|
par = next_tree->parent;
|
|
while (par) {
|
|
add_to_tree_cache(Vcb, par, TRUE);
|
|
par = par->parent;
|
|
}
|
|
|
|
RemoveEntryList(&nextparitem->list_entry);
|
|
ExFreePool(next_tree->paritem);
|
|
next_tree->paritem = NULL;
|
|
|
|
next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
|
|
|
|
free_tree(next_tree);
|
|
|
|
// remove next_tree from tree cache
|
|
le = Vcb->tree_cache.Flink;
|
|
while (le != &Vcb->tree_cache) {
|
|
tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
|
|
if (tc2->tree == next_tree) {
|
|
free_tree(next_tree);
|
|
RemoveEntryList(le);
|
|
ExFreePool(tc2);
|
|
break;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
} else {
|
|
// rebalance by moving items from second tree into first
|
|
ULONG avg_size = (t->size + next_tree->size) / 2;
|
|
KEY firstitem = {0, 0, 0};
|
|
|
|
TRACE("attempting rebalance\n");
|
|
|
|
le = next_tree->itemlist.Flink;
|
|
while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
|
|
tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
ULONG size;
|
|
|
|
if (!td->ignore) {
|
|
if (next_tree->header.level == 0)
|
|
size = sizeof(leaf_node) + td->size;
|
|
else
|
|
size = sizeof(internal_node);
|
|
} else
|
|
size = 0;
|
|
|
|
if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
|
|
RemoveEntryList(&td->list_entry);
|
|
InsertTailList(&t->itemlist, &td->list_entry);
|
|
|
|
if (next_tree->header.level > 0 && td->treeholder.tree) {
|
|
td->treeholder.tree->parent = t;
|
|
increase_tree_rc(t);
|
|
free_tree(next_tree);
|
|
}
|
|
|
|
if (!td->ignore) {
|
|
next_tree->size -= size;
|
|
t->size += size;
|
|
next_tree->header.num_items--;
|
|
t->header.num_items++;
|
|
}
|
|
} else
|
|
break;
|
|
|
|
le = next_tree->itemlist.Flink;
|
|
}
|
|
|
|
le = next_tree->itemlist.Flink;
|
|
while (le != &next_tree->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
|
|
if (!td->ignore) {
|
|
firstitem = td->key;
|
|
break;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
// ERR("firstitem = %llx,%x,%llx\n", firstitem.obj_id, firstitem.obj_type, firstitem.offset);
|
|
|
|
// FIXME - once ascension is working, make this work with parent's parent, etc.
|
|
if (next_tree->paritem)
|
|
next_tree->paritem->key = firstitem;
|
|
|
|
par = next_tree;
|
|
while (par) {
|
|
add_to_tree_cache(Vcb, par, TRUE);
|
|
par = par->parent;
|
|
}
|
|
|
|
free_tree(next_tree);
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
NTSTATUS Status;
|
|
|
|
if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
|
|
searchkey.obj_id = address;
|
|
searchkey.obj_type = TYPE_METADATA_ITEM;
|
|
searchkey.offset = t->header.level;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (!keycmp(&tp.item->key, &searchkey)) {
|
|
EXTENT_ITEM_SKINNY_METADATA* eism;
|
|
|
|
if (tp.item->size > 0) {
|
|
eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
|
|
|
|
if (!eism) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlCopyMemory(eism, tp.item->data, tp.item->size);
|
|
} else
|
|
eism = NULL;
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(eism);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
}
|
|
|
|
searchkey.obj_id = address;
|
|
searchkey.obj_type = TYPE_EXTENT_ITEM;
|
|
searchkey.offset = 0xffffffffffffffff;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
|
|
EXTENT_ITEM_TREE* eit;
|
|
|
|
if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
|
|
|
|
if (!eit) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlCopyMemory(eit, tp.item->data, tp.item->size);
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
eit->level = level;
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(eit);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
ERR("could not find EXTENT_ITEM for address %llx\n", address);
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
static NTSTATUS STDCALL do_splits(device_extension* Vcb, LIST_ENTRY* rollback) {
|
|
// LIST_ENTRY *le, *le2;
|
|
// write_tree* wt;
|
|
// tree_data* td;
|
|
UINT8 level, max_level;
|
|
UINT32 min_size;
|
|
BOOL empty, done_deletions = FALSE;
|
|
NTSTATUS Status;
|
|
tree_cache* tc2;
|
|
|
|
TRACE("(%p)\n", Vcb);
|
|
|
|
max_level = 0;
|
|
|
|
for (level = 0; level <= 255; level++) {
|
|
LIST_ENTRY *le, *nextle;
|
|
|
|
empty = TRUE;
|
|
|
|
TRACE("doing level %u\n", level);
|
|
|
|
le = Vcb->tree_cache.Flink;
|
|
|
|
while (le != &Vcb->tree_cache) {
|
|
tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
|
|
nextle = le->Flink;
|
|
|
|
if (tc2->write && tc2->tree->header.level == level) {
|
|
empty = FALSE;
|
|
|
|
if (tc2->tree->header.num_items == 0) {
|
|
if (tc2->tree->parent) {
|
|
LIST_ENTRY* le2;
|
|
KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
|
|
|
|
done_deletions = TRUE;
|
|
|
|
le2 = tc2->tree->itemlist.Flink;
|
|
while (le2 != &tc2->tree->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
|
|
firstitem = td->key;
|
|
break;
|
|
}
|
|
TRACE("deleting tree in root %llx (first item was %llx,%x,%llx)\n",
|
|
tc2->tree->root->id, firstitem.obj_id, firstitem.obj_type, firstitem.offset);
|
|
|
|
tc2->tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
|
|
|
|
if (tc2->tree->has_new_address) { // delete associated EXTENT_ITEM
|
|
Status = reduce_tree_extent(Vcb, tc2->tree->new_address, tc2->tree, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("reduce_tree_extent returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
} else if (tc2->tree->has_address) {
|
|
Status = reduce_tree_extent(Vcb,tc2->tree->header.address, tc2->tree, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("reduce_tree_extent returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
}
|
|
|
|
if (!tc2->tree->paritem->ignore) {
|
|
tc2->tree->paritem->ignore = TRUE;
|
|
tc2->tree->parent->header.num_items--;
|
|
tc2->tree->parent->size -= sizeof(internal_node);
|
|
}
|
|
|
|
RemoveEntryList(&tc2->tree->paritem->list_entry);
|
|
ExFreePool(tc2->tree->paritem);
|
|
tc2->tree->paritem = NULL;
|
|
|
|
free_tree(tc2->tree);
|
|
|
|
RemoveEntryList(le);
|
|
ExFreePool(tc2);
|
|
} else if (tc2->tree->header.level != 0) {
|
|
if (tc2->tree->has_new_address) {
|
|
Status = update_extent_level(Vcb, tc2->tree->new_address, tc2->tree, 0, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("update_extent_level returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
}
|
|
|
|
tc2->tree->header.level = 0;
|
|
}
|
|
} else if (tc2->tree->size > Vcb->superblock.node_size - sizeof(tree_header)) {
|
|
TRACE("splitting overlarge tree (%x > %x)\n", tc2->tree->size, Vcb->superblock.node_size - sizeof(tree_header));
|
|
Status = split_tree(Vcb, tc2->tree);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("split_tree returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
}
|
|
}
|
|
|
|
le = nextle;
|
|
}
|
|
|
|
if (!empty) {
|
|
max_level = level;
|
|
} else {
|
|
TRACE("nothing found for level %u\n", level);
|
|
break;
|
|
}
|
|
}
|
|
|
|
min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;
|
|
|
|
for (level = 0; level <= max_level; level++) {
|
|
LIST_ENTRY* le;
|
|
|
|
le = Vcb->tree_cache.Flink;
|
|
|
|
while (le != &Vcb->tree_cache) {
|
|
tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
|
|
if (tc2->write && tc2->tree->header.level == level && tc2->tree->header.num_items > 0 && tc2->tree->parent && tc2->tree->size < min_size) {
|
|
Status = try_tree_amalgamate(Vcb, tc2->tree, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("try_tree_amalgamate returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
}
|
|
|
|
// simplify trees if top tree only has one entry
|
|
|
|
if (done_deletions) {
|
|
for (level = max_level; level > 0; level--) {
|
|
LIST_ENTRY *le, *nextle;
|
|
|
|
le = Vcb->tree_cache.Flink;
|
|
while (le != &Vcb->tree_cache) {
|
|
nextle = le->Flink;
|
|
tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
|
|
if (tc2->write && tc2->tree->header.level == level) {
|
|
if (!tc2->tree->parent && tc2->tree->header.num_items == 1) {
|
|
LIST_ENTRY* le2 = tc2->tree->itemlist.Flink;
|
|
tree_data* td;
|
|
tree* child_tree = NULL;
|
|
|
|
while (le2 != &tc2->tree->itemlist) {
|
|
td = CONTAINING_RECORD(le2, tree_data, list_entry);
|
|
if (!td->ignore)
|
|
break;
|
|
le2 = le2->Flink;
|
|
}
|
|
|
|
TRACE("deleting top-level tree in root %llx with one item\n", tc2->tree->root->id);
|
|
|
|
if (tc2->tree->has_new_address) { // delete associated EXTENT_ITEM
|
|
Status = reduce_tree_extent(Vcb, tc2->tree->new_address, tc2->tree, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("reduce_tree_extent returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
} else if (tc2->tree->has_address) {
|
|
Status = reduce_tree_extent(Vcb,tc2->tree->header.address, tc2->tree, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("reduce_tree_extent returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
}
|
|
|
|
if (!td->treeholder.tree) { // load first item if not already loaded
|
|
KEY searchkey = {0,0,0};
|
|
traverse_ptr tp;
|
|
|
|
Status = find_item(Vcb, tc2->tree->root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
}
|
|
|
|
child_tree = td->treeholder.tree;
|
|
|
|
if (child_tree) {
|
|
child_tree->parent = NULL;
|
|
child_tree->paritem = NULL;
|
|
free_tree(tc2->tree);
|
|
}
|
|
|
|
tc2->tree->root->root_item.bytes_used -= Vcb->superblock.node_size;
|
|
|
|
free_tree(tc2->tree);
|
|
|
|
if (child_tree)
|
|
child_tree->root->treeholder.tree = child_tree;
|
|
|
|
RemoveEntryList(le);
|
|
ExFreePool(tc2);
|
|
}
|
|
}
|
|
|
|
le = nextle;
|
|
}
|
|
}
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
NTSTATUS STDCALL do_write(device_extension* Vcb, LIST_ENTRY* rollback) {
|
|
NTSTATUS Status;
|
|
LIST_ENTRY* le;
|
|
|
|
TRACE("(%p)\n", Vcb);
|
|
|
|
// If only changing superblock, e.g. changing label, we still need to rewrite
|
|
// the root tree so the generations match, otherwise you won't be able to mount on Linux.
|
|
if (Vcb->write_trees > 0) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
|
|
searchkey.obj_id = 0;
|
|
searchkey.obj_type = 0;
|
|
searchkey.offset = 0;
|
|
|
|
Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
add_to_tree_cache(Vcb, Vcb->root_root->treeholder.tree, TRUE);
|
|
|
|
free_traverse_ptr(&tp);
|
|
}
|
|
|
|
do {
|
|
Status = add_parents(Vcb, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("add_parents returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
Status = do_splits(Vcb, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("do_splits returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
Status = allocate_tree_extents(Vcb, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("add_parents returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
Status = update_chunk_usage(Vcb, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("update_chunk_usage returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
} while (!trees_consistent(Vcb));
|
|
|
|
TRACE("trees consistent\n");
|
|
|
|
Status = update_root_root(Vcb, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("update_root_root returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
Status = write_trees(Vcb);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("write_trees returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
Status = write_superblocks(Vcb);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("write_superblocks returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
clean_space_cache(Vcb);
|
|
|
|
Vcb->superblock.generation++;
|
|
|
|
// print_trees(tc); // TESTING
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
le = Vcb->tree_cache.Flink;
|
|
while (le != &Vcb->tree_cache) {
|
|
tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
|
|
|
|
tc2->write = FALSE;
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
Vcb->write_trees = 0;
|
|
|
|
end:
|
|
TRACE("do_write returning %08x\n", Status);
|
|
|
|
return Status;
|
|
}
|
|
|
|
NTSTATUS consider_write(device_extension* Vcb) {
|
|
// FIXME - call do_write if Vcb->write_trees high
|
|
#if 0
|
|
LIST_ENTRY rollback;
|
|
NTSTATUS Status = STATUS_SUCCESS;
|
|
|
|
InitializeListHead(&rollback);
|
|
|
|
if (Vcb->write_trees > 0)
|
|
Status = do_write(Vcb, &rollback);
|
|
|
|
free_tree_cache(&Vcb->tree_cache);
|
|
|
|
if (!NT_SUCCESS(Status))
|
|
do_rollback(Vcb, &rollback);
|
|
else
|
|
clear_rollback(&rollback);
|
|
|
|
return Status;
|
|
#else
|
|
return STATUS_SUCCESS;
|
|
#endif
|
|
}
|
|
|
|
static __inline void insert_into_ordered_list(LIST_ENTRY* list, ordered_list* ins) {
|
|
LIST_ENTRY* le = list->Flink;
|
|
ordered_list* ol;
|
|
|
|
while (le != list) {
|
|
ol = (ordered_list*)le;
|
|
|
|
if (ol->key > ins->key) {
|
|
le->Blink->Flink = &ins->list_entry;
|
|
ins->list_entry.Blink = le->Blink;
|
|
le->Blink = &ins->list_entry;
|
|
ins->list_entry.Flink = le;
|
|
return;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
InsertTailList(list, &ins->list_entry);
|
|
}
|
|
|
|
static UINT64 get_extent_data_ref_hash(UINT64 root, UINT64 objid, UINT64 offset) {
|
|
UINT32 high_crc = 0xffffffff, low_crc = 0xffffffff;
|
|
|
|
// FIXME - can we test this?
|
|
|
|
// FIXME - make sure numbers here are little-endian
|
|
high_crc = calc_crc32c(high_crc, (UINT8*)&root, sizeof(UINT64));
|
|
low_crc = calc_crc32c(low_crc, (UINT8*)&objid, sizeof(UINT64));
|
|
low_crc = calc_crc32c(low_crc, (UINT8*)&offset, sizeof(UINT64));
|
|
|
|
return ((UINT64)high_crc << 31) ^ (UINT64)low_crc;
|
|
}
|
|
|
|
NTSTATUS STDCALL add_extent_ref(device_extension* Vcb, UINT64 address, UINT64 size, root* subvol, UINT64 inode, UINT64 offset, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
EXTENT_ITEM* ei;
|
|
UINT8 *siptr, *type;
|
|
ULONG len;
|
|
UINT64 hash;
|
|
EXTENT_DATA_REF* edr;
|
|
NTSTATUS Status;
|
|
|
|
TRACE("(%p, %llx, %llx, %llx, %llx, %llx)\n", Vcb, address, size, subvol->id, inode, offset);
|
|
|
|
searchkey.obj_id = address;
|
|
searchkey.obj_type = TYPE_EXTENT_ITEM;
|
|
searchkey.offset = size;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (keycmp(&tp.item->key, &searchkey)) {
|
|
// create new entry
|
|
|
|
len = sizeof(EXTENT_ITEM) + sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
|
|
free_traverse_ptr(&tp);
|
|
|
|
ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
|
|
if (!ei) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
ei->refcount = 1;
|
|
ei->generation = Vcb->superblock.generation;
|
|
ei->flags = EXTENT_ITEM_DATA;
|
|
|
|
type = (UINT8*)&ei[1];
|
|
*type = TYPE_EXTENT_DATA_REF;
|
|
|
|
edr = (EXTENT_DATA_REF*)&type[1];
|
|
edr->root = subvol->id;
|
|
edr->objid = inode;
|
|
edr->offset = offset;
|
|
edr->count = 1;
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
|
|
ERR("error - failed to insert item\n");
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
// FIXME - update free space in superblock and CHUNK_ITEM
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
|
|
NTSTATUS Status = convert_old_data_extent(Vcb, address, size, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("convert_old_data_extent returned %08x\n", Status);
|
|
free_traverse_ptr(&tp);
|
|
return Status;
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (keycmp(&tp.item->key, &searchkey)) {
|
|
WARN("extent item not found for address %llx, size %llx\n", address, size);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
}
|
|
|
|
ei = (EXTENT_ITEM*)tp.item->data;
|
|
|
|
if (tp.item->size < sizeof(EXTENT_ITEM)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
if (extent_item_is_shared(ei, tp.item->size - sizeof(EXTENT_ITEM))) {
|
|
NTSTATUS Status = convert_shared_data_extent(Vcb, address, size, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("convert_shared_data_extent returned %08x\n", Status);
|
|
free_traverse_ptr(&tp);
|
|
return Status;
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (keycmp(&tp.item->key, &searchkey)) {
|
|
WARN("extent item not found for address %llx, size %llx\n", address, size);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
ei = (EXTENT_ITEM*)tp.item->data;
|
|
}
|
|
|
|
if (ei->flags != EXTENT_ITEM_DATA) {
|
|
ERR("error - flag was not EXTENT_ITEM_DATA\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
// FIXME - is ei->refcount definitely the number of items, or is it the sum of the subitem refcounts?
|
|
|
|
hash = get_extent_data_ref_hash(subvol->id, inode, offset);
|
|
|
|
len = tp.item->size - sizeof(EXTENT_ITEM);
|
|
siptr = (UINT8*)&ei[1];
|
|
|
|
// FIXME - increase subitem refcount if there already?
|
|
do {
|
|
if (*siptr == TYPE_EXTENT_DATA_REF) {
|
|
UINT64 sihash;
|
|
|
|
edr = (EXTENT_DATA_REF*)&siptr[1];
|
|
sihash = get_extent_data_ref_hash(edr->root, edr->objid, edr->offset);
|
|
|
|
if (sihash >= hash)
|
|
break;
|
|
|
|
siptr += sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
|
|
|
|
if (len > sizeof(EXTENT_DATA_REF) + sizeof(UINT8)) {
|
|
len -= sizeof(EXTENT_DATA_REF) + sizeof(UINT8);
|
|
} else
|
|
break;
|
|
// FIXME - TYPE_TREE_BLOCK_REF 0xB0
|
|
} else {
|
|
ERR("unrecognized extent subitem %x\n", *siptr);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
} while (len > 0);
|
|
|
|
len = tp.item->size + sizeof(UINT8) + sizeof(EXTENT_DATA_REF); // FIXME - die if too big
|
|
ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
|
|
if (!ei) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlCopyMemory(ei, tp.item->data, siptr - tp.item->data);
|
|
ei->refcount++;
|
|
|
|
type = (UINT8*)ei + (siptr - tp.item->data);
|
|
*type = TYPE_EXTENT_DATA_REF;
|
|
|
|
edr = (EXTENT_DATA_REF*)&type[1];
|
|
edr->root = subvol->id;
|
|
edr->objid = inode;
|
|
edr->offset = offset;
|
|
edr->count = 1;
|
|
|
|
if (siptr < tp.item->data + tp.item->size)
|
|
RtlCopyMemory(&edr[1], siptr, tp.item->data + tp.item->size - siptr);
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
|
|
ERR("error - failed to insert item\n");
|
|
ExFreePool(ei);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
typedef struct {
|
|
EXTENT_DATA_REF edr;
|
|
LIST_ENTRY list_entry;
|
|
} data_ref;
|
|
|
|
static void add_data_ref(LIST_ENTRY* data_refs, UINT64 root, UINT64 objid, UINT64 offset) {
|
|
data_ref* dr = ExAllocatePoolWithTag(PagedPool, sizeof(data_ref), ALLOC_TAG);
|
|
|
|
if (!dr) {
|
|
ERR("out of memory\n");
|
|
return;
|
|
}
|
|
|
|
// FIXME - increase count if entry there already
|
|
// FIXME - put in order?
|
|
|
|
dr->edr.root = root;
|
|
dr->edr.objid = objid;
|
|
dr->edr.offset = offset;
|
|
dr->edr.count = 1;
|
|
|
|
InsertTailList(data_refs, &dr->list_entry);
|
|
}
|
|
|
|
static void free_data_refs(LIST_ENTRY* data_refs) {
|
|
while (!IsListEmpty(data_refs)) {
|
|
LIST_ENTRY* le = RemoveHeadList(data_refs);
|
|
data_ref* dr = CONTAINING_RECORD(le, data_ref, list_entry);
|
|
|
|
ExFreePool(dr);
|
|
}
|
|
}
|
|
|
|
static NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp, next_tp;
|
|
BOOL b;
|
|
LIST_ENTRY data_refs;
|
|
LIST_ENTRY* le;
|
|
UINT64 refcount;
|
|
EXTENT_ITEM* ei;
|
|
ULONG eisize;
|
|
UINT8* type;
|
|
NTSTATUS Status;
|
|
|
|
searchkey.obj_id = address;
|
|
searchkey.obj_type = TYPE_EXTENT_ITEM;
|
|
searchkey.offset = size;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (keycmp(&tp.item->key, &searchkey)) {
|
|
WARN("extent item not found for address %llx, size %llx\n", address, size);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
if (tp.item->size != sizeof(EXTENT_ITEM_V0)) {
|
|
TRACE("extent does not appear to be old - returning STATUS_SUCCESS\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
searchkey.obj_id = address;
|
|
searchkey.obj_type = TYPE_EXTENT_REF_V0;
|
|
searchkey.offset = 0;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
InitializeListHead(&data_refs);
|
|
|
|
do {
|
|
b = find_next_item(Vcb, &tp, &next_tp, FALSE);
|
|
|
|
if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
|
|
tree* t;
|
|
|
|
// normally we'd need to acquire load_tree_lock here, but we're protected by the write tree lock
|
|
|
|
Status = load_tree(Vcb, tp.item->key.offset, NULL, &t);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("load tree for address %llx returned %08x\n", tp.item->key.offset, Status);
|
|
free_traverse_ptr(&tp);
|
|
free_data_refs(&data_refs);
|
|
return Status;
|
|
}
|
|
|
|
if (t->header.level == 0) {
|
|
le = t->itemlist.Flink;
|
|
while (le != &t->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
|
|
if (!td->ignore && td->key.obj_type == TYPE_EXTENT_DATA) {
|
|
EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
|
|
|
|
if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
|
|
EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
|
|
|
|
if (ed2->address == address)
|
|
add_data_ref(&data_refs, t->header.tree_id, td->key.obj_id, td->key.offset);
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
}
|
|
|
|
free_tree(t);
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
}
|
|
|
|
if (b) {
|
|
free_traverse_ptr(&tp);
|
|
tp = next_tp;
|
|
|
|
if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
|
|
break;
|
|
}
|
|
} while (b);
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
if (IsListEmpty(&data_refs)) {
|
|
WARN("no data refs found\n");
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
// create new entry
|
|
|
|
refcount = 0;
|
|
|
|
le = data_refs.Flink;
|
|
while (le != &data_refs) {
|
|
refcount++;
|
|
le = le->Flink;
|
|
}
|
|
|
|
eisize = sizeof(EXTENT_ITEM) + ((sizeof(char) + sizeof(EXTENT_DATA_REF)) * refcount);
|
|
ei = ExAllocatePoolWithTag(PagedPool, eisize, ALLOC_TAG);
|
|
if (!ei) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
ei->refcount = refcount;
|
|
ei->generation = Vcb->superblock.generation;
|
|
ei->flags = EXTENT_ITEM_DATA;
|
|
|
|
type = (UINT8*)&ei[1];
|
|
|
|
le = data_refs.Flink;
|
|
while (le != &data_refs) {
|
|
data_ref* dr = CONTAINING_RECORD(le, data_ref, list_entry);
|
|
|
|
type[0] = TYPE_EXTENT_DATA_REF;
|
|
RtlCopyMemory(&type[1], &dr->edr, sizeof(EXTENT_DATA_REF));
|
|
|
|
type = &type[1 + sizeof(EXTENT_DATA_REF)];
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, eisize, NULL, rollback)) {
|
|
ERR("error - failed to insert item\n");
|
|
ExFreePool(ei);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
free_data_refs(&data_refs);
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
typedef struct {
|
|
UINT8 type;
|
|
void* data;
|
|
BOOL allocated;
|
|
LIST_ENTRY list_entry;
|
|
} extent_ref;
|
|
|
|
static void free_extent_refs(LIST_ENTRY* extent_refs) {
|
|
while (!IsListEmpty(extent_refs)) {
|
|
LIST_ENTRY* le = RemoveHeadList(extent_refs);
|
|
extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
|
|
|
|
if (er->allocated)
|
|
ExFreePool(er->data);
|
|
|
|
ExFreePool(er);
|
|
}
|
|
}
|
|
|
|
static NTSTATUS convert_shared_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
LIST_ENTRY extent_refs;
|
|
LIST_ENTRY *le, *next_le;
|
|
EXTENT_ITEM *ei, *newei;
|
|
UINT8* siptr;
|
|
ULONG len;
|
|
UINT64 count;
|
|
NTSTATUS Status;
|
|
|
|
searchkey.obj_id = address;
|
|
searchkey.obj_type = TYPE_EXTENT_ITEM;
|
|
searchkey.offset = size;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (keycmp(&tp.item->key, &searchkey)) {
|
|
WARN("extent item not found for address %llx, size %llx\n", address, size);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
if (tp.item->size < sizeof(EXTENT_ITEM)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
ei = (EXTENT_ITEM*)tp.item->data;
|
|
len = tp.item->size - sizeof(EXTENT_ITEM);
|
|
|
|
InitializeListHead(&extent_refs);
|
|
|
|
siptr = (UINT8*)&ei[1];
|
|
|
|
do {
|
|
extent_ref* er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG);
|
|
if (!er) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
er->type = *siptr;
|
|
er->data = siptr+1;
|
|
er->allocated = FALSE;
|
|
|
|
InsertTailList(&extent_refs, &er->list_entry);
|
|
|
|
if (*siptr == TYPE_TREE_BLOCK_REF) {
|
|
siptr += sizeof(TREE_BLOCK_REF);
|
|
len -= sizeof(TREE_BLOCK_REF) + 1;
|
|
} else if (*siptr == TYPE_EXTENT_DATA_REF) {
|
|
siptr += sizeof(EXTENT_DATA_REF);
|
|
len -= sizeof(EXTENT_DATA_REF) + 1;
|
|
} else if (*siptr == TYPE_SHARED_BLOCK_REF) {
|
|
siptr += sizeof(SHARED_BLOCK_REF);
|
|
len -= sizeof(SHARED_BLOCK_REF) + 1;
|
|
} else if (*siptr == TYPE_SHARED_DATA_REF) {
|
|
siptr += sizeof(SHARED_DATA_REF);
|
|
len -= sizeof(SHARED_DATA_REF) + 1;
|
|
} else {
|
|
ERR("unrecognized extent subitem %x\n", *siptr);
|
|
free_traverse_ptr(&tp);
|
|
free_extent_refs(&extent_refs);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
} while (len > 0);
|
|
|
|
le = extent_refs.Flink;
|
|
while (le != &extent_refs) {
|
|
extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
|
|
next_le = le->Flink;
|
|
|
|
if (er->type == TYPE_SHARED_DATA_REF) {
|
|
// normally we'd need to acquire load_tree_lock here, but we're protected by the write tree lock
|
|
SHARED_DATA_REF* sdr = er->data;
|
|
tree* t;
|
|
|
|
Status = load_tree(Vcb, sdr->offset, NULL, &t);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("load_tree for address %llx returned %08x\n", sdr->offset, Status);
|
|
free_traverse_ptr(&tp);
|
|
free_data_refs(&extent_refs);
|
|
return Status;
|
|
}
|
|
|
|
if (t->header.level == 0) {
|
|
LIST_ENTRY* le2 = t->itemlist.Flink;
|
|
while (le2 != &t->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
|
|
|
|
if (!td->ignore && td->key.obj_type == TYPE_EXTENT_DATA) {
|
|
EXTENT_DATA* ed = (EXTENT_DATA*)td->data;
|
|
|
|
if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
|
|
EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
|
|
|
|
if (ed2->address == address) {
|
|
extent_ref* er2;
|
|
EXTENT_DATA_REF* edr;
|
|
|
|
er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG);
|
|
if (!er2) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG);
|
|
if (!edr) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
ExFreePool(er2);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
edr->root = t->header.tree_id;
|
|
edr->objid = td->key.obj_id;
|
|
edr->offset = td->key.offset;
|
|
edr->count = 1;
|
|
|
|
er2->type = TYPE_EXTENT_DATA_REF;
|
|
er2->data = edr;
|
|
er2->allocated = TRUE;
|
|
|
|
InsertTailList(&extent_refs, &er2->list_entry); // FIXME - list should be in order
|
|
}
|
|
}
|
|
}
|
|
|
|
le2 = le2->Flink;
|
|
}
|
|
}
|
|
|
|
free_tree(t);
|
|
|
|
RemoveEntryList(&er->list_entry);
|
|
|
|
if (er->allocated)
|
|
ExFreePool(er->data);
|
|
|
|
ExFreePool(er);
|
|
}
|
|
// FIXME - also do for SHARED_BLOCK_REF?
|
|
|
|
le = next_le;
|
|
}
|
|
|
|
if (IsListEmpty(&extent_refs)) {
|
|
WARN("no extent refs found\n");
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
len = 0;
|
|
count = 0;
|
|
le = extent_refs.Flink;
|
|
while (le != &extent_refs) {
|
|
extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
|
|
|
|
len++;
|
|
if (er->type == TYPE_TREE_BLOCK_REF) {
|
|
len += sizeof(TREE_BLOCK_REF);
|
|
} else if (er->type == TYPE_EXTENT_DATA_REF) {
|
|
len += sizeof(EXTENT_DATA_REF);
|
|
} else {
|
|
ERR("unexpected extent subitem %x\n", er->type);
|
|
}
|
|
|
|
count++;
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
newei = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM) + len, ALLOC_TAG);
|
|
if (!newei) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlCopyMemory(newei, ei, sizeof(EXTENT_ITEM));
|
|
newei->refcount = count;
|
|
|
|
siptr = (UINT8*)&newei[1];
|
|
le = extent_refs.Flink;
|
|
while (le != &extent_refs) {
|
|
extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
|
|
|
|
*siptr = er->type;
|
|
siptr++;
|
|
|
|
if (er->type == TYPE_TREE_BLOCK_REF) {
|
|
RtlCopyMemory(siptr, er->data, sizeof(TREE_BLOCK_REF));
|
|
} else if (er->type == TYPE_EXTENT_DATA_REF) {
|
|
RtlCopyMemory(siptr, er->data, sizeof(EXTENT_DATA_REF));
|
|
} else {
|
|
ERR("unexpected extent subitem %x\n", er->type);
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
free_traverse_ptr(&tp);
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, newei, sizeof(EXTENT_ITEM) + len, NULL, rollback)) {
|
|
ERR("error - failed to insert item\n");
|
|
ExFreePool(newei);
|
|
free_extent_refs(&extent_refs);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
free_extent_refs(&extent_refs);
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len) {
|
|
UINT8* siptr = (UINT8*)&ei[1];
|
|
|
|
do {
|
|
if (*siptr == TYPE_TREE_BLOCK_REF) {
|
|
siptr += sizeof(TREE_BLOCK_REF) + 1;
|
|
len -= sizeof(TREE_BLOCK_REF) + 1;
|
|
} else if (*siptr == TYPE_EXTENT_DATA_REF) {
|
|
siptr += sizeof(EXTENT_DATA_REF) + 1;
|
|
len -= sizeof(EXTENT_DATA_REF) + 1;
|
|
} else if (*siptr == TYPE_SHARED_BLOCK_REF) {
|
|
return TRUE;
|
|
} else if (*siptr == TYPE_SHARED_DATA_REF) {
|
|
return TRUE;
|
|
} else {
|
|
ERR("unrecognized extent subitem %x\n", *siptr);
|
|
return FALSE;
|
|
}
|
|
} while (len > 0);
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
NTSTATUS STDCALL remove_extent_ref(device_extension* Vcb, UINT64 address, UINT64 size, root* subvol, UINT64 inode, UINT64 offset, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
EXTENT_ITEM* ei;
|
|
UINT8* siptr;
|
|
ULONG len;
|
|
EXTENT_DATA_REF* edr;
|
|
BOOL found;
|
|
NTSTATUS Status;
|
|
|
|
TRACE("(%p, %llx, %llx, %llx, %llx, %llx)\n", Vcb, address, size, subvol->id, inode, offset);
|
|
|
|
searchkey.obj_id = address;
|
|
searchkey.obj_type = TYPE_EXTENT_ITEM;
|
|
searchkey.offset = size;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (keycmp(&tp.item->key, &searchkey)) {
|
|
WARN("extent item not found for address %llx, size %llx\n", address, size);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
|
|
NTSTATUS Status = convert_old_data_extent(Vcb, address, size, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("convert_old_data_extent returned %08x\n", Status);
|
|
free_traverse_ptr(&tp);
|
|
return Status;
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (keycmp(&tp.item->key, &searchkey)) {
|
|
WARN("extent item not found for address %llx, size %llx\n", address, size);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
}
|
|
|
|
ei = (EXTENT_ITEM*)tp.item->data;
|
|
|
|
if (tp.item->size < sizeof(EXTENT_ITEM)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
if (!(ei->flags & EXTENT_ITEM_DATA)) {
|
|
ERR("error - EXTENT_ITEM_DATA flag not set\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
// FIXME - is ei->refcount definitely the number of items, or is it the sum of the subitem refcounts?
|
|
|
|
if (extent_item_is_shared(ei, tp.item->size - sizeof(EXTENT_ITEM))) {
|
|
NTSTATUS Status = convert_shared_data_extent(Vcb, address, size, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("convert_shared_data_extent returned %08x\n", Status);
|
|
free_traverse_ptr(&tp);
|
|
return Status;
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (keycmp(&tp.item->key, &searchkey)) {
|
|
WARN("extent item not found for address %llx, size %llx\n", address, size);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
ei = (EXTENT_ITEM*)tp.item->data;
|
|
}
|
|
|
|
len = tp.item->size - sizeof(EXTENT_ITEM);
|
|
siptr = (UINT8*)&ei[1];
|
|
found = FALSE;
|
|
|
|
do {
|
|
if (*siptr == TYPE_EXTENT_DATA_REF) {
|
|
edr = (EXTENT_DATA_REF*)&siptr[1];
|
|
|
|
if (edr->root == subvol->id && edr->objid == inode && edr->offset == offset) {
|
|
found = TRUE;
|
|
break;
|
|
}
|
|
|
|
siptr += sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
|
|
|
|
if (len > sizeof(EXTENT_DATA_REF) + sizeof(UINT8)) {
|
|
len -= sizeof(EXTENT_DATA_REF) + sizeof(UINT8);
|
|
} else
|
|
break;
|
|
// // FIXME - TYPE_TREE_BLOCK_REF 0xB0
|
|
} else {
|
|
ERR("unrecognized extent subitem %x\n", *siptr);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
} while (len > 0);
|
|
|
|
if (!found) {
|
|
WARN("could not find extent data ref\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
// FIXME - decrease subitem refcount if there already?
|
|
|
|
len = tp.item->size - sizeof(UINT8) - sizeof(EXTENT_DATA_REF);
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
if (len == sizeof(EXTENT_ITEM)) { // extent no longer needed
|
|
chunk* c;
|
|
LIST_ENTRY* le2;
|
|
|
|
if (changed_sector_list) {
|
|
changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
|
|
if (!sc) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
sc->ol.key = address;
|
|
sc->checksums = NULL;
|
|
sc->length = size / Vcb->superblock.sector_size;
|
|
|
|
sc->deleted = TRUE;
|
|
|
|
insert_into_ordered_list(changed_sector_list, &sc->ol);
|
|
}
|
|
|
|
c = NULL;
|
|
le2 = Vcb->chunks.Flink;
|
|
while (le2 != &Vcb->chunks) {
|
|
c = CONTAINING_RECORD(le2, chunk, list_entry);
|
|
|
|
TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);
|
|
|
|
if (address >= c->offset && address + size < c->offset + c->chunk_item->size)
|
|
break;
|
|
|
|
le2 = le2->Flink;
|
|
}
|
|
if (le2 == &Vcb->chunks) c = NULL;
|
|
|
|
if (c) {
|
|
decrease_chunk_usage(c, size);
|
|
|
|
add_to_space_list(c, address, size, SPACE_TYPE_DELETING);
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
|
|
if (!ei) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlCopyMemory(ei, tp.item->data, siptr - tp.item->data);
|
|
ei->refcount--;
|
|
ei->generation = Vcb->superblock.generation;
|
|
|
|
if (tp.item->data + len != siptr)
|
|
RtlCopyMemory((UINT8*)ei + (siptr - tp.item->data), siptr + sizeof(UINT8) + sizeof(EXTENT_DATA_REF), tp.item->size - (siptr - tp.item->data) - sizeof(UINT8) - sizeof(EXTENT_DATA_REF));
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
|
|
ERR("error - failed to insert item\n");
|
|
ExFreePool(ei);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static __inline BOOL entry_in_ordered_list(LIST_ENTRY* list, UINT64 value) {
|
|
LIST_ENTRY* le = list->Flink;
|
|
ordered_list* ol;
|
|
|
|
while (le != list) {
|
|
ol = (ordered_list*)le;
|
|
|
|
if (ol->key > value)
|
|
return FALSE;
|
|
else if (ol->key == value)
|
|
return TRUE;
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp, next_tp;
|
|
NTSTATUS Status;
|
|
BOOL b;
|
|
|
|
TRACE("(%p, (%llx, %llx), %llx, %llx, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, end_data, changed_sector_list);
|
|
|
|
searchkey.obj_id = fcb->inode;
|
|
searchkey.obj_type = TYPE_EXTENT_DATA;
|
|
searchkey.offset = start_data;
|
|
|
|
Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
do {
|
|
EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
|
|
EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
|
|
UINT64 len;
|
|
|
|
if (tp.item->size < sizeof(EXTENT_DATA)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
b = find_next_item(Vcb, &tp, &next_tp, FALSE);
|
|
|
|
len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
|
|
|
|
if (tp.item->key.offset < end_data && tp.item->key.offset + len >= start_data) {
|
|
if (ed->compression != BTRFS_COMPRESSION_NONE) {
|
|
FIXME("FIXME - compression not supported at present\n");
|
|
Status = STATUS_NOT_SUPPORTED;
|
|
goto end;
|
|
}
|
|
|
|
if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
|
|
WARN("root %llx, inode %llx, extent %llx: encryption not supported (type %x)\n", fcb->subvol->id, fcb->inode, tp.item->key.offset, ed->encryption);
|
|
Status = STATUS_NOT_SUPPORTED;
|
|
goto end;
|
|
}
|
|
|
|
if (ed->encoding != BTRFS_ENCODING_NONE) {
|
|
WARN("other encodings not supported\n");
|
|
Status = STATUS_NOT_SUPPORTED;
|
|
goto end;
|
|
}
|
|
|
|
if (ed->type == EXTENT_TYPE_INLINE) {
|
|
if (start_data <= tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove all
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
fcb->inode_item.st_blocks -= len;
|
|
} else if (start_data <= tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove beginning
|
|
EXTENT_DATA* ned;
|
|
UINT64 size;
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
size = len - (end_data - tp.item->key.offset);
|
|
|
|
ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
|
|
if (!ned) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
ned->generation = Vcb->superblock.generation;
|
|
ned->decoded_size = size;
|
|
ned->compression = ed->compression;
|
|
ned->encryption = ed->encryption;
|
|
ned->encoding = ed->encoding;
|
|
ned->type = ed->type;
|
|
|
|
RtlCopyMemory(&ned->data[0], &ed->data[end_data - tp.item->key.offset], size);
|
|
|
|
if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(ned);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
fcb->inode_item.st_blocks -= end_data - tp.item->key.offset;
|
|
} else if (start_data > tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove end
|
|
EXTENT_DATA* ned;
|
|
UINT64 size;
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
size = start_data - tp.item->key.offset;
|
|
|
|
ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
|
|
if (!ned) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
ned->generation = Vcb->superblock.generation;
|
|
ned->decoded_size = size;
|
|
ned->compression = ed->compression;
|
|
ned->encryption = ed->encryption;
|
|
ned->encoding = ed->encoding;
|
|
ned->type = ed->type;
|
|
|
|
RtlCopyMemory(&ned->data[0], &ed->data[0], size);
|
|
|
|
if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(ned);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
fcb->inode_item.st_blocks -= tp.item->key.offset + len - start_data;
|
|
} else if (start_data > tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove middle
|
|
EXTENT_DATA* ned;
|
|
UINT64 size;
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
size = start_data - tp.item->key.offset;
|
|
|
|
ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
|
|
if (!ned) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
ned->generation = Vcb->superblock.generation;
|
|
ned->decoded_size = size;
|
|
ned->compression = ed->compression;
|
|
ned->encryption = ed->encryption;
|
|
ned->encoding = ed->encoding;
|
|
ned->type = ed->type;
|
|
|
|
RtlCopyMemory(&ned->data[0], &ed->data[0], size);
|
|
|
|
if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(ned);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
size = tp.item->key.offset + len - end_data;
|
|
|
|
ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
|
|
if (!ned) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
ned->generation = Vcb->superblock.generation;
|
|
ned->decoded_size = size;
|
|
ned->compression = ed->compression;
|
|
ned->encryption = ed->encryption;
|
|
ned->encoding = ed->encoding;
|
|
ned->type = ed->type;
|
|
|
|
RtlCopyMemory(&ned->data[0], &ed->data[end_data - tp.item->key.offset], size);
|
|
|
|
if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(ned);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
fcb->inode_item.st_blocks -= end_data - start_data;
|
|
}
|
|
} else if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
|
|
if (start_data <= tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove all
|
|
if (ed2->address != 0) {
|
|
Status = remove_extent_ref(Vcb, ed2->address, ed2->size, fcb->subvol, fcb->inode, tp.item->key.offset - ed2->offset, changed_sector_list, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("remove_extent_ref returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
fcb->inode_item.st_blocks -= len;
|
|
}
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
} else if (start_data <= tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove beginning
|
|
EXTENT_DATA* ned;
|
|
EXTENT_DATA2* ned2;
|
|
|
|
if (ed2->address != 0)
|
|
fcb->inode_item.st_blocks -= end_data - tp.item->key.offset;
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
|
|
if (!ned) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
ned2 = (EXTENT_DATA2*)&ned->data[0];
|
|
|
|
ned->generation = Vcb->superblock.generation;
|
|
ned->decoded_size = ed->decoded_size;
|
|
ned->compression = ed->compression;
|
|
ned->encryption = ed->encryption;
|
|
ned->encoding = ed->encoding;
|
|
ned->type = ed->type;
|
|
ned2->address = ed2->address;
|
|
ned2->size = ed2->size;
|
|
ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - tp.item->key.offset));
|
|
ned2->num_bytes = ed2->num_bytes - (end_data - tp.item->key.offset);
|
|
|
|
if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(ned);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
} else if (start_data > tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove end
|
|
EXTENT_DATA* ned;
|
|
EXTENT_DATA2* ned2;
|
|
|
|
if (ed2->address != 0)
|
|
fcb->inode_item.st_blocks -= tp.item->key.offset + len - start_data;
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
|
|
if (!ned) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
ned2 = (EXTENT_DATA2*)&ned->data[0];
|
|
|
|
ned->generation = Vcb->superblock.generation;
|
|
ned->decoded_size = ed->decoded_size;
|
|
ned->compression = ed->compression;
|
|
ned->encryption = ed->encryption;
|
|
ned->encoding = ed->encoding;
|
|
ned->type = ed->type;
|
|
ned2->address = ed2->address;
|
|
ned2->size = ed2->size;
|
|
ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
|
|
ned2->num_bytes = start_data - tp.item->key.offset;
|
|
|
|
if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(ned);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
} else if (start_data > tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove middle
|
|
EXTENT_DATA* ned;
|
|
EXTENT_DATA2* ned2;
|
|
|
|
if (ed2->address != 0)
|
|
fcb->inode_item.st_blocks -= end_data - start_data;
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
|
|
ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
|
|
if (!ned) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
ned2 = (EXTENT_DATA2*)&ned->data[0];
|
|
|
|
ned->generation = Vcb->superblock.generation;
|
|
ned->decoded_size = ed->decoded_size;
|
|
ned->compression = ed->compression;
|
|
ned->encryption = ed->encryption;
|
|
ned->encoding = ed->encoding;
|
|
ned->type = ed->type;
|
|
ned2->address = ed2->address;
|
|
ned2->size = ed2->size;
|
|
ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
|
|
ned2->num_bytes = start_data - tp.item->key.offset;
|
|
|
|
if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(ned);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
|
|
if (!ned) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
ned2 = (EXTENT_DATA2*)&ned->data[0];
|
|
|
|
ned->generation = Vcb->superblock.generation;
|
|
ned->decoded_size = ed->decoded_size;
|
|
ned->compression = ed->compression;
|
|
ned->encryption = ed->encryption;
|
|
ned->encoding = ed->encoding;
|
|
ned->type = ed->type;
|
|
ned2->address = ed2->address;
|
|
ned2->size = ed2->size;
|
|
ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - tp.item->key.offset));
|
|
ned2->num_bytes = tp.item->key.offset + len - end_data;
|
|
|
|
if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(ned);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (b) {
|
|
free_traverse_ptr(&tp);
|
|
tp = next_tp;
|
|
|
|
if (tp.item->key.obj_id > fcb->inode || tp.item->key.obj_type > TYPE_EXTENT_DATA || tp.item->key.offset >= end_data)
|
|
break;
|
|
}
|
|
} while (b);
|
|
|
|
// FIXME - do bitmap analysis of changed extents, and free what we can
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
end:
|
|
free_traverse_ptr(&tp);
|
|
|
|
return Status;
|
|
}
|
|
|
|
static BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
|
|
UINT64 address;
|
|
NTSTATUS Status;
|
|
EXTENT_ITEM_DATA_REF* eidr;
|
|
EXTENT_DATA* ed;
|
|
EXTENT_DATA2* ed2;
|
|
ULONG edsize = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
|
|
changed_sector* sc;
|
|
traverse_ptr tp;
|
|
int i;
|
|
|
|
TRACE("(%p, (%llx, %llx), %llx, %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, data, changed_sector_list);
|
|
|
|
if (!find_address_in_chunk(Vcb, c, length, &address))
|
|
return FALSE;
|
|
|
|
eidr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_DATA_REF), ALLOC_TAG);
|
|
if (!eidr) {
|
|
ERR("out of memory\n");
|
|
return FALSE;
|
|
}
|
|
|
|
eidr->ei.refcount = 1;
|
|
eidr->ei.generation = Vcb->superblock.generation;
|
|
eidr->ei.flags = EXTENT_ITEM_DATA;
|
|
eidr->type = TYPE_EXTENT_DATA_REF;
|
|
eidr->edr.root = fcb->subvol->id;
|
|
eidr->edr.objid = fcb->inode;
|
|
eidr->edr.offset = start_data;
|
|
eidr->edr.count = 1;
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, length, eidr, sizeof(EXTENT_ITEM_DATA_REF), &tp, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(eidr);
|
|
return FALSE;
|
|
}
|
|
|
|
tp.tree->header.generation = eidr->ei.generation;
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
Status = write_data(Vcb, address, data, length);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("write_data returned %08x\n", Status);
|
|
return FALSE;
|
|
}
|
|
|
|
if (changed_sector_list) {
|
|
sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
|
|
if (!sc) {
|
|
ERR("out of memory\n");
|
|
return FALSE;
|
|
}
|
|
|
|
sc->ol.key = address;
|
|
sc->length = length / Vcb->superblock.sector_size;
|
|
sc->deleted = FALSE;
|
|
|
|
sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
|
|
if (!sc->checksums) {
|
|
ERR("out of memory\n");
|
|
ExFreePool(sc);
|
|
return FALSE;
|
|
}
|
|
|
|
for (i = 0; i < sc->length; i++) {
|
|
sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
|
|
}
|
|
|
|
insert_into_ordered_list(changed_sector_list, &sc->ol);
|
|
}
|
|
|
|
// add extent data to inode
|
|
ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
|
|
if (!ed) {
|
|
ERR("out of memory\n");
|
|
return FALSE;
|
|
}
|
|
|
|
ed->generation = Vcb->superblock.generation;
|
|
ed->decoded_size = length;
|
|
ed->compression = BTRFS_COMPRESSION_NONE;
|
|
ed->encryption = BTRFS_ENCRYPTION_NONE;
|
|
ed->encoding = BTRFS_ENCODING_NONE;
|
|
ed->type = EXTENT_TYPE_REGULAR;
|
|
|
|
ed2 = (EXTENT_DATA2*)ed->data;
|
|
ed2->address = address;
|
|
ed2->size = length;
|
|
ed2->offset = 0;
|
|
ed2->num_bytes = length;
|
|
|
|
if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start_data, ed, edsize, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(ed);
|
|
return FALSE;
|
|
}
|
|
|
|
increase_chunk_usage(c, length);
|
|
add_to_space_list(c, address, length, SPACE_TYPE_WRITING);
|
|
|
|
fcb->inode_item.st_blocks += length;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static BOOL extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
|
|
LIST_ENTRY* changed_sector_list, traverse_ptr* edtp, traverse_ptr* eitp, LIST_ENTRY* rollback) {
|
|
EXTENT_DATA* ed;
|
|
EXTENT_DATA2* ed2;
|
|
EXTENT_ITEM* ei;
|
|
NTSTATUS Status;
|
|
changed_sector* sc;
|
|
chunk* c;
|
|
int i;
|
|
|
|
TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data,
|
|
length, data, changed_sector_list, edtp, eitp);
|
|
|
|
ed = ExAllocatePoolWithTag(PagedPool, edtp->item->size, ALLOC_TAG);
|
|
if (!ed) {
|
|
ERR("out of memory\n");
|
|
return FALSE;
|
|
}
|
|
|
|
RtlCopyMemory(ed, edtp->item->data, edtp->item->size);
|
|
|
|
ed->decoded_size += length;
|
|
ed2 = (EXTENT_DATA2*)ed->data;
|
|
ed2->size += length;
|
|
ed2->num_bytes += length;
|
|
|
|
delete_tree_item(Vcb, edtp, rollback);
|
|
|
|
if (!insert_tree_item(Vcb, fcb->subvol, edtp->item->key.obj_id, edtp->item->key.obj_type, edtp->item->key.offset, ed, edtp->item->size, NULL, rollback)) {
|
|
TRACE("insert_tree_item failed\n");
|
|
|
|
ExFreePool(ed);
|
|
return FALSE;
|
|
}
|
|
|
|
ei = ExAllocatePoolWithTag(PagedPool, eitp->item->size, ALLOC_TAG);
|
|
if (!ei) {
|
|
ERR("out of memory\n");
|
|
ExFreePool(ed);
|
|
return FALSE;
|
|
}
|
|
|
|
RtlCopyMemory(ei, eitp->item->data, eitp->item->size);
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->extent_root, eitp->item->key.obj_id, eitp->item->key.obj_type, eitp->item->key.offset + length, ei, eitp->item->size, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
|
|
ExFreePool(ei);
|
|
return FALSE;
|
|
}
|
|
|
|
delete_tree_item(Vcb, eitp, rollback);
|
|
|
|
Status = write_data(Vcb, eitp->item->key.obj_id + eitp->item->key.offset, data, length);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("write_data returned %08x\n", Status);
|
|
return FALSE;
|
|
}
|
|
|
|
if (changed_sector_list) {
|
|
sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
|
|
if (!sc) {
|
|
ERR("out of memory\n");
|
|
return FALSE;
|
|
}
|
|
|
|
sc->ol.key = eitp->item->key.obj_id + eitp->item->key.offset;
|
|
sc->length = length / Vcb->superblock.sector_size;
|
|
sc->deleted = FALSE;
|
|
|
|
sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
|
|
if (!sc->checksums) {
|
|
ERR("out of memory\n");
|
|
ExFreePool(sc);
|
|
return FALSE;
|
|
}
|
|
|
|
for (i = 0; i < sc->length; i++) {
|
|
sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
|
|
}
|
|
insert_into_ordered_list(changed_sector_list, &sc->ol);
|
|
}
|
|
|
|
c = get_chunk_from_address(Vcb, eitp->item->key.obj_id);
|
|
|
|
if (c) {
|
|
increase_chunk_usage(c, length);
|
|
|
|
add_to_space_list(c, eitp->item->key.obj_id + eitp->item->key.offset, length, SPACE_TYPE_WRITING);
|
|
}
|
|
|
|
fcb->inode_item.st_blocks += length;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
|
|
LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp, tp2;
|
|
BOOL success = FALSE;
|
|
EXTENT_DATA* ed;
|
|
EXTENT_DATA2* ed2;
|
|
EXTENT_ITEM* ei;
|
|
chunk* c;
|
|
LIST_ENTRY* le;
|
|
space* s;
|
|
NTSTATUS Status;
|
|
|
|
searchkey.obj_id = fcb->inode;
|
|
searchkey.obj_type = TYPE_EXTENT_DATA;
|
|
searchkey.offset = start_data;
|
|
|
|
Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return FALSE;
|
|
}
|
|
|
|
if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data) {
|
|
WARN("previous EXTENT_DATA not found\n");
|
|
goto end;
|
|
}
|
|
|
|
ed = (EXTENT_DATA*)tp.item->data;
|
|
|
|
if (tp.item->size < sizeof(EXTENT_DATA)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
|
|
goto end;
|
|
}
|
|
|
|
if (ed->type != EXTENT_TYPE_REGULAR) {
|
|
TRACE("not extending extent which is not EXTENT_TYPE_REGULAR\n");
|
|
goto end;
|
|
}
|
|
|
|
ed2 = (EXTENT_DATA2*)ed->data;
|
|
|
|
if (tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
|
|
goto end;
|
|
}
|
|
|
|
if (tp.item->key.offset + ed2->num_bytes != start_data) {
|
|
TRACE("last EXTENT_DATA does not run up to start_data (%llx + %llx != %llx)\n", tp.item->key.offset, ed2->num_bytes, start_data);
|
|
goto end;
|
|
}
|
|
|
|
if (ed->compression != BTRFS_COMPRESSION_NONE) {
|
|
FIXME("FIXME: compression not yet supported\n");
|
|
goto end;
|
|
}
|
|
|
|
if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
|
|
WARN("encryption not supported\n");
|
|
goto end;
|
|
}
|
|
|
|
if (ed->encoding != BTRFS_ENCODING_NONE) {
|
|
WARN("other encodings not supported\n");
|
|
goto end;
|
|
}
|
|
|
|
if (ed2->size - ed2->offset != ed2->num_bytes) {
|
|
TRACE("last EXTENT_DATA does not run all the way to the end of the extent\n");
|
|
goto end;
|
|
}
|
|
|
|
searchkey.obj_id = ed2->address;
|
|
searchkey.obj_type = TYPE_EXTENT_ITEM;
|
|
searchkey.offset = ed2->size;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
if (keycmp(&tp2.item->key, &searchkey)) {
|
|
ERR("error - extent %llx,%llx not found in tree\n", ed2->address, ed2->size);
|
|
int3; // TESTING
|
|
goto end2;
|
|
}
|
|
|
|
if (tp2.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
|
|
NTSTATUS Status = convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("convert_old_data_extent returned %08x\n", Status);
|
|
goto end2;
|
|
}
|
|
|
|
free_traverse_ptr(&tp2);
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
if (keycmp(&tp2.item->key, &searchkey)) {
|
|
WARN("extent item not found for address %llx, size %llx\n", ed2->address, ed2->size);
|
|
goto end2;
|
|
}
|
|
}
|
|
|
|
ei = (EXTENT_ITEM*)tp2.item->data;
|
|
|
|
if (tp.item->size < sizeof(EXTENT_ITEM)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
|
|
goto end2;
|
|
}
|
|
|
|
// FIXME - test this
|
|
if (extent_item_is_shared(ei, tp2.item->size - sizeof(EXTENT_ITEM))) {
|
|
NTSTATUS Status = convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("convert_shared_data_extent returned %08x\n", Status);
|
|
goto end2;
|
|
}
|
|
|
|
free_traverse_ptr(&tp2);
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
if (keycmp(&tp2.item->key, &searchkey)) {
|
|
WARN("extent item not found for address %llx, size %llx\n", ed2->address, ed2->size);
|
|
goto end2;
|
|
}
|
|
|
|
ei = (EXTENT_ITEM*)tp2.item->data;
|
|
|
|
if (tp.item->size < sizeof(EXTENT_ITEM)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
|
|
goto end2;
|
|
}
|
|
}
|
|
|
|
if (ei->refcount != 1) {
|
|
TRACE("extent refcount was not 1\n");
|
|
goto end2;
|
|
}
|
|
|
|
if (ei->flags != EXTENT_ITEM_DATA) {
|
|
ERR("error - extent was not a data extent\n");
|
|
goto end2;
|
|
}
|
|
|
|
c = get_chunk_from_address(Vcb, ed2->address);
|
|
|
|
le = c->space.Flink;
|
|
while (le != &c->space) {
|
|
s = CONTAINING_RECORD(le, space, list_entry);
|
|
|
|
if (s->offset == ed2->address + ed2->size) {
|
|
if (s->type == SPACE_TYPE_FREE && s->size >= length) {
|
|
success = extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, &tp, &tp2, rollback);
|
|
}
|
|
break;
|
|
} else if (s->offset > ed2->address + ed2->size)
|
|
break;
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
end2:
|
|
free_traverse_ptr(&tp2);
|
|
|
|
end:
|
|
free_traverse_ptr(&tp);
|
|
|
|
return success;
|
|
}
|
|
|
|
NTSTATUS insert_sparse_extent(device_extension* Vcb, root* r, UINT64 inode, UINT64 start, UINT64 length, LIST_ENTRY* rollback) {
|
|
EXTENT_DATA* ed;
|
|
EXTENT_DATA2* ed2;
|
|
|
|
TRACE("(%p, %llx, %llx, %llx, %llx)\n", Vcb, r->id, inode, start, length);
|
|
|
|
ed = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
|
|
if (!ed) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
ed->generation = Vcb->superblock.generation;
|
|
ed->decoded_size = length;
|
|
ed->compression = BTRFS_COMPRESSION_NONE;
|
|
ed->encryption = BTRFS_ENCRYPTION_NONE;
|
|
ed->encoding = BTRFS_ENCODING_NONE;
|
|
ed->type = EXTENT_TYPE_REGULAR;
|
|
|
|
ed2 = (EXTENT_DATA2*)ed->data;
|
|
ed2->address = 0;
|
|
ed2->size = 0;
|
|
ed2->offset = 0;
|
|
ed2->num_bytes = length;
|
|
|
|
if (!insert_tree_item(Vcb, r, inode, TYPE_EXTENT_DATA, start, ed, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(ed);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
// static void print_tree(tree* t) {
|
|
// LIST_ENTRY* le = t->itemlist.Flink;
|
|
// while (le != &t->itemlist) {
|
|
// tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
// ERR("%llx,%x,%llx (ignore = %s)\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
|
|
// le = le->Flink;
|
|
// }
|
|
// }
|
|
|
|
static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
|
|
LIST_ENTRY* le = Vcb->chunks.Flink;
|
|
chunk* c;
|
|
KEY searchkey;
|
|
UINT64 flags;
|
|
|
|
TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);
|
|
|
|
// FIXME - split data up if not enough space for just one extent
|
|
|
|
if (start_data > 0 && try_extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, rollback))
|
|
return STATUS_SUCCESS;
|
|
|
|
// if there is a gap before start_data, plug it with a sparse extent
|
|
if (start_data > 0) {
|
|
traverse_ptr tp;
|
|
NTSTATUS Status;
|
|
EXTENT_DATA* ed;
|
|
UINT64 len;
|
|
|
|
searchkey.obj_id = fcb->inode;
|
|
searchkey.obj_type = TYPE_EXTENT_DATA;
|
|
searchkey.offset = start_data;
|
|
|
|
Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
// if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data) {
|
|
// traverse_ptr next_tp;
|
|
//
|
|
// ERR("error - did not find EXTENT_DATA expected - looking for %llx,%x,%llx, found %llx,%x,%llx\n",
|
|
// searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
|
|
// print_tree(tp.tree);
|
|
//
|
|
// if (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
|
|
// ERR("---\n");
|
|
// ERR("key = %llx,%x,%llx\n", next_tp.tree->paritem->key.obj_id, next_tp.tree->paritem->key.obj_type, next_tp.tree->paritem->key.offset);
|
|
// print_tree(next_tp.tree);
|
|
//
|
|
// free_traverse_ptr(&next_tp);
|
|
// } else
|
|
// ERR("next item not found\n");
|
|
//
|
|
// int3;
|
|
// free_traverse_ptr(&tp);
|
|
// return STATUS_INTERNAL_ERROR;
|
|
// }
|
|
|
|
if (tp.item->key.obj_type == TYPE_EXTENT_DATA && tp.item->size >= sizeof(EXTENT_DATA)) {
|
|
EXTENT_DATA2* ed2;
|
|
|
|
ed = (EXTENT_DATA*)tp.item->data;
|
|
ed2 = (EXTENT_DATA2*)ed->data;
|
|
|
|
len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
|
|
} else
|
|
ed = NULL;
|
|
|
|
if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || !ed || tp.item->key.offset + len < start_data) {
|
|
if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA)
|
|
Status = insert_sparse_extent(Vcb, fcb->subvol, fcb->inode, 0, start_data, rollback);
|
|
else if (!ed)
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
|
|
else {
|
|
Status = insert_sparse_extent(Vcb, fcb->subvol, fcb->inode, tp.item->key.offset + len,
|
|
start_data - tp.item->key.offset - len, rollback);
|
|
}
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("insert_sparse_extent returned %08x\n", Status);
|
|
free_traverse_ptr(&tp);
|
|
return Status;
|
|
}
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
}
|
|
|
|
// FIXME - how do we know which RAID level to put this to?
|
|
flags = BLOCK_FLAG_DATA; // SINGLE
|
|
|
|
// if (!chunk_test) { // TESTING
|
|
// if ((c = alloc_chunk(Vcb, flags, NULL))) {
|
|
// ERR("chunk_item->type = %llx\n", c->chunk_item->type);
|
|
// ERR("size = %llx\n", c->chunk_item->size);
|
|
// ERR("used = %llx\n", c->used);
|
|
//
|
|
// if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
|
|
// if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list)) {
|
|
// // chunk_test = TRUE;
|
|
// ERR("SUCCESS\n");
|
|
// return STATUS_SUCCESS;
|
|
// } else
|
|
// ERR(":-(\n");
|
|
// } else
|
|
// ERR("???\n");
|
|
// }
|
|
// }
|
|
|
|
while (le != &Vcb->chunks) {
|
|
c = CONTAINING_RECORD(le, chunk, list_entry);
|
|
|
|
if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
|
|
if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list, rollback))
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
if ((c = alloc_chunk(Vcb, flags, rollback))) {
|
|
if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
|
|
if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list, rollback))
|
|
return STATUS_SUCCESS;
|
|
}
|
|
}
|
|
|
|
// FIXME - rebalance chunks if free space elsewhere?
|
|
WARN("couldn't find any data chunks with %llx bytes free\n", length);
|
|
|
|
return STATUS_DISK_FULL;
|
|
}
|
|
|
|
void update_checksum_tree(device_extension* Vcb, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
|
|
LIST_ENTRY* le = changed_sector_list->Flink;
|
|
changed_sector* cs;
|
|
traverse_ptr tp, next_tp;
|
|
KEY searchkey;
|
|
UINT32* data;
|
|
NTSTATUS Status;
|
|
|
|
if (!Vcb->checksum_root) {
|
|
ERR("no checksum root\n");
|
|
goto exit;
|
|
}
|
|
|
|
while (le != changed_sector_list) {
|
|
UINT64 startaddr, endaddr;
|
|
ULONG len;
|
|
UINT32* checksums;
|
|
RTL_BITMAP bmp;
|
|
ULONG* bmparr;
|
|
ULONG runlength, index;
|
|
|
|
cs = (changed_sector*)le;
|
|
|
|
searchkey.obj_id = EXTENT_CSUM_ID;
|
|
searchkey.obj_type = TYPE_EXTENT_CSUM;
|
|
searchkey.offset = cs->ol.key;
|
|
|
|
// FIXME - create checksum_root if it doesn't exist at all
|
|
|
|
Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) { // tree is completely empty
|
|
// FIXME - do proper check here that tree is empty
|
|
if (!cs->deleted) {
|
|
checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * cs->length, ALLOC_TAG);
|
|
if (!checksums) {
|
|
ERR("out of memory\n");
|
|
goto exit;
|
|
}
|
|
|
|
RtlCopyMemory(checksums, cs->checksums, sizeof(UINT32) * cs->length);
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, cs->ol.key, checksums, sizeof(UINT32) * cs->length, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(checksums);
|
|
goto exit;
|
|
}
|
|
}
|
|
} else {
|
|
UINT32 tplen;
|
|
|
|
// FIXME - check entry is TYPE_EXTENT_CSUM?
|
|
|
|
if (tp.item->key.offset < cs->ol.key && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= cs->ol.key)
|
|
startaddr = tp.item->key.offset;
|
|
else
|
|
startaddr = cs->ol.key;
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
searchkey.obj_id = EXTENT_CSUM_ID;
|
|
searchkey.obj_type = TYPE_EXTENT_CSUM;
|
|
searchkey.offset = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
|
|
|
|
Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
goto exit;
|
|
}
|
|
|
|
tplen = tp.item->size / sizeof(UINT32);
|
|
|
|
if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= cs->ol.key + (cs->length * Vcb->superblock.sector_size))
|
|
endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
|
|
else
|
|
endaddr = cs->ol.key + (cs->length * Vcb->superblock.sector_size);
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
TRACE("cs starts at %llx (%x sectors)\n", cs->ol.key, cs->length);
|
|
TRACE("startaddr = %llx\n", startaddr);
|
|
TRACE("endaddr = %llx\n", endaddr);
|
|
|
|
len = (endaddr - startaddr) / Vcb->superblock.sector_size;
|
|
|
|
checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
|
|
if (!checksums) {
|
|
ERR("out of memory\n");
|
|
goto exit;
|
|
}
|
|
|
|
bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
|
|
if (!bmparr) {
|
|
ERR("out of memory\n");
|
|
ExFreePool(checksums);
|
|
goto exit;
|
|
}
|
|
|
|
RtlInitializeBitMap(&bmp, bmparr, len);
|
|
RtlSetAllBits(&bmp);
|
|
|
|
searchkey.obj_id = EXTENT_CSUM_ID;
|
|
searchkey.obj_type = TYPE_EXTENT_CSUM;
|
|
searchkey.offset = cs->ol.key;
|
|
|
|
Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
goto exit;
|
|
}
|
|
|
|
// set bit = free space, cleared bit = allocated sector
|
|
|
|
// ERR("start loop\n");
|
|
while (tp.item->key.offset < endaddr) {
|
|
// ERR("%llx,%x,%llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
|
|
if (tp.item->key.offset >= startaddr) {
|
|
if (tp.item->size > 0) {
|
|
RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, tp.item->size);
|
|
RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, tp.item->size / sizeof(UINT32));
|
|
}
|
|
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
}
|
|
|
|
if (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
|
|
free_traverse_ptr(&tp);
|
|
tp = next_tp;
|
|
} else
|
|
break;
|
|
}
|
|
// ERR("end loop\n");
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
if (cs->deleted) {
|
|
RtlSetBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
|
|
} else {
|
|
RtlCopyMemory(&checksums[(cs->ol.key - startaddr) / Vcb->superblock.sector_size], cs->checksums, cs->length * sizeof(UINT32));
|
|
RtlClearBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
|
|
}
|
|
|
|
runlength = RtlFindFirstRunClear(&bmp, &index);
|
|
|
|
while (runlength != 0) {
|
|
do {
|
|
ULONG rl;
|
|
|
|
if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
|
|
rl = MAX_CSUM_SIZE / sizeof(UINT32);
|
|
else
|
|
rl = runlength;
|
|
|
|
data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
|
|
if (!data) {
|
|
ERR("out of memory\n");
|
|
ExFreePool(bmparr);
|
|
ExFreePool(checksums);
|
|
goto exit;
|
|
}
|
|
|
|
RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);
|
|
|
|
if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, startaddr + (index * Vcb->superblock.sector_size), data, sizeof(UINT32) * rl, NULL, rollback)) {
|
|
ERR("insert_tree_item failed\n");
|
|
ExFreePool(data);
|
|
ExFreePool(bmparr);
|
|
ExFreePool(checksums);
|
|
goto exit;
|
|
}
|
|
|
|
runlength -= rl;
|
|
index += rl;
|
|
} while (runlength > 0);
|
|
|
|
runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
|
|
}
|
|
|
|
ExFreePool(bmparr);
|
|
ExFreePool(checksums);
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
exit:
|
|
while (!IsListEmpty(changed_sector_list)) {
|
|
le = RemoveHeadList(changed_sector_list);
|
|
cs = (changed_sector*)le;
|
|
|
|
if (cs->checksums)
|
|
ExFreePool(cs->checksums);
|
|
|
|
ExFreePool(cs);
|
|
}
|
|
}
|
|
|
|
NTSTATUS truncate_file(fcb* fcb, UINT64 end, LIST_ENTRY* rollback) {
|
|
LIST_ENTRY changed_sector_list;
|
|
NTSTATUS Status;
|
|
BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
|
|
|
|
if (!nocsum)
|
|
InitializeListHead(&changed_sector_list);
|
|
|
|
// FIXME - convert into inline extent if short enough
|
|
|
|
Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
|
|
sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), nocsum ? NULL : &changed_sector_list, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - excise_extents failed\n");
|
|
return Status;
|
|
}
|
|
|
|
fcb->inode_item.st_size = end;
|
|
TRACE("setting st_size to %llx\n", end);
|
|
|
|
fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
|
|
fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
|
|
fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
|
|
// FIXME - inform cache manager of this
|
|
|
|
TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
|
|
|
|
if (!nocsum)
|
|
update_checksum_tree(fcb->Vcb, &changed_sector_list, rollback);
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
NTSTATUS extend_file(fcb* fcb, UINT64 end, LIST_ENTRY* rollback) {
|
|
UINT64 oldalloc, newalloc;
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
BOOL cur_inline;
|
|
NTSTATUS Status;
|
|
|
|
TRACE("(%p, %x, %p)\n", fcb, end, rollback);
|
|
|
|
if (fcb->ads) {
|
|
FIXME("FIXME - support streams here\n"); // FIXME
|
|
return STATUS_NOT_IMPLEMENTED;
|
|
} else {
|
|
searchkey.obj_id = fcb->inode;
|
|
searchkey.obj_type = TYPE_EXTENT_DATA;
|
|
searchkey.offset = 0xffffffffffffffff;
|
|
|
|
Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
oldalloc = 0;
|
|
if (tp.item->key.obj_id == fcb->inode && tp.item->key.obj_type == TYPE_EXTENT_DATA) {
|
|
EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
|
|
EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
|
|
|
|
if (tp.item->size < sizeof(EXTENT_DATA)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
oldalloc = tp.item->key.offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
|
|
cur_inline = ed->type == EXTENT_TYPE_INLINE;
|
|
|
|
if (cur_inline && end > fcb->Vcb->max_inline) {
|
|
LIST_ENTRY changed_sector_list;
|
|
BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
|
|
UINT64 origlength, length;
|
|
UINT8* data;
|
|
|
|
TRACE("giving inline file proper extents\n");
|
|
|
|
origlength = ed->decoded_size;
|
|
|
|
cur_inline = FALSE;
|
|
|
|
if (!nocsum)
|
|
InitializeListHead(&changed_sector_list);
|
|
|
|
delete_tree_item(fcb->Vcb, &tp, rollback);
|
|
|
|
length = sector_align(origlength, fcb->Vcb->superblock.sector_size);
|
|
|
|
data = ExAllocatePoolWithTag(PagedPool, length, ALLOC_TAG);
|
|
if (!data) {
|
|
ERR("could not allocate %llx bytes for data\n", length);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
if (length > origlength)
|
|
RtlZeroMemory(data + origlength, length - origlength);
|
|
|
|
RtlCopyMemory(data, ed->data, origlength);
|
|
|
|
fcb->inode_item.st_blocks -= origlength;
|
|
|
|
Status = insert_extent(fcb->Vcb, fcb, tp.item->key.offset, length, data, nocsum ? NULL : &changed_sector_list, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("insert_extent returned %08x\n", Status);
|
|
free_traverse_ptr(&tp);
|
|
ExFreePool(data);
|
|
return Status;
|
|
}
|
|
|
|
oldalloc = tp.item->key.offset + length;
|
|
|
|
ExFreePool(data);
|
|
|
|
if (!nocsum)
|
|
update_checksum_tree(fcb->Vcb, &changed_sector_list, rollback);
|
|
}
|
|
|
|
if (cur_inline) {
|
|
ULONG edsize;
|
|
|
|
if (end > oldalloc) {
|
|
edsize = sizeof(EXTENT_DATA) - 1 + end - tp.item->key.offset;
|
|
ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
|
|
|
|
if (!ed) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
RtlZeroMemory(ed, edsize);
|
|
RtlCopyMemory(ed, tp.item->data, tp.item->size);
|
|
|
|
ed->decoded_size = end - tp.item->key.offset;
|
|
|
|
delete_tree_item(fcb->Vcb, &tp, rollback);
|
|
|
|
if (!insert_tree_item(fcb->Vcb, fcb->subvol, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ed, edsize, NULL, rollback)) {
|
|
ERR("error - failed to insert item\n");
|
|
ExFreePool(ed);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
}
|
|
|
|
TRACE("extending inline file (oldalloc = %llx, end = %llx)\n", oldalloc, end);
|
|
|
|
fcb->inode_item.st_size = end;
|
|
TRACE("setting st_size to %llx\n", end);
|
|
|
|
fcb->inode_item.st_blocks = end;
|
|
|
|
fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
|
|
} else {
|
|
newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
|
|
|
|
if (newalloc > oldalloc) {
|
|
Status = insert_sparse_extent(fcb->Vcb, fcb->subvol, fcb->inode, oldalloc, newalloc - oldalloc, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("insert_sparse_extent returned %08x\n", Status);
|
|
free_traverse_ptr(&tp);
|
|
return Status;
|
|
}
|
|
}
|
|
|
|
fcb->inode_item.st_size = end;
|
|
TRACE("setting st_size to %llx\n", end);
|
|
|
|
TRACE("newalloc = %llx\n", newalloc);
|
|
|
|
fcb->Header.AllocationSize.QuadPart = newalloc;
|
|
fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
|
|
}
|
|
} else {
|
|
if (end > fcb->Vcb->max_inline) {
|
|
newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);
|
|
|
|
Status = insert_sparse_extent(fcb->Vcb, fcb->subvol, fcb->inode, 0, newalloc, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("insert_sparse_extent returned %08x\n", Status);
|
|
free_traverse_ptr(&tp);
|
|
return Status;
|
|
}
|
|
|
|
fcb->inode_item.st_size = end;
|
|
TRACE("setting st_size to %llx\n", end);
|
|
|
|
TRACE("newalloc = %llx\n", newalloc);
|
|
|
|
fcb->Header.AllocationSize.QuadPart = newalloc;
|
|
fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
|
|
} else {
|
|
EXTENT_DATA* ed;
|
|
ULONG edsize;
|
|
|
|
edsize = sizeof(EXTENT_DATA) - 1 + end;
|
|
ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
|
|
|
|
if (!ed) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
ed->generation = fcb->Vcb->superblock.generation;
|
|
ed->decoded_size = end;
|
|
ed->compression = BTRFS_COMPRESSION_NONE;
|
|
ed->encryption = BTRFS_ENCRYPTION_NONE;
|
|
ed->encoding = BTRFS_ENCODING_NONE;
|
|
ed->type = EXTENT_TYPE_INLINE;
|
|
|
|
RtlZeroMemory(ed->data, end);
|
|
|
|
if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, ed, edsize, NULL, rollback)) {
|
|
ERR("error - failed to insert item\n");
|
|
ExFreePool(ed);
|
|
free_traverse_ptr(&tp);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
fcb->inode_item.st_size = end;
|
|
TRACE("setting st_size to %llx\n", end);
|
|
|
|
fcb->inode_item.st_blocks = end;
|
|
|
|
fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
|
|
}
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static UINT64 get_extent_item_refcount(device_extension* Vcb, UINT64 address) {
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
EXTENT_ITEM* ei;
|
|
UINT64 rc;
|
|
NTSTATUS Status;
|
|
|
|
searchkey.obj_id = address;
|
|
searchkey.obj_type = TYPE_EXTENT_ITEM;
|
|
searchkey.offset = 0xffffffffffffffff;
|
|
|
|
Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return 0;
|
|
}
|
|
|
|
if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
|
|
ERR("error - could not find EXTENT_ITEM for %llx\n", address);
|
|
free_traverse_ptr(&tp);
|
|
return 0;
|
|
}
|
|
|
|
if (tp.item->size < sizeof(EXTENT_ITEM)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
|
|
free_traverse_ptr(&tp);
|
|
return 0;
|
|
}
|
|
|
|
ei = (EXTENT_ITEM*)tp.item->data;
|
|
rc = ei->refcount;
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
return rc;
|
|
}
|
|
|
|
static NTSTATUS do_nocow_write(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
|
|
KEY searchkey;
|
|
traverse_ptr tp, next_tp;
|
|
NTSTATUS Status;
|
|
EXTENT_DATA* ed;
|
|
BOOL b, do_cow;
|
|
EXTENT_DATA2* eds;
|
|
UINT64 size, new_start, new_end, last_write = 0;
|
|
|
|
TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);
|
|
|
|
searchkey.obj_id = fcb->inode;
|
|
searchkey.obj_type = TYPE_EXTENT_DATA;
|
|
searchkey.offset = start_data;
|
|
|
|
Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
return Status;
|
|
}
|
|
|
|
if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset > start_data) {
|
|
ERR("previous EXTENT_DATA not found (found %llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
do {
|
|
if (tp.item->size < sizeof(EXTENT_DATA)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
ed = (EXTENT_DATA*)tp.item->data;
|
|
|
|
if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
eds = (EXTENT_DATA2*)&ed->data[0];
|
|
|
|
b = find_next_item(Vcb, &tp, &next_tp, TRUE);
|
|
|
|
switch (ed->type) {
|
|
case EXTENT_TYPE_REGULAR:
|
|
{
|
|
UINT64 rc = get_extent_item_refcount(Vcb, eds->address);
|
|
|
|
if (rc == 0) {
|
|
ERR("get_extent_item_refcount failed\n");
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
do_cow = rc > 1;
|
|
break;
|
|
}
|
|
|
|
case EXTENT_TYPE_INLINE:
|
|
do_cow = TRUE;
|
|
break;
|
|
|
|
case EXTENT_TYPE_PREALLOC:
|
|
FIXME("FIXME - handle prealloc extents\n"); // FIXME
|
|
Status = STATUS_NOT_SUPPORTED;
|
|
goto end;
|
|
|
|
default:
|
|
ERR("error - unknown extent type %x\n", ed->type);
|
|
Status = STATUS_NOT_SUPPORTED;
|
|
goto end;
|
|
}
|
|
|
|
if (ed->compression != BTRFS_COMPRESSION_NONE) {
|
|
FIXME("FIXME: compression not yet supported\n");
|
|
Status = STATUS_NOT_SUPPORTED;
|
|
goto end;
|
|
}
|
|
|
|
if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
|
|
WARN("encryption not supported\n");
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
if (ed->encoding != BTRFS_ENCODING_NONE) {
|
|
WARN("other encodings not supported\n");
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
size = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : eds->num_bytes;
|
|
|
|
TRACE("extent: start = %llx, length = %llx\n", tp.item->key.offset, size);
|
|
|
|
new_start = tp.item->key.offset < start_data ? start_data : tp.item->key.offset;
|
|
new_end = tp.item->key.offset + size > start_data + length ? (start_data + length) : (tp.item->key.offset + size);
|
|
|
|
TRACE("new_start = %llx\n", new_start);
|
|
TRACE("new_end = %llx\n", new_end);
|
|
|
|
if (do_cow) {
|
|
TRACE("doing COW write\n");
|
|
|
|
Status = excise_extents(Vcb, fcb, new_start, new_start + new_end, changed_sector_list, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - excise_extents returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
Status = insert_extent(Vcb, fcb, new_start, new_end - new_start, (UINT8*)data + new_start - start_data, changed_sector_list, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - insert_extent returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
} else {
|
|
UINT64 writeaddr;
|
|
|
|
writeaddr = eds->address + eds->offset + new_start - tp.item->key.offset;
|
|
TRACE("doing non-COW write to %llx\n", writeaddr);
|
|
|
|
Status = write_data(Vcb, writeaddr, (UINT8*)data + new_start - start_data, new_end - new_start);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - write_data returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
if (changed_sector_list) {
|
|
unsigned int i;
|
|
changed_sector* sc;
|
|
|
|
sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
|
|
if (!sc) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
sc->ol.key = writeaddr;
|
|
sc->length = (new_end - new_start) / Vcb->superblock.sector_size;
|
|
sc->deleted = FALSE;
|
|
|
|
sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
|
|
if (!sc->checksums) {
|
|
ERR("out of memory\n");
|
|
ExFreePool(sc);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
for (i = 0; i < sc->length; i++) {
|
|
sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + new_start - start_data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
|
|
}
|
|
|
|
insert_into_ordered_list(changed_sector_list, &sc->ol);
|
|
}
|
|
}
|
|
|
|
last_write = new_end;
|
|
|
|
if (b) {
|
|
free_traverse_ptr(&tp);
|
|
tp = next_tp;
|
|
|
|
if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data + length)
|
|
b = FALSE;
|
|
}
|
|
} while (b);
|
|
|
|
if (last_write < start_data + length) {
|
|
new_start = last_write;
|
|
new_end = start_data + length;
|
|
|
|
TRACE("new_start = %llx\n", new_start);
|
|
TRACE("new_end = %llx\n", new_end);
|
|
|
|
Status = insert_extent(Vcb, fcb, new_start, new_end - new_start, (UINT8*)data + new_start - start_data, changed_sector_list, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - insert_extent returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
}
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
end:
|
|
free_traverse_ptr(&tp);
|
|
|
|
return Status;
|
|
}
|
|
|
|
#ifdef DEBUG_PARANOID
|
|
static void print_loaded_trees(tree* t, int spaces) {
|
|
char pref[10];
|
|
int i;
|
|
LIST_ENTRY* le;
|
|
|
|
for (i = 0; i < spaces; i++) {
|
|
pref[i] = ' ';
|
|
}
|
|
pref[spaces] = 0;
|
|
|
|
if (!t) {
|
|
ERR("%s(not loaded)\n", pref);
|
|
return;
|
|
}
|
|
|
|
le = t->itemlist.Flink;
|
|
while (le != &t->itemlist) {
|
|
tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
|
|
|
|
ERR("%s%llx,%x,%llx ignore=%s\n", pref, td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
|
|
|
|
if (t->header.level > 0) {
|
|
print_loaded_trees(td->treeholder.tree, spaces+1);
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
}
|
|
|
|
static void check_extents_consistent(device_extension* Vcb, fcb* fcb) {
|
|
KEY searchkey;
|
|
traverse_ptr tp, next_tp;
|
|
UINT64 length, oldlength, lastoff, alloc;
|
|
NTSTATUS Status;
|
|
EXTENT_DATA* ed;
|
|
EXTENT_DATA2* ed2;
|
|
|
|
if (fcb->ads || fcb->inode_item.st_size == 0 || fcb->deleted)
|
|
return;
|
|
|
|
TRACE("inode = %llx, subvol = %llx\n", fcb->inode, fcb->subvol->id);
|
|
|
|
searchkey.obj_id = fcb->inode;
|
|
searchkey.obj_type = TYPE_EXTENT_DATA;
|
|
searchkey.offset = 0;
|
|
|
|
Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
goto failure2;
|
|
}
|
|
|
|
if (keycmp(&searchkey, &tp.item->key)) {
|
|
ERR("could not find EXTENT_DATA at offset 0\n");
|
|
goto failure;
|
|
}
|
|
|
|
if (tp.item->size < sizeof(EXTENT_DATA)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
|
|
goto failure;
|
|
}
|
|
|
|
ed = (EXTENT_DATA*)tp.item->data;
|
|
ed2 = (EXTENT_DATA2*)&ed->data[0];
|
|
|
|
length = oldlength = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
|
|
lastoff = tp.item->key.offset;
|
|
|
|
TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
|
|
|
|
alloc = 0;
|
|
if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
|
|
alloc += length;
|
|
}
|
|
|
|
while (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
|
|
if (next_tp.item->key.obj_id != searchkey.obj_id || next_tp.item->key.obj_type != searchkey.obj_type) {
|
|
free_traverse_ptr(&next_tp);
|
|
break;
|
|
}
|
|
|
|
free_traverse_ptr(&tp);
|
|
tp = next_tp;
|
|
|
|
if (tp.item->size < sizeof(EXTENT_DATA)) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
|
|
goto failure;
|
|
}
|
|
|
|
ed = (EXTENT_DATA*)tp.item->data;
|
|
ed2 = (EXTENT_DATA2*)&ed->data[0];
|
|
|
|
length = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;
|
|
|
|
TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);
|
|
|
|
if (tp.item->key.offset != lastoff + oldlength) {
|
|
ERR("EXTENT_DATA in %llx,%llx was at %llx, expected %llx\n", fcb->subvol->id, fcb->inode, tp.item->key.offset, lastoff + oldlength);
|
|
goto failure;
|
|
}
|
|
|
|
if (ed->type != EXTENT_TYPE_REGULAR || ed2->address != 0) {
|
|
alloc += length;
|
|
}
|
|
|
|
oldlength = length;
|
|
lastoff = tp.item->key.offset;
|
|
}
|
|
|
|
if (alloc != fcb->inode_item.st_blocks) {
|
|
ERR("allocation size was %llx, expected %llx\n", alloc, fcb->inode_item.st_blocks);
|
|
goto failure;
|
|
}
|
|
|
|
// if (fcb->inode_item.st_blocks != lastoff + oldlength) {
|
|
// ERR("extents finished at %x, expected %x\n", (UINT32)(lastoff + oldlength), (UINT32)fcb->inode_item.st_blocks);
|
|
// goto failure;
|
|
// }
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
return;
|
|
|
|
failure:
|
|
free_traverse_ptr(&tp);
|
|
|
|
failure2:
|
|
if (fcb->subvol->treeholder.tree)
|
|
print_loaded_trees(fcb->subvol->treeholder.tree, 0);
|
|
|
|
int3;
|
|
}
|
|
|
|
// static void check_extent_tree_consistent(device_extension* Vcb) {
|
|
// KEY searchkey;
|
|
// traverse_ptr tp, next_tp;
|
|
// UINT64 lastaddr;
|
|
// BOOL b, inconsistency;
|
|
//
|
|
// searchkey.obj_id = 0;
|
|
// searchkey.obj_type = 0;
|
|
// searchkey.offset = 0;
|
|
//
|
|
// if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
|
|
// ERR("error - could not find any entries in extent_root\n");
|
|
// int3;
|
|
// }
|
|
//
|
|
// lastaddr = 0;
|
|
// inconsistency = FALSE;
|
|
//
|
|
// do {
|
|
// if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
|
|
// // ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
|
|
//
|
|
// if (tp.item->key.obj_id < lastaddr) {
|
|
// // ERR("inconsistency!\n");
|
|
// // int3;
|
|
// inconsistency = TRUE;
|
|
// }
|
|
//
|
|
// lastaddr = tp.item->key.obj_id + tp.item->key.offset;
|
|
// }
|
|
//
|
|
// b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
|
|
// if (b) {
|
|
// free_traverse_ptr(&tp);
|
|
// tp = next_tp;
|
|
// }
|
|
// } while (b);
|
|
//
|
|
// free_traverse_ptr(&tp);
|
|
//
|
|
// if (!inconsistency)
|
|
// return;
|
|
//
|
|
// ERR("Inconsistency detected:\n");
|
|
//
|
|
// if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
|
|
// ERR("error - could not find any entries in extent_root\n");
|
|
// int3;
|
|
// }
|
|
//
|
|
// do {
|
|
// if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
|
|
// ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
|
|
//
|
|
// if (tp.item->key.obj_id < lastaddr) {
|
|
// ERR("inconsistency!\n");
|
|
// }
|
|
//
|
|
// lastaddr = tp.item->key.obj_id + tp.item->key.offset;
|
|
// }
|
|
//
|
|
// b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
|
|
// if (b) {
|
|
// free_traverse_ptr(&tp);
|
|
// tp = next_tp;
|
|
// }
|
|
// } while (b);
|
|
//
|
|
// free_traverse_ptr(&tp);
|
|
//
|
|
// int3;
|
|
// }
|
|
#endif
|
|
|
|
NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOL paging_io, BOOL no_cache, LIST_ENTRY* rollback) {
|
|
PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
|
|
PFILE_OBJECT FileObject = IrpSp->FileObject;
|
|
KEY searchkey;
|
|
traverse_ptr tp;
|
|
EXTENT_DATA* ed2;
|
|
UINT64 newlength, start_data, end_data;
|
|
UINT32 bufhead;
|
|
BOOL make_inline;
|
|
UINT8* data;
|
|
LIST_ENTRY changed_sector_list;
|
|
INODE_ITEM *ii, *origii;
|
|
BOOL changed_length = FALSE, nocsum, nocow/*, lazy_writer = FALSE, write_eof = FALSE*/;
|
|
NTSTATUS Status;
|
|
LARGE_INTEGER time;
|
|
BTRFS_TIME now;
|
|
fcb* fcb;
|
|
BOOL paging_lock = FALSE;
|
|
|
|
TRACE("(%p, %p, %llx, %p, %x, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
|
|
|
|
if (*length == 0) {
|
|
WARN("returning success for zero-length write\n");
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
if (!FileObject) {
|
|
ERR("error - FileObject was NULL\n");
|
|
return STATUS_ACCESS_DENIED;
|
|
}
|
|
|
|
fcb = FileObject->FsContext;
|
|
|
|
if (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
|
|
WARN("tried to write to something other than a file or symlink (inode %llx, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
|
|
return STATUS_ACCESS_DENIED;
|
|
}
|
|
|
|
if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1) {
|
|
offset = fcb->Header.FileSize;
|
|
// write_eof = TRUE;
|
|
}
|
|
|
|
TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
|
|
|
|
if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
|
|
IO_STATUS_BLOCK iosb;
|
|
|
|
ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, TRUE);
|
|
|
|
CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
|
|
|
|
if (!NT_SUCCESS(iosb.Status)) {
|
|
ExReleaseResourceLite(fcb->Header.PagingIoResource);
|
|
ERR("CcFlushCache returned %08x\n", iosb.Status);
|
|
return iosb.Status;
|
|
}
|
|
|
|
paging_lock = TRUE;
|
|
|
|
CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, FALSE);
|
|
}
|
|
|
|
if (paging_io) {
|
|
ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, TRUE);
|
|
paging_lock = TRUE;
|
|
}
|
|
|
|
nocsum = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
|
|
nocow = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATACOW;
|
|
|
|
newlength = fcb->ads ? fcb->adssize : fcb->inode_item.st_size;
|
|
|
|
if (fcb->deleted)
|
|
newlength = 0;
|
|
|
|
TRACE("newlength = %llx\n", newlength);
|
|
|
|
// if (KeGetCurrentThread() == fcb->lazy_writer_thread) {
|
|
// ERR("lazy writer on the TV\n");
|
|
// lazy_writer = TRUE;
|
|
// }
|
|
|
|
if (offset.QuadPart + *length > newlength) {
|
|
if (paging_io) {
|
|
if (offset.QuadPart >= newlength) {
|
|
TRACE("paging IO tried to write beyond end of file (file size = %llx, offset = %llx, length = %x)\n", newlength, offset.QuadPart, *length);
|
|
TRACE("filename %.*S\n", fcb->full_filename.Length / sizeof(WCHAR), fcb->full_filename.Buffer);
|
|
TRACE("FileObject: AllocationSize = %llx, FileSize = %llx, ValidDataLength = %llx\n",
|
|
fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
|
|
Status = STATUS_SUCCESS;
|
|
goto end;
|
|
}
|
|
|
|
*length = newlength - offset.QuadPart;
|
|
} else {
|
|
newlength = offset.QuadPart + *length;
|
|
changed_length = TRUE;
|
|
|
|
TRACE("extending length to %llx\n", newlength);
|
|
}
|
|
}
|
|
|
|
make_inline = fcb->ads ? FALSE : newlength <= fcb->Vcb->max_inline;
|
|
|
|
if (changed_length) {
|
|
if (newlength > fcb->Header.AllocationSize.QuadPart) {
|
|
Status = extend_file(fcb, newlength, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("extend_file returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
} else if (fcb->ads)
|
|
fcb->adssize = newlength;
|
|
else
|
|
fcb->inode_item.st_size = newlength;
|
|
|
|
fcb->Header.FileSize.QuadPart = newlength;
|
|
fcb->Header.ValidDataLength.QuadPart = newlength;
|
|
|
|
TRACE("AllocationSize = %llx\n", fcb->Header.AllocationSize.QuadPart);
|
|
TRACE("FileSize = %llx\n", fcb->Header.FileSize.QuadPart);
|
|
TRACE("ValidDataLength = %llx\n", fcb->Header.ValidDataLength.QuadPart);
|
|
}
|
|
|
|
if (!no_cache) {
|
|
BOOL wait;
|
|
|
|
if (!FileObject->PrivateCacheMap || changed_length) {
|
|
CC_FILE_SIZES ccfs;
|
|
|
|
ccfs.AllocationSize = fcb->Header.AllocationSize;
|
|
ccfs.FileSize = fcb->Header.FileSize;
|
|
ccfs.ValidDataLength = fcb->Header.ValidDataLength;
|
|
|
|
if (!FileObject->PrivateCacheMap) {
|
|
TRACE("calling CcInitializeCacheMap...\n");
|
|
CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, FileObject);
|
|
|
|
CcSetReadAheadGranularity(FileObject, READ_AHEAD_GRANULARITY);
|
|
} else {
|
|
CcSetFileSizes(FileObject, &ccfs);
|
|
}
|
|
}
|
|
|
|
// FIXME - uncomment this when async is working
|
|
// wait = IoIsOperationSynchronous(Irp) ? TRUE : FALSE;
|
|
wait = TRUE;
|
|
|
|
TRACE("CcCopyWrite(%p, %llx, %x, %u, %p)\n", FileObject, offset.QuadPart, *length, wait, buf);
|
|
if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) {
|
|
TRACE("CcCopyWrite failed.\n");
|
|
|
|
IoMarkIrpPending(Irp);
|
|
Status = STATUS_PENDING;
|
|
goto end;
|
|
}
|
|
TRACE("CcCopyWrite finished\n");
|
|
|
|
Status = STATUS_SUCCESS;
|
|
goto end;
|
|
}
|
|
|
|
if (fcb->ads) {
|
|
UINT16 datalen;
|
|
UINT8* data2;
|
|
UINT32 maxlen;
|
|
|
|
if (!get_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, &data, &datalen)) {
|
|
ERR("get_xattr failed\n");
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
if (changed_length) {
|
|
// find maximum length of xattr
|
|
maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
|
|
|
|
searchkey.obj_id = fcb->inode;
|
|
searchkey.obj_type = TYPE_XATTR_ITEM;
|
|
searchkey.offset = fcb->adshash;
|
|
|
|
Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
if (keycmp(&tp.item->key, &searchkey)) {
|
|
ERR("error - could not find key for xattr\n");
|
|
free_traverse_ptr(&tp);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
if (tp.item->size < datalen) {
|
|
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, datalen);
|
|
free_traverse_ptr(&tp);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto end;
|
|
}
|
|
|
|
maxlen -= tp.item->size - datalen; // subtract XATTR_ITEM overhead
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
if (newlength > maxlen) {
|
|
ERR("error - xattr too long (%llu > %u)\n", newlength, maxlen);
|
|
Status = STATUS_DISK_FULL;
|
|
goto end;
|
|
}
|
|
|
|
fcb->adssize = newlength;
|
|
|
|
data2 = ExAllocatePoolWithTag(PagedPool, newlength, ALLOC_TAG);
|
|
if (!data2) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
RtlCopyMemory(data2, data, datalen);
|
|
|
|
if (offset.QuadPart > datalen)
|
|
RtlZeroMemory(&data2[datalen], offset.QuadPart - datalen);
|
|
} else
|
|
data2 = data;
|
|
|
|
if (*length > 0)
|
|
RtlCopyMemory(&data2[offset.QuadPart], buf, *length);
|
|
|
|
Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, data2, newlength, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("set_xattr returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
if (data) ExFreePool(data);
|
|
if (data2 != data) ExFreePool(data2);
|
|
|
|
fcb->Header.ValidDataLength.QuadPart = newlength;
|
|
} else {
|
|
if (make_inline) {
|
|
start_data = 0;
|
|
end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
|
|
bufhead = sizeof(EXTENT_DATA) - 1;
|
|
} else {
|
|
start_data = offset.QuadPart & ~(fcb->Vcb->superblock.sector_size - 1);
|
|
end_data = sector_align(offset.QuadPart + *length, fcb->Vcb->superblock.sector_size);
|
|
bufhead = 0;
|
|
}
|
|
|
|
fcb->Header.ValidDataLength.QuadPart = newlength;
|
|
TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
|
|
|
|
data = ExAllocatePoolWithTag(PagedPool, end_data - start_data + bufhead, ALLOC_TAG);
|
|
if (!data) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
RtlZeroMemory(data + bufhead, end_data - start_data);
|
|
|
|
TRACE("start_data = %llx\n", start_data);
|
|
TRACE("end_data = %llx\n", end_data);
|
|
|
|
if (offset.QuadPart > start_data || offset.QuadPart + *length < end_data) {
|
|
if (changed_length) {
|
|
if (fcb->inode_item.st_size > start_data)
|
|
Status = read_file(Vcb, fcb->subvol, fcb->inode, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL);
|
|
else
|
|
Status = STATUS_SUCCESS;
|
|
} else
|
|
Status = read_file(Vcb, fcb->subvol, fcb->inode, data + bufhead, start_data, end_data - start_data, NULL);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("read_file returned %08x\n", Status);
|
|
ExFreePool(data);
|
|
goto end;
|
|
}
|
|
}
|
|
|
|
RtlCopyMemory(data + bufhead + offset.QuadPart - start_data, buf, *length);
|
|
|
|
if (!nocsum)
|
|
InitializeListHead(&changed_sector_list);
|
|
|
|
if (make_inline || !nocow) {
|
|
Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, nocsum ? NULL : &changed_sector_list, rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - excise_extents returned %08x\n", Status);
|
|
ExFreePool(data);
|
|
goto end;
|
|
}
|
|
|
|
if (!make_inline) {
|
|
Status = insert_extent(fcb->Vcb, fcb, start_data, end_data - start_data, data, nocsum ? NULL : &changed_sector_list, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - insert_extent returned %08x\n", Status);
|
|
ExFreePool(data);
|
|
goto end;
|
|
}
|
|
|
|
ExFreePool(data);
|
|
} else {
|
|
ed2 = (EXTENT_DATA*)data;
|
|
ed2->generation = fcb->Vcb->superblock.generation;
|
|
ed2->decoded_size = newlength;
|
|
ed2->compression = BTRFS_COMPRESSION_NONE;
|
|
ed2->encryption = BTRFS_ENCRYPTION_NONE;
|
|
ed2->encoding = BTRFS_ENCODING_NONE;
|
|
ed2->type = EXTENT_TYPE_INLINE;
|
|
|
|
insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, ed2, sizeof(EXTENT_DATA) - 1 + newlength, NULL, rollback);
|
|
|
|
fcb->inode_item.st_blocks += newlength;
|
|
}
|
|
} else {
|
|
Status = do_nocow_write(fcb->Vcb, fcb, start_data, end_data - start_data, data, nocsum ? NULL : &changed_sector_list, rollback);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - do_nocow_write returned %08x\n", Status);
|
|
ExFreePool(data);
|
|
goto end;
|
|
}
|
|
|
|
ExFreePool(data);
|
|
}
|
|
}
|
|
|
|
KeQuerySystemTime(&time);
|
|
win_time_to_unix(time, &now);
|
|
|
|
// ERR("no_cache = %s, FileObject->PrivateCacheMap = %p\n", no_cache ? "TRUE" : "FALSE", FileObject->PrivateCacheMap);
|
|
|
|
// if (!no_cache) {
|
|
// if (!FileObject->PrivateCacheMap) {
|
|
// CC_FILE_SIZES ccfs;
|
|
//
|
|
// ccfs.AllocationSize = fcb->Header.AllocationSize;
|
|
// ccfs.FileSize = fcb->Header.FileSize;
|
|
// ccfs.ValidDataLength = fcb->Header.ValidDataLength;
|
|
//
|
|
// TRACE("calling CcInitializeCacheMap...\n");
|
|
// CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, fcb);
|
|
//
|
|
// changed_length = FALSE;
|
|
// }
|
|
// }
|
|
|
|
if (fcb->ads)
|
|
origii = &fcb->par->inode_item;
|
|
else
|
|
origii = &fcb->inode_item;
|
|
|
|
origii->transid = Vcb->superblock.generation;
|
|
origii->sequence++;
|
|
origii->st_ctime = now;
|
|
|
|
if (!fcb->ads) {
|
|
TRACE("setting st_size to %llx\n", newlength);
|
|
origii->st_size = newlength;
|
|
origii->st_mtime = now;
|
|
}
|
|
|
|
searchkey.obj_id = fcb->inode;
|
|
searchkey.obj_type = TYPE_INODE_ITEM;
|
|
searchkey.offset = 0;
|
|
|
|
Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("error - find_item returned %08x\n", Status);
|
|
goto end;
|
|
}
|
|
|
|
if (!keycmp(&tp.item->key, &searchkey))
|
|
delete_tree_item(Vcb, &tp, rollback);
|
|
else
|
|
WARN("couldn't find existing INODE_ITEM\n");
|
|
|
|
ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
|
|
if (!ii) {
|
|
ERR("out of memory\n");
|
|
free_traverse_ptr(&tp);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto end;
|
|
}
|
|
|
|
RtlCopyMemory(ii, origii, sizeof(INODE_ITEM));
|
|
insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, rollback);
|
|
|
|
free_traverse_ptr(&tp);
|
|
|
|
// FIXME - update inode_item of open FCBs pointing to the same inode (i.e. hardlinked files)
|
|
|
|
if (!nocsum)
|
|
update_checksum_tree(Vcb, &changed_sector_list, rollback);
|
|
|
|
if (changed_length) {
|
|
CC_FILE_SIZES ccfs;
|
|
|
|
ccfs.AllocationSize = fcb->Header.AllocationSize;
|
|
ccfs.FileSize = fcb->Header.FileSize;
|
|
ccfs.ValidDataLength = fcb->Header.ValidDataLength;
|
|
|
|
CcSetFileSizes(FileObject, &ccfs);
|
|
}
|
|
|
|
// FIXME - make sure this still called if STATUS_PENDING and async
|
|
// if (!no_cache) {
|
|
// if (!CcCopyWrite(FileObject, &offset, *length, TRUE, buf)) {
|
|
// ERR("CcCopyWrite failed.\n");
|
|
// }
|
|
// }
|
|
|
|
fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
|
|
fcb->subvol->root_item.ctime = now;
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
end:
|
|
if (FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
|
|
TRACE("CurrentByteOffset was: %llx\n", FileObject->CurrentByteOffset.QuadPart);
|
|
FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
|
|
TRACE("CurrentByteOffset now: %llx\n", FileObject->CurrentByteOffset.QuadPart);
|
|
}
|
|
|
|
if (paging_lock)
|
|
ExReleaseResourceLite(fcb->Header.PagingIoResource);
|
|
|
|
return Status;
|
|
}
|
|
|
|
NTSTATUS write_file(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
|
|
PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
|
|
device_extension* Vcb = DeviceObject->DeviceExtension;
|
|
void* buf;
|
|
NTSTATUS Status;
|
|
LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
|
|
PFILE_OBJECT FileObject = IrpSp->FileObject;
|
|
fcb* fcb = FileObject ? FileObject->FsContext : NULL;
|
|
BOOL locked = FALSE;
|
|
// LARGE_INTEGER freq, time1, time2;
|
|
LIST_ENTRY rollback;
|
|
|
|
InitializeListHead(&rollback);
|
|
|
|
if (Vcb->readonly)
|
|
return STATUS_MEDIA_WRITE_PROTECTED;
|
|
|
|
if (fcb && fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY)
|
|
return STATUS_ACCESS_DENIED;
|
|
|
|
// time1 = KeQueryPerformanceCounter(&freq);
|
|
|
|
TRACE("write\n");
|
|
|
|
Irp->IoStatus.Information = 0;
|
|
|
|
switch (IrpSp->MinorFunction) {
|
|
case IRP_MN_COMPLETE:
|
|
FIXME("unsupported - IRP_MN_COMPLETE\n");
|
|
break;
|
|
|
|
case IRP_MN_COMPLETE_MDL:
|
|
FIXME("unsupported - IRP_MN_COMPLETE_MDL\n");
|
|
break;
|
|
|
|
case IRP_MN_COMPLETE_MDL_DPC:
|
|
FIXME("unsupported - IRP_MN_COMPLETE_MDL_DPC\n");
|
|
break;
|
|
|
|
case IRP_MN_COMPRESSED:
|
|
FIXME("unsupported - IRP_MN_COMPRESSED\n");
|
|
break;
|
|
|
|
case IRP_MN_DPC:
|
|
FIXME("unsupported - IRP_MN_DPC\n");
|
|
break;
|
|
|
|
case IRP_MN_MDL:
|
|
FIXME("unsupported - IRP_MN_MDL\n");
|
|
break;
|
|
|
|
case IRP_MN_MDL_DPC:
|
|
FIXME("unsupported - IRP_MN_MDL_DPC\n");
|
|
break;
|
|
|
|
case IRP_MN_NORMAL:
|
|
TRACE("IRP_MN_NORMAL\n");
|
|
break;
|
|
|
|
default:
|
|
WARN("unknown minor function %x\n", IrpSp->MinorFunction);
|
|
break;
|
|
}
|
|
|
|
TRACE("offset = %llx\n", offset.QuadPart);
|
|
TRACE("length = %x\n", IrpSp->Parameters.Write.Length);
|
|
|
|
if (!Irp->AssociatedIrp.SystemBuffer) {
|
|
buf = map_user_buffer(Irp);
|
|
|
|
if (Irp->MdlAddress && !buf) {
|
|
ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
} else
|
|
buf = Irp->AssociatedIrp.SystemBuffer;
|
|
|
|
TRACE("buf = %p\n", buf);
|
|
|
|
acquire_tree_lock(Vcb, TRUE);
|
|
locked = TRUE;
|
|
|
|
if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
|
|
WARN("tried to write to locked region\n");
|
|
Status = STATUS_FILE_LOCK_CONFLICT;
|
|
goto exit;
|
|
}
|
|
|
|
// ERR("Irp->Flags = %x\n", Irp->Flags);
|
|
Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE, &rollback);
|
|
if (!NT_SUCCESS(Status)) {
|
|
if (Status != STATUS_PENDING)
|
|
ERR("write_file2 returned %08x\n", Status);
|
|
goto exit;
|
|
}
|
|
|
|
Status = consider_write(Vcb);
|
|
|
|
if (NT_SUCCESS(Status)) {
|
|
Irp->IoStatus.Information = IrpSp->Parameters.Write.Length;
|
|
|
|
#ifdef DEBUG_PARANOID
|
|
check_extents_consistent(Vcb, FileObject->FsContext); // TESTING
|
|
|
|
// check_extent_tree_consistent(Vcb);
|
|
#endif
|
|
}
|
|
|
|
exit:
|
|
if (locked) {
|
|
if (NT_SUCCESS(Status))
|
|
clear_rollback(&rollback);
|
|
else
|
|
do_rollback(Vcb, &rollback);
|
|
|
|
release_tree_lock(Vcb, TRUE);
|
|
}
|
|
|
|
// time2 = KeQueryPerformanceCounter(NULL);
|
|
|
|
// ERR("time = %u (freq = %u)\n", (UINT32)(time2.QuadPart - time1.QuadPart), (UINT32)freq.QuadPart);
|
|
|
|
return Status;
|
|
}
|