reactos/drivers/filesystems/btrfs/write.c
Pierre Schweitzer 321bcc056d Create the AHCI branch for Aman's work
svn path=/branches/GSoC_2016/AHCI/; revision=71203
2016-04-24 20:17:09 +00:00

6711 lines
230 KiB
C

/* Copyright (c) Mark Harmstone 2016
*
* This file is part of WinBtrfs.
*
* WinBtrfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public Licence as published by
* the Free Software Foundation, either version 3 of the Licence, or
* (at your option) any later version.
*
* WinBtrfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public Licence for more details.
*
* You should have received a copy of the GNU Lesser General Public Licence
* along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
#include "btrfs_drv.h"
// Bytes left in a 4096-byte node after the tree header and one leaf_node entry.
// NOTE(review): presumably the most checksum data that fits in a single leaf item - confirm against the users of this macro.
#define MAX_CSUM_SIZE (4096 - sizeof(tree_header) - sizeof(leaf_node))
// BOOL did_split;
// Debug/testing flag; no live code in this part of the file reads or writes it.
BOOL chunk_test = FALSE;
// Per-request state shared between write_data_phys and its completion routine write_completion.
typedef struct {
KEVENT Event; // set by write_completion when the IRP has completed
IO_STATUS_BLOCK iosb; // write_completion stores a copy of Irp->IoStatus here
} write_context;
// An EXTENT_ITEM followed by a one-byte type tag and an EXTENT_DATA_REF.
// NOTE(review): presumably mirrors an extent item carrying a single inline data reference;
// this relies on the structure being packed - confirm in btrfs_drv.h.
typedef struct {
EXTENT_ITEM ei;
UINT8 type; // tag identifying the kind of reference that follows
EXTENT_DATA_REF edr;
} EXTENT_ITEM_DATA_REF;
// An EXTENT_ITEM_TREE followed by a one-byte type tag and a TREE_BLOCK_REF.
// insert_tree_extent builds one of these (type = TYPE_TREE_BLOCK_REF, tbr.offset = tree id)
// and inserts it into the extent tree as a TYPE_EXTENT_ITEM.
typedef struct {
EXTENT_ITEM_TREE eit;
UINT8 type; // tag identifying the kind of reference that follows
TREE_BLOCK_REF tbr;
} EXTENT_ITEM_TREE2;
// A plain EXTENT_ITEM followed by a one-byte type tag and a TREE_BLOCK_REF.
// insert_tree_extent_skinny builds one of these and inserts it as a TYPE_METADATA_ITEM
// when the skinny-metadata incompat flag is set in the superblock.
typedef struct {
EXTENT_ITEM ei;
UINT8 type; // tag identifying the kind of reference that follows
TREE_BLOCK_REF tbr;
} EXTENT_ITEM_SKINNY_METADATA;
// A CHUNK_ITEM together with the stripe records that follow it.
// Declared with a single stripe; write_data indexes stripes[] up to ci.num_stripes,
// so the underlying item is expected to be large enough for all of them.
typedef struct {
CHUNK_ITEM ci;
CHUNK_ITEM_STRIPE stripes[1];
} CHUNK_ITEM2;
// List node carrying a 64-bit key.
// NOTE(review): presumably the list is kept sorted by key - the code that uses it is not in this part of the file.
typedef struct {
LIST_ENTRY list_entry;
UINT64 key;
} ordered_list;
// An ordered_list node extended with a length, a checksum array and a deleted flag.
// NOTE(review): presumably describes a run of sectors whose checksums must be added or removed;
// the users of this type are not in this part of the file - confirm there.
typedef struct {
ordered_list ol;
ULONG length;
UINT32* checksums;
BOOL deleted;
} changed_sector;
// Forward declarations of file-local helpers whose definitions are not in this part of the file.
static NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback);
static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len);
static NTSTATUS convert_shared_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback);
/* Completion routine installed by write_data_phys.
 *
 * conptr is the write_context of the request. The IRP's final I/O status is
 * copied into it and its event is signalled so that a waiter can continue.
 * Always returns STATUS_MORE_PROCESSING_REQUIRED; write_data_phys frees the
 * IRP itself after the call returns. */
static NTSTATUS STDCALL write_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
    write_context* ctx = (write_context*)conptr;

    ctx->iosb = Irp->IoStatus;
    KeSetEvent(&ctx->Event, 0, FALSE);

    return STATUS_MORE_PROCESSING_REQUIRED;
}
/* Synchronously writes length bytes from data to the given device at byte
 * offset address.
 *
 * An IRP_MJ_WRITE request is built by hand, sent with IoCallDriver and, if
 * the driver returns STATUS_PENDING, waited on via the event in a
 * write_context that write_completion signals.
 *
 * Returns the final status of the write, STATUS_INSUFFICIENT_RESOURCES if the
 * context or MDL cannot be allocated, or STATUS_INTERNAL_ERROR if the IRP
 * cannot be allocated. */
static NTSTATUS STDCALL write_data_phys(PDEVICE_OBJECT device, UINT64 address, void* data, UINT32 length) {
    NTSTATUS Status;
    LARGE_INTEGER offset;
    PIRP Irp;
    PIO_STACK_LOCATION IrpSp;
    write_context* context = NULL;

    TRACE("(%p, %llx, %p, %x)\n", device, address, data, length);

    context = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_context), ALLOC_TAG);
    if (!context) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    RtlZeroMemory(context, sizeof(write_context));
    KeInitializeEvent(&context->Event, NotificationEvent, FALSE);

    offset.QuadPart = address;

    Irp = IoAllocateIrp(device->StackSize, FALSE);
    if (!Irp) {
        ERR("IoAllocateIrp failed\n");
        Status = STATUS_INTERNAL_ERROR;
        goto exit2;
    }

    IrpSp = IoGetNextIrpStackLocation(Irp);
    IrpSp->MajorFunction = IRP_MJ_WRITE;

    // Describe the buffer in whichever way the target device asks for.
    if (device->Flags & DO_BUFFERED_IO) {
        Irp->AssociatedIrp.SystemBuffer = data;
        Irp->Flags = IRP_BUFFERED_IO;
    } else if (device->Flags & DO_DIRECT_IO) {
        Irp->MdlAddress = IoAllocateMdl(data, length, FALSE, FALSE, NULL);
        if (!Irp->MdlAddress) {
            ERR("IoAllocateMdl failed\n");
            // Status must be set here: it is returned via the exit path.
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }

        // NOTE(review): MmProbeAndLockPages raises an exception on failure and is not guarded here.
        MmProbeAndLockPages(Irp->MdlAddress, KernelMode, IoWriteAccess);
    } else {
        Irp->UserBuffer = data;
    }

    IrpSp->Parameters.Write.Length = length;
    IrpSp->Parameters.Write.ByteOffset = offset;

    Irp->UserIosb = &context->iosb;
    Irp->UserEvent = &context->Event;

    IoSetCompletionRoutine(Irp, write_completion, context, TRUE, TRUE, TRUE);

    // FIXME - support multiple devices
    Status = IoCallDriver(device, Irp);

    if (Status == STATUS_PENDING) {
        KeWaitForSingleObject(&context->Event, Executive, KernelMode, FALSE, NULL);
        Status = context->iosb.Status;
    }

    if (!NT_SUCCESS(Status)) {
        ERR("IoCallDriver returned %08x\n", Status);
    }

    // Tear the MDL down under exactly the condition it was set up under above,
    // so a device advertising both flags never unlocks an MDL that was not created.
    if (!(device->Flags & DO_BUFFERED_IO) && (device->Flags & DO_DIRECT_IO)) {
        MmUnlockPages(Irp->MdlAddress);
        IoFreeMdl(Irp->MdlAddress);
    }

exit:
    IoFreeIrp(Irp);

exit2:
    if (context)
        ExFreePool(context);

    return Status;
}
/* Writes the in-memory superblock of Vcb to the given device.
 *
 * superblock_addrs is walked until a zero entry or until a copy would no
 * longer fit within Vcb->length. For each location the superblock's
 * sb_phys_addr and dev_item are updated, the CRC32C over everything from the
 * uuid field onwards is recomputed, and the block is written with
 * write_data_phys.
 *
 * Returns the status of the last write attempted; the loop stops at the first
 * failure. If no location qualifies, STATUS_INTERNAL_ERROR is returned. */
static NTSTATUS STDCALL write_superblock(device_extension* Vcb, device* device) {
    // Initialised on every build so the zero-iteration case never returns an indeterminate value.
    NTSTATUS Status = STATUS_INTERNAL_ERROR;
    unsigned int i = 0;
    UINT32 crc32;

    // FIXME - work with RAID
    // FIXME - only write one superblock if on SSD (?)
    while (superblock_addrs[i] > 0 && Vcb->length >= superblock_addrs[i] + sizeof(superblock)) {
        TRACE("writing superblock %u\n", i);

        Vcb->superblock.sb_phys_addr = superblock_addrs[i];
        RtlCopyMemory(&Vcb->superblock.dev_item, &device->devitem, sizeof(DEV_ITEM));

        // The checksum field itself is excluded from the checksummed range.
        crc32 = calc_crc32c(0xffffffff, (UINT8*)&Vcb->superblock.uuid, (ULONG)sizeof(superblock) - sizeof(Vcb->superblock.checksum));
        crc32 = ~crc32;
        TRACE("crc32 is %08x\n", crc32);
        RtlCopyMemory(&Vcb->superblock.checksum, &crc32, sizeof(UINT32));

        Status = write_data_phys(device->devobj, superblock_addrs[i], &Vcb->superblock, sizeof(superblock));
        if (!NT_SUCCESS(Status))
            break;

        i++;
    }

    return Status;
}
/* Looks for free space of at least length bytes in chunk c.
 *
 * A free entry whose size equals length is returned immediately; otherwise
 * the smallest free entry larger than length is chosen. On success the
 * entry's start offset is stored in *address and TRUE is returned; FALSE
 * means no free entry is big enough. */
static BOOL find_address_in_chunk(device_extension* Vcb, chunk* c, UINT64 length, UINT64* address) {
    LIST_ENTRY* entry;
    space* candidate = NULL;

    TRACE("(%p, %llx, %llx, %p)\n", Vcb, c->offset, length, address);

    for (entry = c->space.Flink; entry != &c->space; entry = entry->Flink) {
        space* cur = CONTAINING_RECORD(entry, space, list_entry);

        if (cur->type != SPACE_TYPE_FREE)
            continue;

        if (cur->size == length) {
            *address = cur->offset;
            TRACE("returning exact fit at %llx\n", cur->offset);
            return TRUE;
        }

        // Remember the tightest fit seen so far.
        if (cur->size > length && (!candidate || candidate->size > cur->size))
            candidate = cur;
    }

    if (!candidate)
        return FALSE;

    TRACE("returning best fit at %llx\n", candidate->offset);
    *address = candidate->offset;
    return TRUE;
}
/* Records the range [offset, offset + size) of chunk c as having the given
 * space type.
 *
 * Existing entries overlapping the range are removed, split or truncated, a
 * new entry for the range is inserted in address order, and it is then merged
 * with a neighbour on either side that has the same type. The chunk is
 * flagged as space_changed.
 *
 * On allocation failure the function logs and returns; entries already
 * adjusted at that point are not restored. */
void add_to_space_list(chunk* c, UINT64 offset, UINT64 size, UINT8 type) {
    LIST_ENTRY *le = c->space.Flink, *nextle, *insbef;
    space *s, *s2, *s3;
#ifdef DEBUG_PARANOID
    UINT64 lastaddr;
#endif

    TRACE("(%p, %llx, %llx, %x)\n", c, offset, size, type);

#ifdef DEBUG_PARANOID
    // TESTING
    le = c->space.Flink;
    while (le != &c->space) {
        s = CONTAINING_RECORD(le, space, list_entry);
        TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
        le = le->Flink;
    }
#endif

    c->space_changed = TRUE;

    le = c->space.Flink;
    insbef = &c->space; // entry the new record is inserted in front of
    while (le != &c->space) {
        s = CONTAINING_RECORD(le, space, list_entry);
        nextle = le->Flink;

        if (s->offset >= offset + size) { // entirely after the new range
            insbef = le;
            break;
        }

        if (s->offset >= offset && s->offset + s->size <= offset + size) { // delete entirely
            if (s->offset + s->size == offset + size) {
                insbef = s->list_entry.Flink;
                RemoveEntryList(&s->list_entry);
                ExFreePool(s);
                break;
            }

            RemoveEntryList(&s->list_entry);
            ExFreePool(s);
        } else if (s->offset < offset && s->offset + s->size > offset + size) { // split in two
            s3 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
            if (!s3) {
                ERR("out of memory\n");
                return;
            }

            s3->offset = offset + size;
            s3->size = s->size - size - offset + s->offset;
            s3->type = s->type;
            InsertHeadList(&s->list_entry, &s3->list_entry);
            insbef = &s3->list_entry;

            s->size = offset - s->offset;
            break;
        } else if (s->offset + s->size > offset && s->offset + s->size <= offset + size) { // truncate before
            s->size = offset - s->offset;
        } else if (s->offset < offset + size && s->offset + s->size > offset + size) { // truncate after
            // Keep only the tail beyond the new range. The overlap removed is
            // (offset + size) - s->offset, which also holds when s starts after offset.
            s->size = s->offset + s->size - (offset + size);
            s->offset = offset + size;

            insbef = le;
            break;
        }

        le = nextle;
    }

    s2 = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
    if (!s2) {
        ERR("out of memory\n");
        return;
    }

    s2->offset = offset;
    s2->size = size;
    s2->type = type;
    InsertTailList(insbef, &s2->list_entry);

    // merge entries if same type
    if (s2->list_entry.Blink != &c->space) {
        s = CONTAINING_RECORD(s2->list_entry.Blink, space, list_entry);

        if (s->type == type) {
            s->size += s2->size;

            RemoveEntryList(&s2->list_entry);
            ExFreePool(s2);

            s2 = s;
        }
    }

    if (s2->list_entry.Flink != &c->space) {
        s = CONTAINING_RECORD(s2->list_entry.Flink, space, list_entry);

        if (s->type == type) {
            s2->size += s->size;

            RemoveEntryList(&s->list_entry);
            ExFreePool(s);
        }
    }

    le = c->space.Flink;
    while (le != &c->space) {
        s = CONTAINING_RECORD(le, space, list_entry);
        TRACE("%llx,%llx,%x\n", s->offset, s->size, s->type);
        le = le->Flink;
    }

#ifdef DEBUG_PARANOID
    // TESTING - the entries must tile the chunk exactly, with no gaps or overlaps
    lastaddr = c->offset;
    le = c->space.Flink;
    while (le != &c->space) {
        s = CONTAINING_RECORD(le, space, list_entry);

        if (s->offset != lastaddr) {
            ERR("inconsistency detected!\n");
            int3;
        }

        lastaddr = s->offset + s->size;
        le = le->Flink;
    }

    if (lastaddr != c->offset + c->chunk_item->size) {
        ERR("inconsistency detected - space doesn't run all the way to end of chunk\n");
        int3;
    }
#endif
}
/* Returns the chunk in Vcb->chunks whose range
 * [offset, offset + chunk_item->size) contains address, or NULL if no
 * chunk covers it. */
chunk* get_chunk_from_address(device_extension* Vcb, UINT64 address) {
    LIST_ENTRY* entry;

    for (entry = Vcb->chunks.Flink; entry != &Vcb->chunks; entry = entry->Flink) {
        chunk* candidate = CONTAINING_RECORD(entry, chunk, list_entry);
        UINT64 chunk_end = candidate->offset + candidate->chunk_item->size;

        if (address >= candidate->offset && address < chunk_end)
            return candidate;
    }

    return NULL;
}
// Working record used by alloc_chunk for each stripe of a chunk being created.
typedef struct {
disk_hole* dh; // hole chosen to hold this stripe; NULL until one is found
device* device; // device that owns dh; only assigned together with dh
} stripe;
/* Marks the hole selected for stripe s as provisional.
 *
 * A hole no larger than max_stripe_size is flagged as it stands. A larger
 * hole is cut down to max_stripe_size first, and the remainder is appended to
 * the device's disk_holes list as a separate non-provisional hole.
 *
 * If the remainder cannot be allocated the function logs and returns without
 * changing the hole. */
static void add_provisional_disk_hole(device_extension* Vcb, stripe* s, UINT64 max_stripe_size) {
    disk_hole* hole = s->dh;
    disk_hole* remainder;

    if (hole->size <= max_stripe_size) {
        hole->provisional = TRUE;
        return;
    }

    remainder = ExAllocatePoolWithTag(PagedPool, sizeof(disk_hole), ALLOC_TAG);
    if (!remainder) {
        ERR("out of memory\n");
        return;
    }

    // The remainder starts where the trimmed hole will end.
    remainder->address = hole->address + max_stripe_size;
    remainder->size = hole->size - max_stripe_size;
    remainder->provisional = FALSE;
    InsertTailList(&s->device->disk_holes, &remainder->listentry);

    hole->size = max_stripe_size;
    hole->provisional = TRUE;
}
/* Chooses a logical address for a new chunk of the given size.
 *
 * The CHUNK_ITEMs of object 0x100 in the chunk tree are walked in key order
 * while tracking the end of the previous chunk. The first gap of at least
 * size bytes wins; if there is none, the end of the last chunk seen is
 * returned. Returns 0xffffffffffffffff if the initial lookup fails. */
static UINT64 find_new_chunk_address(device_extension* Vcb, UINT64 size) {
    KEY searchkey;
    traverse_ptr tp, next_tp;
    UINT64 lastaddr = 0;
    NTSTATUS Status;

    searchkey.obj_id = 0x100;
    searchkey.obj_type = TYPE_CHUNK_ITEM;
    searchkey.offset = 0;

    Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return 0xffffffffffffffff;
    }

    for (;;) {
        if (tp.item->key.obj_type == TYPE_CHUNK_ITEM) {
            if (tp.item->size < sizeof(CHUNK_ITEM)) {
                ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM));
            } else {
                CHUNK_ITEM* ci = (CHUNK_ITEM*)tp.item->data;

                // Is the gap in front of this chunk big enough?
                if (tp.item->key.offset >= lastaddr + size) {
                    free_traverse_ptr(&tp);
                    return lastaddr;
                }

                lastaddr = tp.item->key.offset + ci->size;
            }
        }

        if (!find_next_item(Vcb, &tp, &next_tp, FALSE))
            break;

        free_traverse_ptr(&tp);
        tp = next_tp;

        // Stop once the walk has left the chunk items.
        if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
            break;
    }

    free_traverse_ptr(&tp);
    return lastaddr;
}
/* Adds size to the bytes_used counter of device and rewrites its DEV_ITEM in
 * the chunk tree.
 *
 * The existing item (1, TYPE_DEV_ITEM, dev_id) is deleted and a copy of the
 * updated in-memory devitem is inserted in its place; both operations are
 * passed the rollback list.
 *
 * Returns TRUE on success. FALSE is returned if the item cannot be found, if
 * memory runs out (in which case nothing has been modified), or if the
 * insertion fails. */
static BOOL increase_dev_item_used(device_extension* Vcb, device* device, UINT64 size, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp;
    DEV_ITEM* di;
    NTSTATUS Status;

    searchkey.obj_id = 1;
    searchkey.obj_type = TYPE_DEV_ITEM;
    searchkey.offset = device->devitem.dev_id;

    Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return FALSE;
    }

    if (keycmp(&tp.item->key, &searchkey)) {
        ERR("error - could not find DEV_ITEM for device %llx\n", device->devitem.dev_id);
        free_traverse_ptr(&tp);
        return FALSE;
    }

    // Allocate before touching anything, so running out of memory leaves the
    // tree item and the in-memory counter as they were.
    di = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_ITEM), ALLOC_TAG);
    if (!di) {
        ERR("out of memory\n");
        free_traverse_ptr(&tp);
        return FALSE;
    }

    delete_tree_item(Vcb, &tp, rollback);
    free_traverse_ptr(&tp);

    device->devitem.bytes_used += size;
    RtlCopyMemory(di, &device->devitem, sizeof(DEV_ITEM));

    if (!insert_tree_item(Vcb, Vcb->chunk_root, 1, TYPE_DEV_ITEM, device->devitem.dev_id, di, sizeof(DEV_ITEM), NULL, rollback)) {
        ERR("insert_tree_item failed\n");
        ExFreePool(di); // the caller keeps ownership of the buffer when insertion fails
        return FALSE;
    }

    return TRUE;
}
/* Resolves the provisional holes of a device.
 *
 * commit == TRUE:  every provisional hole is removed from the list and freed.
 * commit == FALSE: provisional holes revert to ordinary holes, after which any
 *                  hole that begins exactly where the preceding list entry
 *                  ends is folded into that entry. */
static void reset_disk_holes(device* device, BOOL commit) {
    LIST_ENTRY* head = &device->disk_holes;
    LIST_ENTRY* cur;
    LIST_ENTRY* following;
    disk_hole* hole;

    for (cur = head->Flink; cur != head; cur = following) {
        following = cur->Flink; // saved first: cur may be unlinked below
        hole = CONTAINING_RECORD(cur, disk_hole, listentry);

        if (!hole->provisional)
            continue;

        if (commit) {
            RemoveEntryList(cur);
            ExFreePool(hole);
        } else {
            hole->provisional = FALSE;
        }
    }

    if (commit)
        return;

    for (cur = head->Flink; cur != head; cur = cur->Flink) {
        hole = CONTAINING_RECORD(cur, disk_hole, listentry);
        following = cur->Flink;

        // Absorb each directly following entry for as long as it is contiguous.
        while (following != head) {
            disk_hole* neighbour = CONTAINING_RECORD(following, disk_hole, listentry);
            LIST_ENTRY* after;

            if (neighbour->address != hole->address + hole->size)
                break;

            after = following->Flink;
            hole->size += neighbour->size;
            RemoveEntryList(following);
            ExFreePool(neighbour);
            following = after;
        }
    }
}
/* Adds an item to the system chunk array ("bootstrap") in the superblock.
 *
 * A sys_chunk holding the key and a private copy of data is inserted into
 * Vcb->sys_chunks in key order, superblock.n grows by sizeof(KEY) + size, and
 * superblock.sys_chunk_array is rebuilt from the whole list.
 *
 * Returns STATUS_INTERNAL_ERROR if the array has no room left,
 * STATUS_INSUFFICIENT_RESOURCES on allocation failure, otherwise
 * STATUS_SUCCESS. */
static NTSTATUS add_to_bootstrap(device_extension* Vcb, UINT64 obj_id, UINT8 obj_type, UINT64 offset, void* data, ULONG size) {
    sys_chunk* sc;
    LIST_ENTRY* le;
    USHORT pos;

    if (Vcb->superblock.n + sizeof(KEY) + size > SYS_CHUNK_ARRAY_SIZE) {
        ERR("error - bootstrap is full\n");
        return STATUS_INTERNAL_ERROR;
    }

    sc = ExAllocatePoolWithTag(PagedPool, sizeof(sys_chunk), ALLOC_TAG);
    if (!sc) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    sc->key.obj_id = obj_id;
    sc->key.obj_type = obj_type;
    sc->key.offset = offset;
    sc->size = size;

    sc->data = ExAllocatePoolWithTag(PagedPool, sc->size, ALLOC_TAG);
    if (!sc->data) {
        ERR("out of memory\n");
        ExFreePool(sc);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    RtlCopyMemory(sc->data, data, sc->size);

    // Insert in front of the first entry whose key compares greater than the new one.
    for (le = Vcb->sys_chunks.Flink; le != &Vcb->sys_chunks; le = le->Flink) {
        sys_chunk* existing = CONTAINING_RECORD(le, sys_chunk, list_entry);

        if (keycmp(&existing->key, &sc->key) == 1)
            break;
    }
    InsertTailList(le, &sc->list_entry);

    Vcb->superblock.n += sizeof(KEY) + size;

    // Re-serialise every entry as its key immediately followed by its data.
    pos = 0;
    for (le = Vcb->sys_chunks.Flink; le != &Vcb->sys_chunks; le = le->Flink) {
        sys_chunk* entry = CONTAINING_RECORD(le, sys_chunk, list_entry);

        TRACE("%llx,%x,%llx\n", entry->key.obj_id, entry->key.obj_type, entry->key.offset);

        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[pos], &entry->key, sizeof(KEY));
        pos += sizeof(KEY);

        RtlCopyMemory(&Vcb->superblock.sys_chunk_array[pos], entry->data, entry->size);
        pos += entry->size;
    }

    return STATUS_SUCCESS;
}
/* Allocates a new chunk with the given block-group flags.
 *
 * Steps:
 *   1. choose stripe/chunk size limits from the block type in flags;
 *   2. pick one disk hole per stripe (two stripes for DUPLICATE, one
 *      otherwise; the RAID profiles are not supported) and reserve it
 *      provisionally;
 *   3. insert the CHUNK_ITEM (also into the bootstrap array for SYSTEM
 *      chunks), the BLOCK_GROUP_ITEM and one DEV_EXTENT per stripe, and bump
 *      each device's bytes_used;
 *   4. commit the provisional holes and append the chunk to Vcb->chunks.
 *
 * On any failure the provisional holes are released again and the memory
 * owned by the half-built chunk is freed. Tree items already inserted are
 * left to the caller's rollback list.
 *
 * Returns the new chunk, or NULL on failure. */
static chunk* alloc_chunk(device_extension* Vcb, UINT64 flags, LIST_ENTRY* rollback) {
    UINT64 max_stripe_size, max_chunk_size, stripe_size;
    UINT64 total_size = 0, i, j, logaddr;
    int num_stripes;
    disk_hole* dh;
    stripe* stripes;
    ULONG cisize;
    CHUNK_ITEM* ci;
    CHUNK_ITEM_STRIPE* cis;
    chunk* c = NULL;
    space* s;
    BOOL success = FALSE;
    BLOCK_GROUP_ITEM* bgi;

    for (i = 0; i < Vcb->superblock.num_devices; i++) {
        total_size += Vcb->devices[i].devitem.num_bytes;
    }
    TRACE("total_size = %llx\n", total_size);

    if (flags & BLOCK_FLAG_DATA) {
        max_stripe_size = 0x40000000; // 1 GB
        max_chunk_size = 10 * max_stripe_size;
    } else if (flags & BLOCK_FLAG_METADATA) {
        if (total_size > 0xC80000000) // 50 GB
            max_stripe_size = 0x40000000; // 1 GB
        else
            max_stripe_size = 0x10000000; // 256 MB

        max_chunk_size = max_stripe_size;
    } else if (flags & BLOCK_FLAG_SYSTEM) {
        max_stripe_size = 0x2000000; // 32 MB
        max_chunk_size = 2 * max_stripe_size;
    } else {
        // Without a block type there is no size policy to apply.
        ERR("flags %llx do not specify a block type\n", flags);
        return NULL;
    }

    // FIXME - make sure whole number of sectors?
    max_chunk_size = min(max_chunk_size, total_size / 10); // cap at 10%

    TRACE("would allocate a new chunk of %llx bytes and stripe %llx\n", max_chunk_size, max_stripe_size);

    if (flags & BLOCK_FLAG_DUPLICATE) {
        num_stripes = 2;
    } else if (flags & BLOCK_FLAG_RAID0) {
        FIXME("RAID0 not yet supported\n");
        return NULL;
    } else if (flags & BLOCK_FLAG_RAID1) {
        FIXME("RAID1 not yet supported\n");
        return NULL;
    } else if (flags & BLOCK_FLAG_RAID10) {
        FIXME("RAID10 not yet supported\n");
        return NULL;
    } else if (flags & BLOCK_FLAG_RAID5) {
        FIXME("RAID5 not yet supported\n");
        return NULL;
    } else if (flags & BLOCK_FLAG_RAID6) {
        FIXME("RAID6 not yet supported\n");
        return NULL;
    } else { // SINGLE
        num_stripes = 1;
    }

    stripes = ExAllocatePoolWithTag(PagedPool, sizeof(stripe) * num_stripes, ALLOC_TAG);
    if (!stripes) {
        ERR("out of memory\n");
        return NULL;
    }

    // Clear every entry up front so the failure path can tell which stripes
    // were actually assigned a device.
    for (i = 0; i < num_stripes; i++) {
        stripes[i].dh = NULL;
        stripes[i].device = NULL;
    }

    for (i = 0; i < num_stripes; i++) {
        // Take the largest non-provisional hole, stopping early at the first
        // one that already satisfies max_stripe_size.
        for (j = 0; j < Vcb->superblock.num_devices; j++) {
            LIST_ENTRY* le = Vcb->devices[j].disk_holes.Flink;

            while (le != &Vcb->devices[j].disk_holes) {
                dh = CONTAINING_RECORD(le, disk_hole, listentry);

                if (!dh->provisional) {
                    if (!stripes[i].dh || dh->size > stripes[i].dh->size) {
                        stripes[i].dh = dh;
                        stripes[i].device = &Vcb->devices[j];

                        if (stripes[i].dh->size >= max_stripe_size)
                            break;
                    }
                }

                le = le->Flink;
            }

            if (stripes[i].dh && stripes[i].dh->size >= max_stripe_size)
                break;
        }

        if (stripes[i].dh) {
            TRACE("good DH: device %llx, address %llx, size %llx\n", stripes[i].device->devitem.dev_id, stripes[i].dh->address, stripes[i].dh->size);
        } else {
            TRACE("good DH not found\n");
            goto end;
        }

        add_provisional_disk_hole(Vcb, &stripes[i], max_stripe_size);
    }

    // Every stripe must fit, so use the smallest hole chosen.
    stripe_size = min(stripes[0].dh->size, max_stripe_size);
    for (i = 1; i < num_stripes; i++) {
        stripe_size = min(stripe_size, stripes[i].dh->size);
    }

    // FIXME - make sure stripe_size aligned properly
    // FIXME - obey max_chunk_size

    c = ExAllocatePoolWithTag(PagedPool, sizeof(chunk), ALLOC_TAG);
    if (!c) {
        ERR("out of memory\n");
        goto end;
    }

    // Put the chunk into a state the failure path can always tear down.
    c->devices = NULL;
    c->chunk_item = NULL;
    InitializeListHead(&c->space);

    // add CHUNK_ITEM to tree 3
    cisize = sizeof(CHUNK_ITEM) + (num_stripes * sizeof(CHUNK_ITEM_STRIPE));
    ci = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
    if (!ci) {
        ERR("out of memory\n");
        goto end;
    }

    ci->size = stripe_size; // FIXME for RAID
    ci->root_id = Vcb->extent_root->id;
    ci->stripe_length = 0x10000; // FIXME? BTRFS_STRIPE_LEN in kernel
    ci->type = flags;
    ci->opt_io_alignment = ci->stripe_length;
    ci->opt_io_width = ci->stripe_length;
    ci->sector_size = stripes[0].device->devitem.minimal_io_size;
    ci->num_stripes = num_stripes;
    ci->sub_stripes = 1;

    c->devices = ExAllocatePoolWithTag(PagedPool, sizeof(device*) * num_stripes, ALLOC_TAG);
    if (!c->devices) {
        ERR("out of memory\n");
        ExFreePool(ci);
        goto end;
    }

    // The stripe records follow the CHUNK_ITEM directly in the same buffer.
    cis = (CHUNK_ITEM_STRIPE*)&ci[1];
    for (i = 0; i < num_stripes; i++) {
        cis[i].dev_id = stripes[i].device->devitem.dev_id;
        cis[i].offset = stripes[i].dh->address;
        cis[i].dev_uuid = stripes[i].device->devitem.device_uuid;

        c->devices[i] = stripes[i].device;
    }

    logaddr = find_new_chunk_address(Vcb, ci->size);
    if (logaddr == 0xffffffffffffffff) {
        ERR("find_new_chunk_address failed\n");
        ExFreePool(ci);
        goto end;
    }

    if (!insert_tree_item(Vcb, Vcb->chunk_root, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize, NULL, rollback)) {
        ERR("insert_tree_item failed\n");
        ExFreePool(ci);
        goto end;
    }

    if (flags & BLOCK_FLAG_SYSTEM) {
        NTSTATUS Status = add_to_bootstrap(Vcb, 0x100, TYPE_CHUNK_ITEM, logaddr, ci, cisize);
        if (!NT_SUCCESS(Status)) {
            ERR("add_to_bootstrap returned %08x\n", Status);
            goto end;
        }
    }

    Vcb->superblock.chunk_root_generation = Vcb->superblock.generation;

    // The chunk keeps its own copy of the item.
    c->chunk_item = ExAllocatePoolWithTag(PagedPool, cisize, ALLOC_TAG);
    if (!c->chunk_item) {
        ERR("out of memory\n");
        goto end;
    }

    RtlCopyMemory(c->chunk_item, ci, cisize);
    c->size = cisize;
    c->offset = logaddr;
    c->used = c->oldused = 0;
    c->space_changed = FALSE;

    // A new chunk starts as a single free extent covering all of it.
    s = ExAllocatePoolWithTag(PagedPool, sizeof(space), ALLOC_TAG);
    if (!s) {
        ERR("out of memory\n");
        goto end;
    }

    s->offset = c->offset;
    s->size = c->chunk_item->size;
    s->type = SPACE_TYPE_FREE;
    InsertTailList(&c->space, &s->list_entry);

    protect_superblocks(Vcb, c);

    // add BLOCK_GROUP_ITEM to tree 2
    bgi = ExAllocatePoolWithTag(PagedPool, sizeof(BLOCK_GROUP_ITEM), ALLOC_TAG);
    if (!bgi) {
        ERR("out of memory\n");
        goto end;
    }

    bgi->used = 0;
    bgi->chunk_tree = 0x100;
    bgi->flags = flags;

    if (!insert_tree_item(Vcb, Vcb->extent_root, logaddr, TYPE_BLOCK_GROUP_ITEM, ci->size, bgi, sizeof(BLOCK_GROUP_ITEM), NULL, rollback)) {
        ERR("insert_tree_item failed\n");
        ExFreePool(bgi);
        goto end;
    }

    // add DEV_EXTENTs to tree 4
    for (i = 0; i < num_stripes; i++) {
        DEV_EXTENT* de;

        de = ExAllocatePoolWithTag(PagedPool, sizeof(DEV_EXTENT), ALLOC_TAG);
        if (!de) {
            ERR("out of memory\n");
            goto end;
        }

        de->chunktree = Vcb->chunk_root->id;
        de->objid = 0x100;
        de->address = logaddr;
        de->length = ci->size;
        de->chunktree_uuid = Vcb->chunk_root->treeholder.tree->header.chunk_tree_uuid;

        if (!insert_tree_item(Vcb, Vcb->dev_root, stripes[i].device->devitem.dev_id, TYPE_DEV_EXTENT, stripes[i].dh->address, de, sizeof(DEV_EXTENT), NULL, rollback)) {
            ERR("insert_tree_item failed\n");
            ExFreePool(de);
            goto end;
        }

        if (!increase_dev_item_used(Vcb, stripes[i].device, ci->size, rollback)) {
            ERR("increase_dev_item_used failed\n");
            goto end;
        }
    }

    // Commit the reservations, once per distinct device.
    for (i = 0; i < num_stripes; i++) {
        BOOL b = FALSE;

        for (j = 0; j < i; j++) {
            if (stripes[j].device == stripes[i].device)
                b = TRUE;
        }

        if (!b)
            reset_disk_holes(stripes[i].device, TRUE);
    }

    success = TRUE;

end:
    if (!success) {
        // Release the reservations, once per distinct device. Stripes the
        // search never reached still have a NULL device and are skipped.
        for (i = 0; i < num_stripes; i++) {
            BOOL b = FALSE;

            if (!stripes[i].device)
                continue;

            for (j = 0; j < i; j++) {
                if (stripes[j].device == stripes[i].device)
                    b = TRUE;
            }

            if (!b)
                reset_disk_holes(stripes[i].device, FALSE);
        }

        if (c) {
            // Free whatever the space list holds now; protect_superblocks may
            // have changed it since the initial entry was inserted.
            LIST_ENTRY* le = c->space.Flink;

            while (le != &c->space) {
                LIST_ENTRY* nextle = le->Flink;

                ExFreePool(CONTAINING_RECORD(le, space, list_entry));
                le = nextle;
            }

            if (c->chunk_item)
                ExFreePool(c->chunk_item);

            if (c->devices)
                ExFreePool(c->devices);

            ExFreePool(c);
        }
    } else
        InsertTailList(&Vcb->chunks, &c->list_entry);

    // Only release the stripe table once nothing reads it any more.
    ExFreePool(stripes);

    return success ? c : NULL;
}
/* Subtracts delta bytes from the used counter of chunk c and traces the change. */
static void decrease_chunk_usage(chunk* c, UINT64 delta) {
    c->used = c->used - delta;

    TRACE("decreasing size of chunk %llx by %llx\n", c->offset, delta);
}
/* Adds delta bytes to the used counter of chunk c and traces the change. */
static void increase_chunk_usage(chunk* c, UINT64 delta) {
    c->used = c->used + delta;

    TRACE("increasing size of chunk %llx by %llx\n", c->offset, delta);
}
/* Writes length bytes from data to the logical address given.
 *
 * The CHUNK_ITEM covering address is looked up in the chunk tree and the
 * buffer is written once per stripe, at the stripe's offset plus the distance
 * of address from the start of the chunk.
 *
 * Returns STATUS_INTERNAL_ERROR if no suitable chunk item is found, if the
 * item is too small for the stripes it declares, or if address lies outside
 * the chunk; otherwise the status of find_item / write_data_phys. */
static NTSTATUS STDCALL write_data(device_extension* Vcb, UINT64 address, void* data, UINT32 length) {
    KEY searchkey;
    traverse_ptr tp;
    CHUNK_ITEM2* ci;
    NTSTATUS Status;
    UINT32 i;

    TRACE("(%p, %llx, %p, %x)\n", Vcb, address, data, length);

    // FIXME - use version cached in Vcb
    searchkey.obj_id = 0x100; // fixed?
    searchkey.obj_type = TYPE_CHUNK_ITEM;
    searchkey.offset = address;

    Status = find_item(Vcb, Vcb->chunk_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return Status;
    }

    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
        ERR("error - unexpected item in chunk tree\n");
        Status = STATUS_INTERNAL_ERROR;
        goto end;
    }

    if (tp.item->size < sizeof(CHUNK_ITEM2)) {
        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(CHUNK_ITEM2));
        Status = STATUS_INTERNAL_ERROR;
        goto end;
    }

    ci = (CHUNK_ITEM2*)tp.item->data;

    // All num_stripes stripe records are read below, so they must all lie inside the item.
    if (tp.item->size < sizeof(ci->ci) + (ci->ci.num_stripes * sizeof(ci->stripes[0]))) {
        ERR("error - chunk item too short for its stripes\n");
        Status = STATUS_INTERNAL_ERROR;
        goto end;
    }

    // The chunk covers [key.offset, key.offset + size); the end address itself is outside it.
    if (tp.item->key.offset > address || tp.item->key.offset + ci->ci.size <= address) {
        ERR("error - address %llx was out of chunk bounds\n", address);
        Status = STATUS_INTERNAL_ERROR;
        goto end;
    }

    // FIXME - only do this for chunks marked DUPLICATE?
    // FIXME - for multiple writes, if PENDING do waits at the end
    // FIXME - work with RAID
    for (i = 0; i < ci->ci.num_stripes; i++) {
        Status = write_data_phys(Vcb->devices[0].devobj, address - tp.item->key.offset + ci->stripes[i].offset, data, length);
        if (!NT_SUCCESS(Status)) {
            ERR("error - write_data_phys failed\n");
            goto end;
        }
    }

end:
    free_traverse_ptr(&tp);

    return Status;
}
/* Settles the transitional space types in one chunk's space list.
 *
 * SPACE_TYPE_DELETING entries become SPACE_TYPE_FREE and SPACE_TYPE_WRITING
 * entries become SPACE_TYPE_USED. Each entry that then has the same type as
 * the entry before it is folded into that predecessor and freed. */
static void clean_space_cache_chunk(device_extension* Vcb, chunk* c) {
    LIST_ENTRY* head = &c->space;
    LIST_ENTRY* cur;
    LIST_ENTRY* following;

    for (cur = head->Flink; cur != head; cur = following) {
        space* entry = CONTAINING_RECORD(cur, space, list_entry);
        space* previous;

        following = cur->Flink; // saved first: entry may be freed below

        if (entry->type == SPACE_TYPE_DELETING)
            entry->type = SPACE_TYPE_FREE;
        else if (entry->type == SPACE_TYPE_WRITING)
            entry->type = SPACE_TYPE_USED;

        if (cur->Blink == head)
            continue; // first entry has nothing in front of it

        previous = CONTAINING_RECORD(cur->Blink, space, list_entry);
        if (previous->type != entry->type)
            continue;

        // do merge
        previous->size += entry->size;
        RemoveEntryList(&entry->list_entry);
        ExFreePool(entry);
    }
}
/* Runs clean_space_cache_chunk on every chunk of Vcb whose space list is
 * flagged as changed, clearing the flag afterwards. */
static void clean_space_cache(device_extension* Vcb) {
    LIST_ENTRY* entry;

    TRACE("(%p)\n", Vcb);

    for (entry = Vcb->chunks.Flink; entry != &Vcb->chunks; entry = entry->Flink) {
        chunk* cur = CONTAINING_RECORD(entry, chunk, list_entry);

        if (!cur->space_changed)
            continue;

        clean_space_cache_chunk(Vcb, cur);
        cur->space_changed = FALSE;
    }
}
/* Checks whether every tree in the cache that is marked for writing can be
 * written as it stands.
 *
 * A tree fails the check if it is empty but has a parent, if its size exceeds
 * node_size less the tree header, or if it has no new address yet. Returns
 * FALSE at the first such tree, TRUE if there is none. */
static BOOL trees_consistent(device_extension* Vcb) {
    ULONG limit = Vcb->superblock.node_size - sizeof(tree_header);
    LIST_ENTRY* entry;

    for (entry = Vcb->tree_cache.Flink; entry != &Vcb->tree_cache; entry = entry->Flink) {
        tree_cache* cached = CONTAINING_RECORD(entry, tree_cache, list_entry);

        if (!cached->write)
            continue;

        if ((cached->tree->header.num_items == 0 && cached->tree->parent) ||
            cached->tree->size > limit ||
            !cached->tree->has_new_address)
            return FALSE;
    }

    return TRUE;
}
/* Makes sure that whatever points at each tree marked for writing is itself
 * marked for writing.
 *
 * A tree with a parent has the parent added to the tree cache. For a
 * parentless tree other than the chunk root and root root, the tree's
 * ROOT_ITEM is looked up in the root tree: a full-sized item has its
 * containing tree added to the cache, while a short one is replaced by a
 * full-sized copy whose extra bytes are zero.
 *
 * Returns STATUS_SUCCESS, the find_item failure status,
 * STATUS_INSUFFICIENT_RESOURCES, or STATUS_INTERNAL_ERROR if the ROOT_ITEM is
 * missing or cannot be reinserted. */
static NTSTATUS add_parents(device_extension* Vcb, LIST_ENTRY* rollback) {
    LIST_ENTRY* le;
    NTSTATUS Status;

    le = Vcb->tree_cache.Flink;
    while (le != &Vcb->tree_cache) {
        tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);

        if (tc2->write) {
            if (tc2->tree->parent)
                add_to_tree_cache(Vcb, tc2->tree->parent, TRUE);
            else if (tc2->tree->root != Vcb->chunk_root && tc2->tree->root != Vcb->root_root) {
                KEY searchkey;
                traverse_ptr tp;

                searchkey.obj_id = tc2->tree->root->id;
                searchkey.obj_type = TYPE_ROOT_ITEM;
                searchkey.offset = 0xffffffffffffffff;

                Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
                if (!NT_SUCCESS(Status)) {
                    ERR("error - find_item returned %08x\n", Status);
                    return Status;
                }

                if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
                    ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
                    free_traverse_ptr(&tp);
                    return STATUS_INTERNAL_ERROR;
                }

                if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, create new entry with new bits zeroed
                    ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);
                    if (!ri) {
                        ERR("out of memory\n");
                        free_traverse_ptr(&tp); // do not leak the traverse pointer
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    if (tp.item->size > 0)
                        RtlCopyMemory(ri, tp.item->data, tp.item->size);

                    RtlZeroMemory(((UINT8*)ri) + tp.item->size, sizeof(ROOT_ITEM) - tp.item->size);

                    delete_tree_item(Vcb, &tp, rollback);

                    if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
                        ERR("insert_tree_item failed\n");
                        ExFreePool(ri); // still owned here when the insertion fails
                        free_traverse_ptr(&tp);
                        return STATUS_INTERNAL_ERROR;
                    }
                } else {
                    add_to_tree_cache(Vcb, tp.tree, TRUE);
                }

                free_traverse_ptr(&tp);
            }
        }

        le = le->Flink;
    }

    return STATUS_SUCCESS;
}
/* Debug helper: logs one line for every tree in the tree-cache list tc.
 *
 * Each line shows the tree's root id, the key of its first non-ignored item,
 * its level, the number of non-ignored items counted and the num_items value
 * from the header. A tree with no non-ignored items prints the 0xcc filler
 * key. */
void print_trees(LIST_ENTRY* tc) {
    LIST_ENTRY *le, *le2;

    le = tc->Flink;
    while (le != tc) {
        KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};
        tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
        UINT32 num_items = 0;

        le2 = tc2->tree->itemlist.Flink;
        while (le2 != &tc2->tree->itemlist) {
            tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);

            if (!td->ignore) {
                // Capture the key once only; later items must not overwrite
                // it, or the "first key" logged would be the last one.
                if (num_items == 0)
                    firstitem = td->key;

                num_items++;
            }

            le2 = le2->Flink;
        }

        ERR("tree: root %llx, first key %llx,%x,%llx, level %x, num_items %x / %x\n",
            tc2->tree->header.tree_id, firstitem.obj_id, firstitem.obj_type, firstitem.offset, tc2->tree->header.level, num_items, tc2->tree->header.num_items);

        le = le->Flink;
    }
}
/* Adds every ancestor of tree t to the tree cache marked for writing.
 *
 * Unless the topmost ancestor belongs to the root root or chunk root, the
 * tree holding its ROOT_ITEM in the root tree is added as well. Lookup
 * failures are logged and otherwise ignored. */
static void add_parents_to_cache(device_extension* Vcb, tree* t) {
    KEY rootkey;
    traverse_ptr tp;
    NTSTATUS Status;

    for (; t->parent; ) {
        t = t->parent;
        add_to_tree_cache(Vcb, t, TRUE);
    }

    // t is now the top of its tree.
    if (t->root == Vcb->root_root || t->root == Vcb->chunk_root)
        return;

    rootkey.obj_id = t->root->id;
    rootkey.obj_type = TYPE_ROOT_ITEM;
    rootkey.offset = 0xffffffffffffffff;

    Status = find_item(Vcb, Vcb->root_root, &tp, &rootkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return;
    }

    if (tp.item->key.obj_id == rootkey.obj_id && tp.item->key.obj_type == rootkey.obj_type)
        add_to_tree_cache(Vcb, tp.tree, TRUE);
    else
        ERR("could not find ROOT_ITEM for tree %llx\n", rootkey.obj_id);

    free_traverse_ptr(&tp);
}
/* Skinny-metadata variant of insert_tree_extent.
 *
 * Inserts a TYPE_METADATA_ITEM keyed (address, level of t) holding a single
 * tree block reference to t's tree id, marks the node-sized range at address
 * in chunk c as SPACE_TYPE_WRITING, adds the ancestors of the extent-tree
 * leaf that received the item to the tree cache, and records address as t's
 * new address.
 *
 * Returns FALSE on allocation or insertion failure, TRUE otherwise. */
static BOOL insert_tree_extent_skinny(device_extension* Vcb, tree* t, chunk* c, UINT64 address, LIST_ENTRY* rollback) {
    EXTENT_ITEM_SKINNY_METADATA* item;
    traverse_ptr where;

    item = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);
    if (!item) {
        ERR("out of memory\n");
        return FALSE;
    }

    item->ei.refcount = 1;
    item->ei.generation = Vcb->superblock.generation;
    item->ei.flags = EXTENT_ITEM_TREE_BLOCK;
    item->type = TYPE_TREE_BLOCK_REF;
    item->tbr.offset = t->header.tree_id;

    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, t->header.level, item, sizeof(EXTENT_ITEM_SKINNY_METADATA), &where, rollback)) {
        ERR("insert_tree_item failed\n");
        ExFreePool(item);
        return FALSE;
    }

    add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);

    add_parents_to_cache(Vcb, where.tree);
    free_traverse_ptr(&where);

    t->new_address = address;
    t->has_new_address = TRUE;

    return TRUE;
}
/* Reserves a node-sized extent for tree t inside chunk c and records it in
 * the extent tree.
 *
 * An address is obtained from find_address_in_chunk. With the skinny-metadata
 * incompat flag set the work is delegated to insert_tree_extent_skinny.
 * Otherwise a TYPE_EXTENT_ITEM keyed (address, node_size) with one tree block
 * reference is inserted, the range is marked SPACE_TYPE_WRITING, the
 * ancestors of the extent-tree leaf that received the item are added to the
 * tree cache, and address becomes t's new address.
 *
 * Returns FALSE if the chunk has no suitable free space or on allocation or
 * insertion failure, TRUE otherwise. */
static BOOL insert_tree_extent(device_extension* Vcb, tree* t, chunk* c, LIST_ENTRY* rollback) {
    UINT64 address;
    EXTENT_ITEM_TREE2* item;
    traverse_ptr where;

    TRACE("(%p, %p, %p, %p)\n", Vcb, t, c, rollback);

    if (!find_address_in_chunk(Vcb, c, Vcb->superblock.node_size, &address))
        return FALSE;

    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)
        return insert_tree_extent_skinny(Vcb, t, c, address, rollback);

    item = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);
    if (!item) {
        ERR("out of memory\n");
        return FALSE;
    }

    // NOTE(review): eit.firstitem is not populated here - confirm that is intended.
    item->eit.extent_item.refcount = 1;
    item->eit.extent_item.generation = Vcb->superblock.generation;
    item->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
    item->eit.level = t->header.level;
    item->type = TYPE_TREE_BLOCK_REF;
    item->tbr.offset = t->header.tree_id;

    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, item, sizeof(EXTENT_ITEM_TREE2), &where, rollback)) {
        ERR("insert_tree_item failed\n");
        ExFreePool(item);
        return FALSE;
    }

    add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_WRITING);

    add_parents_to_cache(Vcb, where.tree);
    free_traverse_ptr(&where);

    t->new_address = address;
    t->has_new_address = TRUE;

    return TRUE;
}
/* Finds a new on-disk address for tree t and records its extent.
 *
 * Candidates are tried in this order:
 *   1. the chunk holding the tree's current address, if it has one;
 *   2. any other chunk whose type equals the wanted flags and which has at
 *      least node_size bytes unused;
 *   3. a freshly allocated chunk.
 * The wanted flags are t->flags, or if that is zero SYSTEM (for the chunk
 * root) or METADATA, combined with DUPLICATE.
 *
 * Returns STATUS_SUCCESS once insert_tree_extent succeeds, otherwise
 * STATUS_DISK_FULL. */
static NTSTATUS get_tree_new_address(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
    chunk *origchunk = NULL, *c;
    LIST_ENTRY* le;
    UINT64 flags = t->flags;

    if (flags == 0)
        flags = (t->root->id == BTRFS_ROOT_CHUNK ? BLOCK_FLAG_SYSTEM : BLOCK_FLAG_METADATA) | BLOCK_FLAG_DUPLICATE;

    if (t->has_address) {
        origchunk = get_chunk_from_address(Vcb, t->header.address);

        // get_chunk_from_address returns NULL when no chunk covers the
        // address; insert_tree_extent would dereference it.
        if (origchunk && insert_tree_extent(Vcb, t, origchunk, rollback))
            return STATUS_SUCCESS;
    }

    le = Vcb->chunks.Flink;
    while (le != &Vcb->chunks) {
        c = CONTAINING_RECORD(le, chunk, list_entry);

        // FIXME - make sure to avoid superblocks

        if (c != origchunk && c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
            if (insert_tree_extent(Vcb, t, c, rollback))
                return STATUS_SUCCESS;
        }

        le = le->Flink;
    }

    // allocate new chunk if necessary
    if ((c = alloc_chunk(Vcb, flags, rollback))) {
        if ((c->chunk_item->size - c->used) >= Vcb->superblock.node_size) {
            if (insert_tree_extent(Vcb, t, c, rollback))
                return STATUS_SUCCESS;
        }
    }

    ERR("couldn't find any metadata chunks with %x bytes free\n", Vcb->superblock.node_size);

    return STATUS_DISK_FULL;
}
/*
 * Removes the skinny-metadata extent item (TYPE_METADATA_ITEM) describing the
 * tree node at `address` and returns the node's space to its chunk.
 *
 * If t is a leaf whose extent item carries EXTENT_ITEM_SHARED_BACKREFS with an
 * inline TREE_BLOCK_REF, every regular or preallocated data extent referenced
 * by the leaf's existing items is passed to convert_shared_data_extent(), and
 * HEADER_FLAG_SHARED_BACKREF is cleared from the tree header.
 *
 * Returns TRUE when the item was found and deleted. Returns FALSE if the
 * lookup failed, no METADATA_ITEM exists for the address, or the item is too
 * short; the caller then falls back to the non-skinny path.
 */
static BOOL reduce_tree_extent_skinny(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp;
    chunk* c;
    EXTENT_ITEM_SKINNY_METADATA* eism;
    NTSTATUS Status;

    searchkey.obj_id = address;
    searchkey.obj_type = TYPE_METADATA_ITEM;
    searchkey.offset = 0xffffffffffffffff;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return FALSE;
    }

    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
        TRACE("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
        free_traverse_ptr(&tp);
        return FALSE;
    }

    if (tp.item->size < sizeof(EXTENT_ITEM_SKINNY_METADATA)) {
        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_SKINNY_METADATA));
        free_traverse_ptr(&tp);
        return FALSE;
    }

    delete_tree_item(Vcb, &tp, rollback);

    eism = (EXTENT_ITEM_SKINNY_METADATA*)tp.item->data;

    if (t->header.level == 0 && eism->ei.flags & EXTENT_ITEM_SHARED_BACKREFS && eism->type == TYPE_TREE_BLOCK_REF) {
        // convert shared data extents
        LIST_ENTRY* le = t->itemlist.Flink;

        while (le != &t->itemlist) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

            TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);

            // Only look inside the item if it is long enough to hold an
            // EXTENT_DATA (same validation as the old-extent conversion in
            // reduce_tree_extent).
            if (!td->ignore && !td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA)) {
                EXTENT_DATA* ed = (EXTENT_DATA*)td->data;

                if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
                    EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;

                    if (ed2->address != 0) {
                        TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
                        convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
                    }
                }
            }

            le = le->Flink;
        }

        t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
    }

    c = get_chunk_from_address(Vcb, address);

    if (c) {
        decrease_chunk_usage(c, Vcb->superblock.node_size);
        add_to_space_list(c, address, Vcb->superblock.node_size, SPACE_TYPE_DELETING);
    } else
        ERR("could not find chunk for address %llx\n", address);

    free_traverse_ptr(&tp);

    return TRUE;
}
// TESTING
// static void check_tree_num_items(tree* t) {
// LIST_ENTRY* le2;
// UINT32 ni;
//
// le2 = t->itemlist.Flink;
// ni = 0;
// while (le2 != &t->itemlist) {
// tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
// if (!td->ignore)
// ni++;
// le2 = le2->Flink;
// }
//
// if (t->header.num_items != ni) {
// ERR("tree %p not okay: num_items was %x, expecting %x\n", t, ni, t->header.num_items);
// int3;
// } else {
// ERR("tree %p okay\n", t);
// }
// }
//
// static void check_trees_num_items(LIST_ENTRY* tc) {
// LIST_ENTRY* le = tc->Flink;
// while (le != tc) {
// tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
//
// check_tree_num_items(tc2->tree);
//
// le = le->Flink;
// }
// }
/*
 * Converts the old-format extent records of the child node referenced by td
 * (an item of internal tree t) to the current format.
 *
 * The child's EXTENT_REF_V0 and EXTENT_ITEM are looked up in the extent tree.
 * If both exist they are deleted and replaced by either a skinny
 * METADATA_ITEM or a full EXTENT_ITEM with an inline TREE_BLOCK_REF, depending
 * on the filesystem's incompat flags. The generation is taken from the old
 * EXTENT_REF_V0.
 *
 * The function returns nothing: if a lookup fails or the records are missing
 * or malformed, the conversion is simply abandoned.
 */
static void convert_old_tree_extent(device_extension* Vcb, tree_data* td, tree* t, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp, tp2, insert_tp;
    EXTENT_REF_V0* erv0;
    NTSTATUS Status;

    TRACE("(%p, %p, %p)\n", Vcb, td, t);

    searchkey.obj_id = td->treeholder.address;
    searchkey.obj_type = TYPE_EXTENT_REF_V0;
    searchkey.offset = 0xffffffffffffffff;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return;
    }

    if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
        TRACE("could not find EXTENT_REF_V0 for %llx\n", searchkey.obj_id);
        free_traverse_ptr(&tp);
        return;
    }

    searchkey.obj_id = td->treeholder.address;
    searchkey.obj_type = TYPE_EXTENT_ITEM;
    searchkey.offset = Vcb->superblock.node_size;

    Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        free_traverse_ptr(&tp);
        return;
    }

    if (keycmp(&searchkey, &tp2.item->key)) {
        ERR("could not find %llx,%x,%llx\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
        free_traverse_ptr(&tp2);
        free_traverse_ptr(&tp);
        return;
    }

    if (tp.item->size < sizeof(EXTENT_REF_V0)) {
        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_REF_V0));
        free_traverse_ptr(&tp2);
        free_traverse_ptr(&tp);
        return;
    }

    erv0 = (EXTENT_REF_V0*)tp.item->data;

    delete_tree_item(Vcb, &tp, rollback);
    delete_tree_item(Vcb, &tp2, rollback);

    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
        EXTENT_ITEM_SKINNY_METADATA* eism = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_SKINNY_METADATA), ALLOC_TAG);

        if (!eism) {
            ERR("out of memory\n");
            free_traverse_ptr(&tp2);
            free_traverse_ptr(&tp);
            return;
        }

        eism->ei.refcount = 1;
        eism->ei.generation = erv0->gen;
        eism->ei.flags = EXTENT_ITEM_TREE_BLOCK;
        eism->type = TYPE_TREE_BLOCK_REF;
        eism->tbr.offset = t->header.tree_id;

        // The child sits one level below t, hence level - 1 as the key offset.
        if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_METADATA_ITEM, t->header.level -1, eism, sizeof(EXTENT_ITEM_SKINNY_METADATA), &insert_tp, rollback)) {
            ERR("insert_tree_item failed\n");
            // The buffer stays ours when the insert fails (insert_tree_extent
            // frees it in the same situation), so release it here.
            ExFreePool(eism);
            free_traverse_ptr(&tp2);
            free_traverse_ptr(&tp);
            return;
        }
    } else {
        EXTENT_ITEM_TREE2* eit2 = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_TREE2), ALLOC_TAG);

        if (!eit2) {
            ERR("out of memory\n");
            free_traverse_ptr(&tp2);
            free_traverse_ptr(&tp);
            return;
        }

        eit2->eit.extent_item.refcount = 1;
        eit2->eit.extent_item.generation = erv0->gen;
        eit2->eit.extent_item.flags = EXTENT_ITEM_TREE_BLOCK;
        eit2->eit.firstitem = td->key;
        eit2->eit.level = t->header.level - 1;
        eit2->type = TYPE_TREE_BLOCK_REF;
        eit2->tbr.offset = t->header.tree_id;

        if (!insert_tree_item(Vcb, Vcb->extent_root, td->treeholder.address, TYPE_EXTENT_ITEM, Vcb->superblock.node_size, eit2, sizeof(EXTENT_ITEM_TREE2), &insert_tp, rollback)) {
            ERR("insert_tree_item failed\n");
            // Not consumed on failure; avoid leaking it.
            ExFreePool(eit2);
            free_traverse_ptr(&tp2);
            free_traverse_ptr(&tp);
            return;
        }
    }

    add_parents_to_cache(Vcb, insert_tp.tree);
    add_parents_to_cache(Vcb, tp.tree);
    add_parents_to_cache(Vcb, tp2.tree);

    free_traverse_ptr(&insert_tp);

    free_traverse_ptr(&tp2);
    free_traverse_ptr(&tp);
}
/*
 * Releases the extent that currently backs tree t at `address`.
 *
 * On skinny-metadata filesystems reduce_tree_extent_skinny() is tried first;
 * if it reports failure the regular EXTENT_ITEM path below is used. That path:
 *   - looks up the (address, EXTENT_ITEM, node_size) item;
 *   - refuses extents whose refcount is greater than 1 (not yet supported);
 *   - for leaves with shared back references, converts the data extents
 *     referenced by the leaf's existing items;
 *   - deletes the extent item and, for old V0 items, the matching
 *     EXTENT_REF_V0;
 *   - for trees without HEADER_FLAG_MIXED_BACKREF, converts the old-format
 *     extents of their children (internal nodes) or data extents (leaves);
 *   - returns the space to the owning chunk.
 *
 * Returns STATUS_SUCCESS, the find_item() failure status, or
 * STATUS_INTERNAL_ERROR for a missing/short item or an unsupported refcount.
 */
static NTSTATUS reduce_tree_extent(device_extension* Vcb, UINT64 address, tree* t, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp;
    EXTENT_ITEM* ei;
    EXTENT_ITEM_V0* eiv0;
    chunk* c;
    NTSTATUS Status;

    // FIXME - deal with refcounts > 1

    TRACE("(%p, %llx, %p)\n", Vcb, address, t);

    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
        if (reduce_tree_extent_skinny(Vcb, address, t, rollback)) {
            return STATUS_SUCCESS;
        }
    }

    searchkey.obj_id = address;
    searchkey.obj_type = TYPE_EXTENT_ITEM;
    searchkey.offset = Vcb->superblock.node_size;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return Status;
    }

    if (keycmp(&tp.item->key, &searchkey)) {
        ERR("could not find %llx,%x,%llx in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
        int3;
        free_traverse_ptr(&tp);
        return STATUS_INTERNAL_ERROR;
    }

    if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
        eiv0 = (EXTENT_ITEM_V0*)tp.item->data;

        if (eiv0->refcount > 1) {
            FIXME("FIXME - cannot deal with refcounts larger than 1 at present (eiv0->refcount == %llx)\n", eiv0->refcount);
            free_traverse_ptr(&tp);
            return STATUS_INTERNAL_ERROR;
        }
    } else {
        if (tp.item->size < sizeof(EXTENT_ITEM)) {
            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
            free_traverse_ptr(&tp);
            return STATUS_INTERNAL_ERROR;
        }

        ei = (EXTENT_ITEM*)tp.item->data;

        if (ei->refcount > 1) {
            FIXME("FIXME - cannot deal with refcounts larger than 1 at present (ei->refcount == %llx)\n", ei->refcount);
            free_traverse_ptr(&tp);
            return STATUS_INTERNAL_ERROR;
        }

        if (t->header.level == 0 && ei->flags & EXTENT_ITEM_SHARED_BACKREFS) {
            // convert shared data extents
            LIST_ENTRY* le = t->itemlist.Flink;

            while (le != &t->itemlist) {
                tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

                TRACE("%llx,%x,%llx\n", td->key.obj_id, td->key.obj_type, td->key.offset);

                // Validate the item length before reading EXTENT_DATA fields,
                // matching the checks in the old-extent loop further down.
                if (!td->ignore && !td->inserted && td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA)) {
                    EXTENT_DATA* ed = (EXTENT_DATA*)td->data;

                    if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;

                        if (ed2->address != 0) {
                            TRACE("trying to convert shared data extent %llx,%llx\n", ed2->address, ed2->size);
                            convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
                        }
                    }
                }

                le = le->Flink;
            }

            t->header.flags &= ~HEADER_FLAG_SHARED_BACKREF;
        }
    }

    delete_tree_item(Vcb, &tp, rollback);

    // if EXTENT_ITEM_V0, delete corresponding B4 item
    if (tp.item->size == sizeof(EXTENT_ITEM_V0)) {
        traverse_ptr tp2;

        searchkey.obj_id = address;
        searchkey.obj_type = TYPE_EXTENT_REF_V0;
        searchkey.offset = 0xffffffffffffffff;

        Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) {
            ERR("error - find_item returned %08x\n", Status);
            free_traverse_ptr(&tp);
            return Status;
        }

        if (tp2.item->key.obj_id == searchkey.obj_id && tp2.item->key.obj_type == searchkey.obj_type) {
            delete_tree_item(Vcb, &tp2, rollback);
        }

        free_traverse_ptr(&tp2);
    }

    if (!(t->header.flags & HEADER_FLAG_MIXED_BACKREF)) {
        LIST_ENTRY* le;

        // when writing old internal trees, convert related extents
        le = t->itemlist.Flink;
        while (le != &t->itemlist) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

            if (!td->ignore && !td->inserted) {
                if (t->header.level > 0) {
                    convert_old_tree_extent(Vcb, td, t, rollback);
                } else if (td->key.obj_type == TYPE_EXTENT_DATA && td->size >= sizeof(EXTENT_DATA)) {
                    EXTENT_DATA* ed = (EXTENT_DATA*)td->data;

                    if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && td->size >= sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
                        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;

                        if (ed2->address != 0) {
                            TRACE("trying to convert old data extent %llx,%llx\n", ed2->address, ed2->size);
                            convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
                        }
                    }
                }
            }

            le = le->Flink;
        }
    }

    c = get_chunk_from_address(Vcb, address);

    if (c) {
        // The extent item's key offset holds the extent length.
        decrease_chunk_usage(c, tp.item->key.offset);
        add_to_space_list(c, address, tp.item->key.offset, SPACE_TYPE_DELETING);
    } else
        ERR("could not find chunk for address %llx\n", address);

    free_traverse_ptr(&tp);

    return STATUS_SUCCESS;
}
/*
 * Gives every dirty tree in the tree cache that does not yet have a new
 * address a freshly allocated extent.
 *
 * For each such tree: a new address is obtained, the extent behind the old
 * address (if the tree had one) is released, and the usage counter of the
 * chunk containing the new address is bumped by one node.
 *
 * Stops at the first failure and returns its status; STATUS_INTERNAL_ERROR is
 * returned if no chunk contains the new address.
 */
static NTSTATUS allocate_tree_extents(device_extension* Vcb, LIST_ENTRY* rollback) {
    LIST_ENTRY* cur;
    NTSTATUS Status;

    TRACE("(%p)\n", Vcb);

    for (cur = Vcb->tree_cache.Flink; cur != &Vcb->tree_cache; cur = cur->Flink) {
        tree_cache* entry = CONTAINING_RECORD(cur, tree_cache, list_entry);
        chunk* dest;

        // Skip clean trees and trees that were already relocated.
        if (!entry->write || entry->tree->has_new_address)
            continue;

        Status = get_tree_new_address(Vcb, entry->tree, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("get_tree_new_address returned %08x\n", Status);
            return Status;
        }

        TRACE("allocated extent %llx\n", entry->tree->new_address);

        if (entry->tree->has_address) {
            Status = reduce_tree_extent(Vcb, entry->tree->header.address, entry->tree, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("reduce_tree_extent returned %08x\n", Status);
                return Status;
            }
        }

        dest = get_chunk_from_address(Vcb, entry->tree->new_address);
        if (!dest) {
            ERR("could not find chunk for address %llx\n", entry->tree->new_address);
            return STATUS_INTERNAL_ERROR;
        }

        increase_chunk_usage(dest, Vcb->superblock.node_size);
    }

    return STATUS_SUCCESS;
}
/*
 * Propagates the new addresses of rewritten top-level tree nodes into the
 * root tree.
 *
 * For every dirty, parentless tree in the cache the in-memory root_item is
 * updated (block number, level, generations). Unless the tree belongs to the
 * root tree or the chunk tree, the matching ROOT_ITEM in the root tree is then
 * overwritten in place, or deleted and re-inserted at full length if the
 * stored item is shorter than ROOT_ITEM. The root's treeholder address is
 * updated in all cases.
 *
 * Returns STATUS_SUCCESS, the find_item() failure status,
 * STATUS_INSUFFICIENT_RESOURCES on allocation failure, or
 * STATUS_INTERNAL_ERROR if the ROOT_ITEM is missing or cannot be re-inserted.
 */
static NTSTATUS update_root_root(device_extension* Vcb, LIST_ENTRY* rollback) {
    LIST_ENTRY* le;
    NTSTATUS Status;

    TRACE("(%p)\n", Vcb);

    le = Vcb->tree_cache.Flink;
    while (le != &Vcb->tree_cache) {
        tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);

        if (tc2->write && !tc2->tree->parent) {
            if (tc2->tree->root != Vcb->root_root && tc2->tree->root != Vcb->chunk_root) {
                KEY searchkey;
                traverse_ptr tp;

                searchkey.obj_id = tc2->tree->root->id;
                searchkey.obj_type = TYPE_ROOT_ITEM;
                searchkey.offset = 0xffffffffffffffff;

                Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
                if (!NT_SUCCESS(Status)) {
                    ERR("error - find_item returned %08x\n", Status);
                    return Status;
                }

                if (tp.item->key.obj_id != searchkey.obj_id || tp.item->key.obj_type != searchkey.obj_type) {
                    ERR("could not find ROOT_ITEM for tree %llx\n", searchkey.obj_id);
                    free_traverse_ptr(&tp);
                    return STATUS_INTERNAL_ERROR;
                }

                TRACE("updating the address for root %llx to %llx\n", searchkey.obj_id, tc2->tree->new_address);

                tc2->tree->root->root_item.block_number = tc2->tree->new_address;
                tc2->tree->root->root_item.root_level = tc2->tree->header.level;
                tc2->tree->root->root_item.generation = Vcb->superblock.generation;
                tc2->tree->root->root_item.generation2 = Vcb->superblock.generation;

                if (tp.item->size < sizeof(ROOT_ITEM)) { // if not full length, delete and create new entry
                    ROOT_ITEM* ri = ExAllocatePoolWithTag(PagedPool, sizeof(ROOT_ITEM), ALLOC_TAG);

                    if (!ri) {
                        ERR("out of memory\n");
                        // Drop the traverse pointer obtained above before bailing out.
                        free_traverse_ptr(&tp);
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlCopyMemory(ri, &tc2->tree->root->root_item, sizeof(ROOT_ITEM));

                    delete_tree_item(Vcb, &tp, rollback);

                    if (!insert_tree_item(Vcb, Vcb->root_root, searchkey.obj_id, searchkey.obj_type, 0, ri, sizeof(ROOT_ITEM), NULL, rollback)) {
                        ERR("insert_tree_item failed\n");
                        // The buffer was not consumed and tp is still held.
                        ExFreePool(ri);
                        free_traverse_ptr(&tp);
                        return STATUS_INTERNAL_ERROR;
                    }
                } else
                    RtlCopyMemory(tp.item->data, &tc2->tree->root->root_item, sizeof(ROOT_ITEM));

                free_traverse_ptr(&tp);
            }

            tc2->tree->root->treeholder.address = tc2->tree->new_address;
        }

        le = le->Flink;
    }

    return STATUS_SUCCESS;
}
// State of one per-stripe tree write, kept in write_tree_stripe.status.
enum write_tree_status {
WriteTreeStatus_Pending, // initial state assigned by write_tree() when the stripe is queued
WriteTreeStatus_Success, // set by write_tree_completion() when the IRP finished with a success status
WriteTreeStatus_Error, // set by write_tree_completion() when the IRP finished with a failure status
WriteTreeStatus_Cancelling, // a sibling stripe failed and IoCancelIrp() was called on this stripe's IRP
WriteTreeStatus_Cancelled // the cancelled IRP has since passed through write_tree_completion()
};
// Forward declaration so that a stripe can point back at its owning context.
// NOTE(review): the typedef further down names an anonymous struct, so this
// tag is never completed; the code casts between the two pointer types.
struct write_tree_context;
// One pending write of a tree node to a single stripe of a chunk.
typedef struct {
struct write_tree_context* context; // owning context (stored and read back through a cast)
UINT8* buf; // node contents to write; the same buffer may be shared by several stripes
device* device; // device the stripe lives on
PIRP Irp; // IRP allocated by write_tree() for this write
IO_STATUS_BLOCK iosb; // copy of the IRP's completion status
enum write_tree_status status; // progress of this write
LIST_ENTRY list_entry; // link in write_tree_context.stripes
} write_tree_stripe;
// Shared state for a batch of tree-node writes issued by write_trees().
typedef struct {
KEVENT Event; // set by write_tree_completion() once no stripe is Pending or Cancelling
LIST_ENTRY stripes; // list of write_tree_stripe entries
} write_tree_context;
/*
 * I/O completion routine for the IRPs built by write_tree().
 *
 * conptr is the write_tree_stripe the IRP belongs to. A stripe that was being
 * cancelled is simply marked Cancelled. Otherwise the IRP's status block is
 * copied into the stripe and the stripe becomes Success or Error; on error,
 * every sibling stripe that is still Pending is switched to Cancelling and its
 * IRP is cancelled.
 *
 * Once no stripe of the context is Pending or Cancelling, the context's event
 * is set. Always returns STATUS_MORE_PROCESSING_REQUIRED.
 */
static NTSTATUS STDCALL write_tree_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
    write_tree_stripe* self = conptr;
    write_tree_context* ctx = (write_tree_context*)self->context;
    LIST_ENTRY* cur;
    BOOL all_done = TRUE;

    if (self->status == WriteTreeStatus_Cancelling) {
        self->status = WriteTreeStatus_Cancelled;
    } else {
        self->iosb = Irp->IoStatus;

        if (NT_SUCCESS(Irp->IoStatus.Status)) {
            self->status = WriteTreeStatus_Success;
        } else {
            self->status = WriteTreeStatus_Error;

            // One failure aborts the whole batch: cancel whatever is still in flight.
            for (cur = ctx->stripes.Flink; cur != &ctx->stripes; cur = cur->Flink) {
                write_tree_stripe* other = CONTAINING_RECORD(cur, write_tree_stripe, list_entry);

                if (other->status == WriteTreeStatus_Pending) {
                    other->status = WriteTreeStatus_Cancelling;
                    IoCancelIrp(other->Irp);
                }
            }
        }
    }

    // Wake the waiter only when every stripe has reached a final state.
    for (cur = ctx->stripes.Flink; cur != &ctx->stripes; cur = cur->Flink) {
        write_tree_stripe* other = CONTAINING_RECORD(cur, write_tree_stripe, list_entry);

        if (other->status == WriteTreeStatus_Pending || other->status == WriteTreeStatus_Cancelling) {
            all_done = FALSE;
            break;
        }
    }

    if (all_done)
        KeSetEvent(&ctx->Event, 0, FALSE);

    return STATUS_MORE_PROCESSING_REQUIRED;
}
/*
 * Queues the write of one tree node for every stripe of the chunk that
 * contains addr.
 *
 * For each stripe a write_tree_stripe and an IRP_MJ_WRITE IRP are prepared
 * (buffered, direct or neither, depending on the target device's flags) and
 * the stripe is appended to wtc->stripes. Nothing is sent to the device here.
 *
 * data is the node_size-byte buffer to write; all stripes share the pointer.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES if a stripe cannot be
 * allocated, or STATUS_INTERNAL_ERROR if the chunk is not found or an IRP or
 * MDL cannot be allocated. On failure the stripe being built is released;
 * stripes queued by earlier iterations stay on wtc->stripes.
 */
static NTSTATUS write_tree(device_extension* Vcb, UINT64 addr, UINT8* data, write_tree_context* wtc) {
    chunk* c;
    CHUNK_ITEM_STRIPE* cis;
    write_tree_stripe* stripe;
    UINT64 i;

    c = get_chunk_from_address(Vcb, addr);

    if (!c) {
        ERR("get_chunk_from_address failed\n");
        return STATUS_INTERNAL_ERROR;
    }

    // The stripe descriptors are read from directly after the CHUNK_ITEM.
    cis = (CHUNK_ITEM_STRIPE*)&c->chunk_item[1];

    // FIXME - make this work with RAID

    for (i = 0; i < c->chunk_item->num_stripes; i++) {
        PIO_STACK_LOCATION IrpSp;

        // FIXME - handle missing devices

        stripe = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_stripe), ALLOC_TAG);
        if (!stripe) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        stripe->context = (struct write_tree_context*)wtc;
        stripe->buf = data;
        stripe->device = c->devices[i];
        RtlZeroMemory(&stripe->iosb, sizeof(IO_STATUS_BLOCK));
        stripe->status = WriteTreeStatus_Pending;

        stripe->Irp = IoAllocateIrp(stripe->device->devobj->StackSize, FALSE);

        if (!stripe->Irp) {
            ERR("IoAllocateIrp failed\n");
            // Not yet on the list, so nobody else would free it.
            ExFreePool(stripe);
            return STATUS_INTERNAL_ERROR;
        }

        IrpSp = IoGetNextIrpStackLocation(stripe->Irp);
        IrpSp->MajorFunction = IRP_MJ_WRITE;

        if (stripe->device->devobj->Flags & DO_BUFFERED_IO) {
            stripe->Irp->AssociatedIrp.SystemBuffer = data;

            stripe->Irp->Flags = IRP_BUFFERED_IO;
        } else if (stripe->device->devobj->Flags & DO_DIRECT_IO) {
            stripe->Irp->MdlAddress = IoAllocateMdl(data, Vcb->superblock.node_size, FALSE, FALSE, NULL);
            if (!stripe->Irp->MdlAddress) {
                ERR("IoAllocateMdl failed\n");
                // Release the IRP and stripe built so far for this iteration.
                IoFreeIrp(stripe->Irp);
                ExFreePool(stripe);
                return STATUS_INTERNAL_ERROR;
            }

            MmProbeAndLockPages(stripe->Irp->MdlAddress, KernelMode, IoWriteAccess);
        } else {
            stripe->Irp->UserBuffer = data;
        }

        IrpSp->Parameters.Write.Length = Vcb->superblock.node_size;
        // Translate the logical address into an offset on this stripe's device.
        IrpSp->Parameters.Write.ByteOffset.QuadPart = addr - c->offset + cis[i].offset;

        stripe->Irp->UserIosb = &stripe->iosb;

        IoSetCompletionRoutine(stripe->Irp, write_tree_completion, stripe, TRUE, TRUE, TRUE);

        InsertTailList(&wtc->stripes, &stripe->list_entry);
    }

    return STATUS_SUCCESS;
}
/*
 * Releases everything attached to the stripes queued on wtc: locked MDLs,
 * IRPs, the node buffers and the stripe records themselves.
 *
 * A node buffer may be shared by several stripes, so after freeing it the
 * pointer is cleared in all later stripes to avoid a double free.
 *
 * The list head itself is not reinitialised; wtc->stripes must not be walked
 * again afterwards.
 */
static void free_stripes(write_tree_context* wtc) {
    LIST_ENTRY *le, *le2, *nextle;

    le = wtc->stripes.Flink;
    while (le != &wtc->stripes) {
        write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);

        if (stripe->Irp) {
            // An MDL is only attached for direct-I/O devices; check the pointer
            // as well so a device with both I/O flags set does not lead to a
            // NULL MDL being unlocked.
            if ((stripe->device->devobj->Flags & DO_DIRECT_IO) && stripe->Irp->MdlAddress) {
                MmUnlockPages(stripe->Irp->MdlAddress);
                IoFreeMdl(stripe->Irp->MdlAddress);
                stripe->Irp->MdlAddress = NULL;
            }

            // The IRP came from IoAllocateIrp() and its completion routine
            // returns STATUS_MORE_PROCESSING_REQUIRED, so it is ours to free.
            IoFreeIrp(stripe->Irp);
            stripe->Irp = NULL;
        }

        le = le->Flink;
    }

    le = wtc->stripes.Flink;
    while (le != &wtc->stripes) {
        write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);

        // Remember the successor before the stripe is freed.
        nextle = le->Flink;

        if (stripe->buf) {
            ExFreePool(stripe->buf);

            le2 = le->Flink;
            while (le2 != &wtc->stripes) {
                write_tree_stripe* s2 = CONTAINING_RECORD(le2, write_tree_stripe, list_entry);

                if (s2->buf == stripe->buf)
                    s2->buf = NULL;

                le2 = le2->Flink;
            }
        }

        ExFreePool(stripe);

        le = nextle;
    }
}
/*
 * Serialises every dirty tree in the tree cache and writes it to disk.
 *
 * Phase 1 walks the cache level by level, starting at the leaves. For each
 * dirty tree it copies the first live key into the parent's item, points the
 * parent at the new address, and (without skinny metadata) stores the first
 * key in the tree's EXTENT_ITEM. The walk ends at the first level that has no
 * dirty tree.
 *
 * Phase 2 builds the on-disk image of each dirty tree (header, then leaf items
 * with data packed from the end of the node, or internal-node entries),
 * checksums it, and queues it with write_tree(). All queued IRPs are then sent
 * and waited for.
 *
 * Returns STATUS_SUCCESS, or the first failure encountered. Queued stripes and
 * their buffers are released on every path that reaches the `end` label.
 */
static NTSTATUS write_trees(device_extension* Vcb) {
    UINT8 level;
    UINT8 *data, *body;
    UINT32 crc32;
    NTSTATUS Status;
    LIST_ENTRY* le;
    write_tree_context* wtc;

    TRACE("(%p)\n", Vcb);

    // level is a UINT8, so the bound is checked explicitly inside the loop
    // rather than with "level <= 255", which can never be false.
    for (level = 0; ; level++) {
        BOOL nothing_found = TRUE;

        TRACE("level = %u\n", level);

        le = Vcb->tree_cache.Flink;
        while (le != &Vcb->tree_cache) {
            tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);

            if (tc2->write && tc2->tree->header.level == level) {
                KEY firstitem, searchkey;
                LIST_ENTRY* le2;
                traverse_ptr tp;
                EXTENT_ITEM_TREE* eit;

                if (!tc2->tree->has_new_address) {
                    ERR("error - tried to write tree with no new address\n");
                    int3;
                }

                // A tree with no live items leaves firstitem untouched below;
                // give it a defined value instead of stack garbage.
                RtlZeroMemory(&firstitem, sizeof(KEY));

                le2 = tc2->tree->itemlist.Flink;
                while (le2 != &tc2->tree->itemlist) {
                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
                    if (!td->ignore) {
                        firstitem = td->key;
                        break;
                    }
                    le2 = le2->Flink;
                }

                if (tc2->tree->parent) {
                    tc2->tree->paritem->key = firstitem;
                    tc2->tree->paritem->treeholder.address = tc2->tree->new_address;
                    tc2->tree->paritem->treeholder.generation = Vcb->superblock.generation;
                }

                if (!(Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA)) {
                    searchkey.obj_id = tc2->tree->new_address;
                    searchkey.obj_type = TYPE_EXTENT_ITEM;
                    searchkey.offset = Vcb->superblock.node_size;

                    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
                    if (!NT_SUCCESS(Status)) {
                        ERR("error - find_item returned %08x\n", Status);
                        return Status;
                    }

                    if (keycmp(&searchkey, &tp.item->key)) {
                        ERR("could not find %llx,%x,%llx in extent_root (found %llx,%x,%llx instead)\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
                        free_traverse_ptr(&tp);
                        return STATUS_INTERNAL_ERROR;
                    }

                    if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
                        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
                        free_traverse_ptr(&tp);
                        return STATUS_INTERNAL_ERROR;
                    }

                    eit = (EXTENT_ITEM_TREE*)tp.item->data;
                    eit->firstitem = firstitem;

                    free_traverse_ptr(&tp);
                }

                nothing_found = FALSE;
            }

            le = le->Flink;
        }

        if (nothing_found || level == 255)
            break;
    }

    TRACE("allocated tree extents\n");

    wtc = ExAllocatePoolWithTag(NonPagedPool, sizeof(write_tree_context), ALLOC_TAG);
    if (!wtc) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    KeInitializeEvent(&wtc->Event, NotificationEvent, FALSE);
    InitializeListHead(&wtc->stripes);

    le = Vcb->tree_cache.Flink;
    while (le != &Vcb->tree_cache) {
        tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);
#ifdef DEBUG_PARANOID
        UINT32 num_items = 0, size = 0;
        LIST_ENTRY* le2;
        BOOL crash = FALSE;
#endif

        if (tc2->write) {
#ifdef DEBUG_PARANOID
            // Recount items and bytes and compare with the cached totals.
            le2 = tc2->tree->itemlist.Flink;
            while (le2 != &tc2->tree->itemlist) {
                tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
                if (!td->ignore) {
                    num_items++;

                    if (tc2->tree->header.level == 0)
                        size += td->size;
                }
                le2 = le2->Flink;
            }

            if (tc2->tree->header.level == 0)
                size += num_items * sizeof(leaf_node);
            else
                size += num_items * sizeof(internal_node);

            if (num_items != tc2->tree->header.num_items) {
                ERR("tree %llx, level %x: num_items was %x, expected %x\n", tc2->tree->root->id, tc2->tree->header.level, num_items, tc2->tree->header.num_items);
                crash = TRUE;
            }

            if (size != tc2->tree->size) {
                ERR("tree %llx, level %x: size was %x, expected %x\n", tc2->tree->root->id, tc2->tree->header.level, size, tc2->tree->size);
                crash = TRUE;
            }

            if (tc2->tree->header.num_items == 0 && tc2->tree->parent) {
                ERR("tree %llx, level %x: tried to write empty tree with parent\n", tc2->tree->root->id, tc2->tree->header.level);
                crash = TRUE;
            }

            if (tc2->tree->size > Vcb->superblock.node_size - sizeof(tree_header)) {
                ERR("tree %llx, level %x: tried to write overlarge tree (%x > %x)\n", tc2->tree->root->id, tc2->tree->header.level, tc2->tree->size, Vcb->superblock.node_size - sizeof(tree_header));
                crash = TRUE;
            }

            if (crash) {
                ERR("tree %p\n", tc2->tree);
                le2 = tc2->tree->itemlist.Flink;
                while (le2 != &tc2->tree->itemlist) {
                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
                    if (!td->ignore) {
                        ERR("%llx,%x,%llx inserted=%u\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->inserted);
                    }
                    le2 = le2->Flink;
                }
                int3;
            }
#endif
            tc2->tree->header.address = tc2->tree->new_address;
            tc2->tree->header.generation = Vcb->superblock.generation;
            tc2->tree->header.flags |= HEADER_FLAG_MIXED_BACKREF;
            tc2->tree->has_address = TRUE;

            data = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
            if (!data) {
                ERR("out of memory\n");
                Status = STATUS_INSUFFICIENT_RESOURCES;
                goto end;
            }

            body = data + sizeof(tree_header);

            RtlCopyMemory(data, &tc2->tree->header, sizeof(tree_header));
            RtlZeroMemory(body, Vcb->superblock.node_size - sizeof(tree_header));

            if (tc2->tree->header.level == 0) {
                leaf_node* itemptr = (leaf_node*)body;
                int i = 0;
                LIST_ENTRY* le2;
                // Item payloads are packed backwards from the end of the node.
                UINT8* dataptr = data + Vcb->superblock.node_size;

                le2 = tc2->tree->itemlist.Flink;
                while (le2 != &tc2->tree->itemlist) {
                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
                    if (!td->ignore) {
                        dataptr = dataptr - td->size;

                        itemptr[i].key = td->key;
                        // Offsets are stored relative to the end of the header.
                        itemptr[i].offset = (UINT8*)dataptr - (UINT8*)body;
                        itemptr[i].size = td->size;
                        i++;

                        if (td->size > 0)
                            RtlCopyMemory(dataptr, td->data, td->size);
                    }

                    le2 = le2->Flink;
                }
            } else {
                internal_node* itemptr = (internal_node*)body;
                int i = 0;
                LIST_ENTRY* le2;

                le2 = tc2->tree->itemlist.Flink;
                while (le2 != &tc2->tree->itemlist) {
                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);
                    if (!td->ignore) {
                        itemptr[i].key = td->key;
                        itemptr[i].address = td->treeholder.address;
                        itemptr[i].generation = td->treeholder.generation;
                        i++;
                    }

                    le2 = le2->Flink;
                }
            }

            // The checksum covers the node from fs_uuid onwards and is stored
            // in the first four bytes of the node.
            crc32 = calc_crc32c(0xffffffff, (UINT8*)&((tree_header*)data)->fs_uuid, Vcb->superblock.node_size - sizeof(((tree_header*)data)->csum));
            crc32 = ~crc32;
            *((UINT32*)data) = crc32;
            TRACE("setting crc32 to %08x\n", crc32);

            Status = write_tree(Vcb, tc2->tree->new_address, data, wtc);
            if (!NT_SUCCESS(Status)) {
                BOOL queued = FALSE;
                LIST_ENTRY* sle;

                ERR("write_tree returned %08x\n", Status);

                // write_tree() may have queued some stripes for this buffer
                // before failing. If so, free_stripes() will release it;
                // otherwise it has to be freed here.
                for (sle = wtc->stripes.Flink; sle != &wtc->stripes; sle = sle->Flink) {
                    write_tree_stripe* s = CONTAINING_RECORD(sle, write_tree_stripe, list_entry);

                    if (s->buf == data) {
                        queued = TRUE;
                        break;
                    }
                }

                if (!queued)
                    ExFreePool(data);

                goto end;
            }
        }

        le = le->Flink;
    }

    Status = STATUS_SUCCESS;

    if (wtc->stripes.Flink != &wtc->stripes) {
        // launch writes and wait
        le = wtc->stripes.Flink;
        while (le != &wtc->stripes) {
            write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);

            IoCallDriver(stripe->device->devobj, stripe->Irp);

            le = le->Flink;
        }

        KeWaitForSingleObject(&wtc->Event, Executive, KernelMode, FALSE, NULL);

        le = wtc->stripes.Flink;
        while (le != &wtc->stripes) {
            write_tree_stripe* stripe = CONTAINING_RECORD(le, write_tree_stripe, list_entry);

            if (!NT_SUCCESS(stripe->iosb.Status)) {
                Status = stripe->iosb.Status;
                break;
            }

            le = le->Flink;
        }
    }

end:
    // Covers both the normal path and early exits where stripes were queued
    // but never sent; a no-op when the list is empty.
    free_stripes(wtc);
    ExFreePool(wtc);

    return Status;
}
/*
 * Refreshes the superblock's root-tree and chunk-tree pointers from the dirty
 * top-level trees in the cache, then writes the superblock to every device
 * that has a device object.
 *
 * Returns STATUS_SUCCESS, or the first failure status from write_superblock().
 */
static NTSTATUS write_superblocks(device_extension* Vcb) {
    UINT64 dev;
    NTSTATUS Status;
    LIST_ENTRY* cur;

    TRACE("(%p)\n", Vcb);

    for (cur = Vcb->tree_cache.Flink; cur != &Vcb->tree_cache; cur = cur->Flink) {
        tree_cache* entry = CONTAINING_RECORD(cur, tree_cache, list_entry);

        // Only rewritten root nodes affect the superblock.
        if (!entry->write || entry->tree->parent)
            continue;

        if (entry->tree->root == Vcb->root_root) {
            Vcb->superblock.root_tree_addr = entry->tree->new_address;
            Vcb->superblock.root_level = entry->tree->header.level;
        } else if (entry->tree->root == Vcb->chunk_root) {
            Vcb->superblock.chunk_tree_addr = entry->tree->new_address;
            Vcb->superblock.chunk_root_generation = entry->tree->header.generation;
            Vcb->superblock.chunk_root_level = entry->tree->header.level;
        }
    }

    for (dev = 0; dev < Vcb->superblock.num_devices; dev++) {
        if (!Vcb->devices[dev].devobj)
            continue;

        Status = write_superblock(Vcb, &Vcb->devices[dev]);
        if (!NT_SUCCESS(Status)) {
            ERR("write_superblock returned %08x\n", Status);
            return Status;
        }
    }

    return STATUS_SUCCESS;
}
/*
 * Writes changed chunk usage figures back to the extent tree.
 *
 * For every chunk whose `used` differs from `oldused`, the chunk's
 * BLOCK_GROUP_ITEM is replaced by a copy carrying the new `used` value and the
 * superblock's bytes_used is adjusted by the difference (doubled for
 * DUPLICATE chunks). RAID0/1/10/5/6 chunks are not supported and abort the
 * update.
 *
 * Returns STATUS_SUCCESS, the find_item() failure status,
 * STATUS_INSUFFICIENT_RESOURCES on allocation failure, or
 * STATUS_INTERNAL_ERROR for a missing/short item, a failed insert or an
 * unsupported chunk type.
 */
static NTSTATUS update_chunk_usage(device_extension* Vcb, LIST_ENTRY* rollback) {
    LIST_ENTRY* le = Vcb->chunks.Flink;
    chunk* c;
    KEY searchkey;
    traverse_ptr tp;
    BLOCK_GROUP_ITEM* bgi;
    NTSTATUS Status;

    TRACE("(%p)\n", Vcb);

    while (le != &Vcb->chunks) {
        c = CONTAINING_RECORD(le, chunk, list_entry);

        if (c->used != c->oldused) {
            searchkey.obj_id = c->offset;
            searchkey.obj_type = TYPE_BLOCK_GROUP_ITEM;
            searchkey.offset = c->chunk_item->size;

            Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
            if (!NT_SUCCESS(Status)) {
                ERR("error - find_item returned %08x\n", Status);
                return Status;
            }

            if (keycmp(&searchkey, &tp.item->key)) {
                ERR("could not find (%llx,%x,%llx) in extent_root\n", searchkey.obj_id, searchkey.obj_type, searchkey.offset);
                int3;
                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            }

            if (tp.item->size < sizeof(BLOCK_GROUP_ITEM)) {
                ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(BLOCK_GROUP_ITEM));
                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            }

            bgi = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
            if (!bgi) {
                ERR("out of memory\n");
                free_traverse_ptr(&tp);
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            RtlCopyMemory(bgi, tp.item->data, tp.item->size);
            bgi->used = c->used;

            TRACE("adjusting usage of chunk %llx to %llx\n", c->offset, c->used);

            delete_tree_item(Vcb, &tp, rollback);

            if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, bgi, tp.item->size, NULL, rollback)) {
                ERR("insert_tree_item failed\n");
                ExFreePool(bgi);
                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            }

            // From here on bgi is referenced by the newly inserted tree item
            // (callers in this file only free the buffer when the insert
            // fails), so it must NOT be freed on the error paths below.

            TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);
            TRACE("chunk_item type = %llx\n", c->chunk_item->type);

            if (c->chunk_item->type & BLOCK_FLAG_RAID0) {
                FIXME("RAID0 not yet supported\n");
                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            } else if (c->chunk_item->type & BLOCK_FLAG_RAID1) {
                FIXME("RAID1 not yet supported\n");
                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            } else if (c->chunk_item->type & BLOCK_FLAG_DUPLICATE) {
                // Two copies are stored, so the on-disk delta counts twice.
                Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + (2 * (c->used - c->oldused));
            } else if (c->chunk_item->type & BLOCK_FLAG_RAID10) {
                FIXME("RAID10 not yet supported\n");
                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            } else if (c->chunk_item->type & BLOCK_FLAG_RAID5) {
                FIXME("RAID5 not yet supported\n");
                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            } else if (c->chunk_item->type & BLOCK_FLAG_RAID6) {
                FIXME("RAID6 not yet supported\n");
                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            } else { // SINGLE
                Vcb->superblock.bytes_used = Vcb->superblock.bytes_used + c->used - c->oldused;
            }

            TRACE("bytes_used = %llx\n", Vcb->superblock.bytes_used);

            free_traverse_ptr(&tp);

            c->oldused = c->used;
        }

        le = le->Flink;
    }

    return STATUS_SUCCESS;
}
/*
 * Copies the key of the first entry on t's item list into *key.
 * If the list is empty, *key is left untouched. The entry's `ignore` flag is
 * not consulted.
 */
static void get_first_item(tree* t, KEY* key) {
    LIST_ENTRY* head = &t->itemlist;
    LIST_ENTRY* first = head->Flink;

    if (first != head)
        *key = CONTAINING_RECORD(first, tree_data, list_entry)->key;
}
/*
 * Splits tree t in two. The entries from newfirstitem to the end of t's item
 * list are moved into a newly allocated tree (nt); t keeps `numitems` items
 * with an accumulated size of `size`, and nt receives the remainder of both
 * counters.
 *
 * If t has a parent, a new parent entry for nt is linked in directly after
 * t's own parent entry. Otherwise a new parent tree one level higher is
 * created with two entries (for t and nt) and becomes the root's top tree.
 *
 * Returns STATUS_SUCCESS, STATUS_INSUFFICIENT_RESOURCES if an allocation
 * fails, or STATUS_INTERNAL_ERROR if a parent would be needed above level 255.
 *
 * NOTE(review): every failure return after the first allocation happens once
 * t's item list has already been cut, and nothing is unwound on those paths.
 */
static NTSTATUS STDCALL split_tree_at(device_extension* Vcb, tree* t, tree_data* newfirstitem, UINT32 numitems, UINT32 size) {
    tree *nt, *pt;
    tree_data* td;
    tree_data* oldlastitem;

    TRACE("splitting tree in %llx at (%llx,%x,%llx)\n", t->root->id, newfirstitem->key.obj_id, newfirstitem->key.obj_type, newfirstitem->key.offset);

    nt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
    if (!nt) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    // nt starts as a copy of t's header, with no on-disk address yet
    RtlCopyMemory(&nt->header, &t->header, sizeof(tree_header));
    nt->header.address = 0;
    nt->header.generation = Vcb->superblock.generation;
    nt->header.num_items = t->header.num_items - numitems;
    nt->header.flags = HEADER_FLAG_MIXED_BACKREF;

    nt->refcount = 0;
    nt->has_address = FALSE;
    nt->Vcb = Vcb;
    nt->parent = t->parent;
    nt->root = t->root;
    nt->new_address = 0;
    nt->has_new_address = FALSE;
    nt->flags = t->flags;
    InitializeListHead(&nt->itemlist);

    // the entry just before the split point becomes t's new tail
    oldlastitem = CONTAINING_RECORD(newfirstitem->list_entry.Blink, tree_data, list_entry);

    // hand [newfirstitem .. old tail of t] over to nt's list head ...
    nt->itemlist.Flink = &newfirstitem->list_entry;
    nt->itemlist.Blink = t->itemlist.Blink;
    nt->itemlist.Flink->Blink = &nt->itemlist;
    nt->itemlist.Blink->Flink = &nt->itemlist;

    // ... and close t's list after oldlastitem
    t->itemlist.Blink = &oldlastitem->list_entry;
    t->itemlist.Blink->Flink = &t->itemlist;

    nt->size = t->size - size;
    t->size = size;
    t->header.num_items = numitems;

    add_to_tree_cache(Vcb, nt, TRUE);

    InterlockedIncrement(&Vcb->open_trees);
#ifdef DEBUG_TREE_REFCOUNTS
    TRACE("created new split tree %p\n", nt);
#endif
    InsertTailList(&Vcb->trees, &nt->list_entry);

    // For an internal node, every already-loaded child whose entry moved to nt
    // gets nt as its parent. increase_tree_rc(nt) / free_tree(t) presumably
    // move the reference the child held on its parent from t to nt.
    if (nt->header.level > 0) {
        LIST_ENTRY* le = nt->itemlist.Flink;

        while (le != &nt->itemlist) {
            tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

            if (td2->treeholder.tree) {
                td2->treeholder.tree->parent = nt;
                increase_tree_rc(nt);
                free_tree(t);
            }

            le = le->Flink;
        }
    }

    if (nt->parent) {
        // t already has a parent: add an entry for nt right after t's entry
        increase_tree_rc(nt->parent);

        td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
        if (!td) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        td->key = newfirstitem->key;

        // InsertHeadList on t->paritem's links places td immediately after it
        InsertHeadList(&t->paritem->list_entry, &td->list_entry);

        td->ignore = FALSE;
        td->inserted = TRUE;

        // NOTE(review): treeholder.address/generation are not set on this path,
        // unlike the new-parent path below - confirm that is intended.
        td->treeholder.tree = nt;
        init_tree_holder(&td->treeholder);
        nt->paritem = td;

        nt->parent->header.num_items++;
        nt->parent->size += sizeof(internal_node);

        goto end;
    }

    TRACE("adding new tree parent\n");

    if (nt->header.level == 255) {
        ERR("cannot add parent to tree at level 255\n");
        return STATUS_INTERNAL_ERROR;
    }

    // t was the top of its root: build a new top tree holding t and nt
    pt = ExAllocatePoolWithTag(PagedPool, sizeof(tree), ALLOC_TAG);
    if (!pt) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    RtlCopyMemory(&pt->header, &nt->header, sizeof(tree_header));
    pt->header.address = 0;
    pt->header.num_items = 2;
    pt->header.level = nt->header.level + 1;
    pt->header.flags = HEADER_FLAG_MIXED_BACKREF;

    pt->refcount = 2; // one per child (t and nt), which both get pt as parent below
    pt->has_address = FALSE;
    pt->Vcb = Vcb;
    pt->parent = NULL;
    pt->paritem = NULL;
    pt->root = t->root;
    pt->new_address = 0;
    pt->has_new_address = FALSE;
    pt->size = pt->header.num_items * sizeof(internal_node);
    pt->flags = t->flags;
    InitializeListHead(&pt->itemlist);

    InterlockedIncrement(&Vcb->open_trees);
#ifdef DEBUG_TREE_REFCOUNTS
    TRACE("created new parent tree %p\n", pt);
#endif
    InsertTailList(&Vcb->trees, &pt->list_entry);

    // first entry of the new parent: points at t, keyed by t's first item
    td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
    if (!td) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    get_first_item(t, &td->key);
    td->ignore = FALSE;
    td->inserted = FALSE;
    td->treeholder.address = 0;
    td->treeholder.generation = Vcb->superblock.generation;
    td->treeholder.tree = t;
    init_tree_holder(&td->treeholder);
    InsertTailList(&pt->itemlist, &td->list_entry);
    t->paritem = td;

    // second entry: points at nt, keyed by the split point
    td = ExAllocatePoolWithTag(PagedPool, sizeof(tree_data), ALLOC_TAG);
    if (!td) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    td->key = newfirstitem->key;
    td->ignore = FALSE;
    td->inserted = FALSE;
    td->treeholder.address = 0;
    td->treeholder.generation = Vcb->superblock.generation;
    td->treeholder.tree = nt;
    init_tree_holder(&td->treeholder);
    InsertTailList(&pt->itemlist, &td->list_entry);
    nt->paritem = td;

    add_to_tree_cache(Vcb, pt, TRUE);

    t->root->treeholder.tree = pt;

    t->parent = pt;
    nt->parent = pt;

end:
    // account one node for the split
    // NOTE(review): the new-parent path creates two trees (nt and pt) but
    // still adds only one node_size here - confirm.
    t->root->root_item.bytes_used += Vcb->superblock.node_size;

    return STATUS_SUCCESS;
}
/*
 * Walks t's live (non-ignored) items, accumulating their size, and splits
 * the tree via split_tree_at in front of the first item that would push the
 * running total past the usable node size (node_size minus the tree header).
 * Returns STATUS_SUCCESS without splitting if everything fits.
 */
static NTSTATUS STDCALL split_tree(device_extension* Vcb, tree* t) {
    UINT32 used = 0, kept = 0;
    LIST_ENTRY* cur;

    // FIXME - naïve implementation: maximizes number of filled trees

    for (cur = t->itemlist.Flink; cur != &t->itemlist; cur = cur->Flink) {
        tree_data* item = CONTAINING_RECORD(cur, tree_data, list_entry);
        UINT32 item_size;

        if (item->ignore)
            continue;

        // leaves store a leaf_node plus payload, internal nodes a fixed-size entry
        if (t->header.level == 0)
            item_size = sizeof(leaf_node) + item->size;
        else
            item_size = sizeof(internal_node);

        // FIXME - move back if previous item was deleted item with same key
        if (used + item_size > Vcb->superblock.node_size - sizeof(tree_header))
            return split_tree_at(Vcb, t, item, kept, used);

        used += item_size;
        kept++;
    }

    return STATUS_SUCCESS;
}
/*
 * Tries to combine tree t with the next tree under the same parent, i.e. the
 * tree referenced by the first non-ignored parent entry after t's own.
 *
 *  - If both trees fit into one node (node_size minus the tree header), all
 *    of the next tree's entries are appended to t, the next tree's extent is
 *    released through reduce_tree_extent, and its parent entry and
 *    tree-cache entry are removed.
 *  - Otherwise entries are moved from the front of the next tree to the end
 *    of t until t reaches the average of the two sizes, and the next tree's
 *    parent entry is re-keyed to its new first live item.
 *
 * Returns STATUS_SUCCESS (also when there is no following sibling), or the
 * failure status of do_load_tree / reduce_tree_extent.
 *
 * The caller must ensure t has a parent: t->paritem and t->parent are
 * dereferenced unconditionally.
 */
static NTSTATUS try_tree_amalgamate(device_extension* Vcb, tree* t, LIST_ENTRY* rollback) {
    LIST_ENTRY* le;
    tree_data* nextparitem = NULL;
    NTSTATUS Status;
    tree *next_tree, *par;
    BOOL loaded;

    TRACE("trying to amalgamate tree in root %llx, level %x (size %u)\n", t->root->id, t->header.level, t->size);

    // FIXME - doesn't capture everything, as it doesn't ascend
    // FIXME - write proper function and put it in treefuncs.c

    // find the next live entry after t's in the parent's item list
    le = t->paritem->list_entry.Flink;
    while (le != &t->parent->itemlist) {
        tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

        if (!td->ignore) {
            nextparitem = td;
            break;
        }

        le = le->Flink;
    }

    if (!nextparitem)
        return STATUS_SUCCESS;

    // FIXME - loop, and capture more than one tree if we can

    TRACE("nextparitem: key = %llx,%x,%llx\n", nextparitem->key.obj_id, nextparitem->key.obj_type, nextparitem->key.offset);

    Status = do_load_tree(Vcb, &nextparitem->treeholder, t->root, t->parent, nextparitem, &loaded);
    if (!NT_SUCCESS(Status)) {
        ERR("do_load_tree returned %08x\n", Status);
        return Status;
    }

    // presumably a freshly loaded child holds a reference on its parent - confirm
    if (loaded)
        increase_tree_rc(t->parent);

    next_tree = nextparitem->treeholder.tree;

    if (t->size + next_tree->size <= Vcb->superblock.node_size - sizeof(tree_header)) {
        // merge two trees into one

        t->header.num_items += next_tree->header.num_items;
        t->size += next_tree->size;

        // re-parent next_tree's loaded children to t, moving one reference each
        if (next_tree->header.level > 0) {
            le = next_tree->itemlist.Flink;

            while (le != &next_tree->itemlist) {
                tree_data* td2 = CONTAINING_RECORD(le, tree_data, list_entry);

                if (td2->treeholder.tree) {
                    td2->treeholder.tree->parent = t;
                    increase_tree_rc(t);
                    free_tree(next_tree);
                }

                le = le->Flink;
            }
        }

        // splice next_tree's whole list onto the tail of t's list
        // NOTE(review): this splice assumes next_tree's list is not empty
        t->itemlist.Blink->Flink = next_tree->itemlist.Flink;
        t->itemlist.Blink->Flink->Blink = t->itemlist.Blink;
        t->itemlist.Blink = next_tree->itemlist.Blink;
        t->itemlist.Blink->Flink = &t->itemlist;

        // next_tree is now empty
        next_tree->itemlist.Flink = next_tree->itemlist.Blink = &next_tree->itemlist;

        next_tree->header.num_items = 0;
        next_tree->size = 0;

        if (next_tree->has_new_address) { // delete associated EXTENT_ITEM
            Status = reduce_tree_extent(Vcb, next_tree->new_address, next_tree, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("reduce_tree_extent returned %08x\n", Status);
                free_tree(next_tree);
                return Status;
            }
        } else if (next_tree->has_address) {
            Status = reduce_tree_extent(Vcb, next_tree->header.address, next_tree, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("reduce_tree_extent returned %08x\n", Status);
                free_tree(next_tree);
                return Status;
            }
        }

        // drop next_tree's entry from the parent's accounting
        if (!nextparitem->ignore) {
            nextparitem->ignore = TRUE;
            next_tree->parent->header.num_items--;
            next_tree->parent->size -= sizeof(internal_node);
        }

        // mark every ancestor for writing
        par = next_tree->parent;
        while (par) {
            add_to_tree_cache(Vcb, par, TRUE);
            par = par->parent;
        }

        // next_tree->paritem is freed here right after nextparitem is unlinked;
        // presumably both name the same entry - confirm
        RemoveEntryList(&nextparitem->list_entry);
        ExFreePool(next_tree->paritem);
        next_tree->paritem = NULL;

        next_tree->root->root_item.bytes_used -= Vcb->superblock.node_size;

        free_tree(next_tree);

        // remove next_tree from tree cache
        // (the pointer is only compared here, until a matching cache entry is
        // found and that entry's reference is released as well)
        le = Vcb->tree_cache.Flink;
        while (le != &Vcb->tree_cache) {
            tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);

            if (tc2->tree == next_tree) {
                free_tree(next_tree);
                RemoveEntryList(le);
                ExFreePool(tc2);
                break;
            }

            le = le->Flink;
        }
    } else {
        // rebalance by moving items from second tree into first
        ULONG avg_size = (t->size + next_tree->size) / 2;
        KEY firstitem = {0, 0, 0};

        TRACE("attempting rebalance\n");

        // always look at the current head of next_tree: each moved entry is
        // removed from the list, so the head changes every iteration
        le = next_tree->itemlist.Flink;
        while (le != &next_tree->itemlist && t->size < avg_size && next_tree->header.num_items > 1) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
            ULONG size;

            // ignored entries are moved too, but count for nothing
            if (!td->ignore) {
                if (next_tree->header.level == 0)
                    size = sizeof(leaf_node) + td->size;
                else
                    size = sizeof(internal_node);
            } else
                size = 0;

            if (t->size + size < Vcb->superblock.node_size - sizeof(tree_header)) {
                RemoveEntryList(&td->list_entry);
                InsertTailList(&t->itemlist, &td->list_entry);

                // a loaded child moves with its entry: re-parent it to t
                if (next_tree->header.level > 0 && td->treeholder.tree) {
                    td->treeholder.tree->parent = t;
                    increase_tree_rc(t);
                    free_tree(next_tree);
                }

                if (!td->ignore) {
                    next_tree->size -= size;
                    t->size += size;
                    next_tree->header.num_items--;
                    t->header.num_items++;
                }
            } else
                break;

            le = next_tree->itemlist.Flink;
        }

        // the parent entry's key must match next_tree's new first live item
        le = next_tree->itemlist.Flink;
        while (le != &next_tree->itemlist) {
            tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

            if (!td->ignore) {
                firstitem = td->key;
                break;
            }

            le = le->Flink;
        }

        // FIXME - once ascension is working, make this work with parent's parent, etc.
        if (next_tree->paritem)
            next_tree->paritem->key = firstitem;

        // mark next_tree and all of its ancestors for writing
        par = next_tree;
        while (par) {
            add_to_tree_cache(Vcb, par, TRUE);
            par = par->parent;
        }

        free_tree(next_tree);
    }

    return STATUS_SUCCESS;
}
/*
 * Rewrites the extent-tree record of the tree block at `address` so that it
 * carries `level` as its level.
 *
 * With the skinny-metadata incompat flag set, a METADATA_ITEM keyed by the
 * tree's current level (t->header.level) is looked up first; if found it is
 * deleted and re-inserted with `level` as the key offset and the same payload.
 * Otherwise (or if no such item exists) the EXTENT_ITEM for the address is
 * copied, its level field updated, and the copy inserted in place of the
 * original.
 *
 * Returns STATUS_SUCCESS, the find_item failure status,
 * STATUS_INSUFFICIENT_RESOURCES on allocation failure, or
 * STATUS_INTERNAL_ERROR if no suitable item is found, the item is too small,
 * or the re-insert fails.
 */
static NTSTATUS update_extent_level(device_extension* Vcb, UINT64 address, tree* t, UINT8 level, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp;
    NTSTATUS Status;

    if (Vcb->superblock.incompat_flags & BTRFS_INCOMPAT_FLAGS_SKINNY_METADATA) {
        searchkey.obj_id = address;
        searchkey.obj_type = TYPE_METADATA_ITEM;
        searchkey.offset = t->header.level;

        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) {
            ERR("error - find_item returned %08x\n", Status);
            return Status;
        }

        if (!keycmp(&tp.item->key, &searchkey)) {
            EXTENT_ITEM_SKINNY_METADATA* eism;

            // a zero-sized item has no payload to carry over
            if (tp.item->size > 0) {
                eism = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);

                if (!eism) {
                    ERR("out of memory\n");
                    free_traverse_ptr(&tp);
                    return STATUS_INSUFFICIENT_RESOURCES;
                }

                RtlCopyMemory(eism, tp.item->data, tp.item->size);
            } else
                eism = NULL;

            delete_tree_item(Vcb, &tp, rollback);

            // the level is the key offset, so the item has to be re-keyed
            if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_METADATA_ITEM, level, eism, tp.item->size, NULL, rollback)) {
                ERR("insert_tree_item failed\n");

                // eism is NULL for a zero-sized item; ExFreePool must not see NULL
                if (eism)
                    ExFreePool(eism);

                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            }

            free_traverse_ptr(&tp);
            return STATUS_SUCCESS;
        }

        free_traverse_ptr(&tp);
    }

    // fall back to the EXTENT_ITEM for this address, whatever its offset
    searchkey.obj_id = address;
    searchkey.obj_type = TYPE_EXTENT_ITEM;
    searchkey.offset = 0xffffffffffffffff;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return Status;
    }

    if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
        EXTENT_ITEM_TREE* eit;

        if (tp.item->size < sizeof(EXTENT_ITEM_TREE)) {
            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM_TREE));
            free_traverse_ptr(&tp);
            return STATUS_INTERNAL_ERROR;
        }

        eit = ExAllocatePoolWithTag(PagedPool, tp.item->size, ALLOC_TAG);
        if (!eit) {
            ERR("out of memory\n");
            free_traverse_ptr(&tp);
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        RtlCopyMemory(eit, tp.item->data, tp.item->size);

        delete_tree_item(Vcb, &tp, rollback);

        eit->level = level;

        // same key as before; only the payload's level field differs
        if (!insert_tree_item(Vcb, Vcb->extent_root, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, eit, tp.item->size, NULL, rollback)) {
            ERR("insert_tree_item failed\n");
            ExFreePool(eit);
            free_traverse_ptr(&tp);
            return STATUS_INTERNAL_ERROR;
        }

        free_traverse_ptr(&tp);

        return STATUS_SUCCESS;
    }

    ERR("could not find EXTENT_ITEM for address %llx\n", address);

    free_traverse_ptr(&tp);

    return STATUS_INTERNAL_ERROR;
}
/*
 * Restructures the trees in Vcb->tree_cache that are marked for writing, in
 * three passes:
 *
 *  1. Level by level from 0 upwards, until a level has no trees to write:
 *     trees with no items are deleted if they have a parent, or reset to
 *     level 0 if they are a top tree; trees larger than a node are split.
 *  2. For every level seen, trees with a parent that are less than half
 *     full are handed to try_tree_amalgamate.
 *  3. If pass 1 deleted anything, parentless trees above level 0 that are
 *     left with a single item are removed and their only child becomes the
 *     root's top tree.
 *
 * Returns STATUS_SUCCESS or the first failure status from a helper.
 */
static NTSTATUS STDCALL do_splits(device_extension* Vcb, LIST_ENTRY* rollback) {
    UINT8 level, max_level;
    UINT32 min_size;
    BOOL empty, done_deletions = FALSE;
    NTSTATUS Status;
    tree_cache* tc2;

    TRACE("(%p)\n", Vcb);

    max_level = 0;

    // level is a UINT8, so a "level <= 255" condition could never end this
    // loop; the upper bound is enforced explicitly at the bottom instead
    for (level = 0; ; level++) {
        LIST_ENTRY *le, *nextle;

        empty = TRUE;

        TRACE("doing level %u\n", level);

        le = Vcb->tree_cache.Flink;

        while (le != &Vcb->tree_cache) {
            tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);

            // the current cache entry may be freed below
            nextle = le->Flink;

            if (tc2->write && tc2->tree->header.level == level) {
                empty = FALSE;

                if (tc2->tree->header.num_items == 0) {
                    if (tc2->tree->parent) {
                        LIST_ENTRY* le2;
                        KEY firstitem = {0xcccccccccccccccc,0xcc,0xcccccccccccccccc};

                        done_deletions = TRUE;

                        // key of the first entry, for the trace message only
                        le2 = tc2->tree->itemlist.Flink;
                        if (le2 != &tc2->tree->itemlist) {
                            tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);

                            firstitem = td->key;
                        }

                        TRACE("deleting tree in root %llx (first item was %llx,%x,%llx)\n",
                              tc2->tree->root->id, firstitem.obj_id, firstitem.obj_type, firstitem.offset);

                        tc2->tree->root->root_item.bytes_used -= Vcb->superblock.node_size;

                        if (tc2->tree->has_new_address) { // delete associated EXTENT_ITEM
                            Status = reduce_tree_extent(Vcb, tc2->tree->new_address, tc2->tree, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }
                        } else if (tc2->tree->has_address) {
                            Status = reduce_tree_extent(Vcb,tc2->tree->header.address, tc2->tree, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }
                        }

                        // take the tree's entry out of its parent's accounting
                        if (!tc2->tree->paritem->ignore) {
                            tc2->tree->paritem->ignore = TRUE;
                            tc2->tree->parent->header.num_items--;
                            tc2->tree->parent->size -= sizeof(internal_node);
                        }

                        RemoveEntryList(&tc2->tree->paritem->list_entry);
                        ExFreePool(tc2->tree->paritem);
                        tc2->tree->paritem = NULL;

                        free_tree(tc2->tree);

                        RemoveEntryList(le);
                        ExFreePool(tc2);
                    } else if (tc2->tree->header.level != 0) {
                        // an empty top tree is kept, but becomes a leaf
                        if (tc2->tree->has_new_address) {
                            Status = update_extent_level(Vcb, tc2->tree->new_address, tc2->tree, 0, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("update_extent_level returned %08x\n", Status);
                                return Status;
                            }
                        }

                        tc2->tree->header.level = 0;
                    }
                } else if (tc2->tree->size > Vcb->superblock.node_size - sizeof(tree_header)) {
                    TRACE("splitting overlarge tree (%x > %x)\n", tc2->tree->size, Vcb->superblock.node_size - sizeof(tree_header));
                    Status = split_tree(Vcb, tc2->tree);

                    if (!NT_SUCCESS(Status)) {
                        ERR("split_tree returned %08x\n", Status);
                        return Status;
                    }
                }
            }

            le = nextle;
        }

        if (!empty) {
            max_level = level;
        } else {
            TRACE("nothing found for level %u\n", level);
            break;
        }

        if (level == 255)
            break;
    }

    min_size = (Vcb->superblock.node_size - sizeof(tree_header)) / 2;

    for (level = 0; level <= max_level; level++) {
        LIST_ENTRY* le;

        le = Vcb->tree_cache.Flink;

        while (le != &Vcb->tree_cache) {
            tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);

            if (tc2->write && tc2->tree->header.level == level && tc2->tree->header.num_items > 0 && tc2->tree->parent && tc2->tree->size < min_size) {
                Status = try_tree_amalgamate(Vcb, tc2->tree, rollback);
                if (!NT_SUCCESS(Status)) {
                    ERR("try_tree_amalgamate returned %08x\n", Status);
                    return Status;
                }
            }

            // read Flink only now: the call above may have removed other entries
            le = le->Flink;
        }
    }

    // simplify trees if top tree only has one entry

    if (done_deletions) {
        for (level = max_level; level > 0; level--) {
            LIST_ENTRY *le, *nextle;

            le = Vcb->tree_cache.Flink;
            while (le != &Vcb->tree_cache) {
                nextle = le->Flink;
                tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);

                if (tc2->write && tc2->tree->header.level == level) {
                    if (!tc2->tree->parent && tc2->tree->header.num_items == 1) {
                        LIST_ENTRY* le2 = tc2->tree->itemlist.Flink;
                        tree_data* td = NULL;
                        tree* child_tree = NULL;

                        // locate the single live entry
                        while (le2 != &tc2->tree->itemlist) {
                            tree_data* candidate = CONTAINING_RECORD(le2, tree_data, list_entry);

                            if (!candidate->ignore) {
                                td = candidate;
                                break;
                            }

                            le2 = le2->Flink;
                        }

                        // num_items says there is one; refuse to continue with
                        // an ignored or missing entry if the list disagrees
                        if (!td) {
                            ERR("top-level tree in root %llx has no live item\n", tc2->tree->root->id);
                            return STATUS_INTERNAL_ERROR;
                        }

                        TRACE("deleting top-level tree in root %llx with one item\n", tc2->tree->root->id);

                        if (tc2->tree->has_new_address) { // delete associated EXTENT_ITEM
                            Status = reduce_tree_extent(Vcb, tc2->tree->new_address, tc2->tree, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }
                        } else if (tc2->tree->has_address) {
                            Status = reduce_tree_extent(Vcb,tc2->tree->header.address, tc2->tree, rollback);

                            if (!NT_SUCCESS(Status)) {
                                ERR("reduce_tree_extent returned %08x\n", Status);
                                return Status;
                            }
                        }

                        if (!td->treeholder.tree) { // load first item if not already loaded
                            KEY searchkey = {0,0,0};
                            traverse_ptr tp;

                            // searching for the lowest key presumably loads the
                            // child as a side effect - confirm against find_item
                            Status = find_item(Vcb, tc2->tree->root, &tp, &searchkey, FALSE);
                            if (!NT_SUCCESS(Status)) {
                                ERR("error - find_item returned %08x\n", Status);
                                return Status;
                            }

                            free_traverse_ptr(&tp);
                        }

                        child_tree = td->treeholder.tree;

                        // detach the child; this free_tree drops the reference
                        // the child presumably held on its parent
                        if (child_tree) {
                            child_tree->parent = NULL;
                            child_tree->paritem = NULL;
                            free_tree(tc2->tree);
                        }

                        tc2->tree->root->root_item.bytes_used -= Vcb->superblock.node_size;

                        free_tree(tc2->tree);

                        if (child_tree)
                            child_tree->root->treeholder.tree = child_tree;

                        RemoveEntryList(le);
                        ExFreePool(tc2);
                    }
                }

                le = nextle;
            }
        }
    }

    return STATUS_SUCCESS;
}
/*
 * Commits the pending tree changes of a volume.
 *
 * The top of the root tree is always marked for writing when any tree is
 * dirty. add_parents, do_splits, allocate_tree_extents and update_chunk_usage
 * are then repeated until trees_consistent() reports success, after which
 * the root tree is updated, the trees and superblocks are written, the space
 * cache is cleaned and the superblock generation is incremented. On success
 * every tree-cache entry's write flag is cleared and Vcb->write_trees is
 * reset to 0.
 *
 * Returns STATUS_SUCCESS or the first failure status from a step; on failure
 * the write flags are left as they are.
 */
NTSTATUS STDCALL do_write(device_extension* Vcb, LIST_ENTRY* rollback) {
    NTSTATUS Status;
    LIST_ENTRY* le;

    TRACE("(%p)\n", Vcb);

    // If only changing superblock, e.g. changing label, we still need to rewrite
    // the root tree so the generations match, otherwise you won't be able to mount on Linux.
    if (Vcb->write_trees > 0) {
        KEY searchkey;
        traverse_ptr tp;

        searchkey.obj_id = 0;
        searchkey.obj_type = 0;
        searchkey.offset = 0;

        Status = find_item(Vcb, Vcb->root_root, &tp, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) {
            ERR("error - find_item returned %08x\n", Status);
            return Status;
        }

        add_to_tree_cache(Vcb, Vcb->root_root->treeholder.tree, TRUE);

        free_traverse_ptr(&tp);
    }

    // each step can dirty further trees, so iterate until nothing changes
    do {
        Status = add_parents(Vcb, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("add_parents returned %08x\n", Status);
            goto end;
        }

        Status = do_splits(Vcb, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("do_splits returned %08x\n", Status);
            goto end;
        }

        Status = allocate_tree_extents(Vcb, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("allocate_tree_extents returned %08x\n", Status);
            goto end;
        }

        Status = update_chunk_usage(Vcb, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("update_chunk_usage returned %08x\n", Status);
            goto end;
        }
    } while (!trees_consistent(Vcb));

    TRACE("trees consistent\n");

    Status = update_root_root(Vcb, rollback);
    if (!NT_SUCCESS(Status)) {
        ERR("update_root_root returned %08x\n", Status);
        goto end;
    }

    Status = write_trees(Vcb);
    if (!NT_SUCCESS(Status)) {
        ERR("write_trees returned %08x\n", Status);
        goto end;
    }

    Status = write_superblocks(Vcb);
    if (!NT_SUCCESS(Status)) {
        ERR("write_superblocks returned %08x\n", Status);
        goto end;
    }

    clean_space_cache(Vcb);

    Vcb->superblock.generation++;

    Status = STATUS_SUCCESS;

    // everything is on disk: nothing in the cache is dirty any more
    le = Vcb->tree_cache.Flink;
    while (le != &Vcb->tree_cache) {
        tree_cache* tc2 = CONTAINING_RECORD(le, tree_cache, list_entry);

        tc2->write = FALSE;

        le = le->Flink;
    }

    Vcb->write_trees = 0;

end:
    TRACE("do_write returning %08x\n", Status);

    return Status;
}
/*
 * Hook for flushing pending tree changes. The flushing logic is compiled out
 * (#if 0), so this currently always returns STATUS_SUCCESS and does not
 * touch Vcb.
 */
NTSTATUS consider_write(device_extension* Vcb) {
    // FIXME - call do_write if Vcb->write_trees high
#if 0
    NTSTATUS Status = STATUS_SUCCESS;
    LIST_ENTRY rollback;

    InitializeListHead(&rollback);

    if (Vcb->write_trees > 0)
        Status = do_write(Vcb, &rollback);

    free_tree_cache(&Vcb->tree_cache);

    if (NT_SUCCESS(Status))
        clear_rollback(&rollback);
    else
        do_rollback(Vcb, &rollback);

    return Status;
#else
    return STATUS_SUCCESS;
#endif
}
/*
 * Inserts `ins` into `list`, which is kept in ascending key order. The new
 * entry goes in front of the first existing entry whose key is strictly
 * greater, so entries with equal keys stay in insertion order; if there is
 * no such entry it is appended at the tail.
 */
static __inline void insert_into_ordered_list(LIST_ENTRY* list, ordered_list* ins) {
    LIST_ENTRY* pos;

    for (pos = list->Flink; pos != list; pos = pos->Flink) {
        ordered_list* cur = CONTAINING_RECORD(pos, ordered_list, list_entry);

        if (cur->key > ins->key)
            break;
    }

    // InsertTailList links the new entry immediately before its first
    // argument: before the larger entry found, or before the list head
    // (i.e. at the tail) when the loop ran to the end
    InsertTailList(pos, &ins->list_entry);
}
/*
 * Computes the hash of an extent data reference from its (root, objid,
 * offset) triple: one CRC32C over `root`, a second CRC32C chained over
 * `objid` and then `offset`, combined as (first << 31) ^ second.
 * Both CRCs are seeded with 0xffffffff.
 */
static UINT64 get_extent_data_ref_hash(UINT64 root, UINT64 objid, UINT64 offset) {
    UINT32 crc_root, crc_rest;

    // FIXME - can we test this?

    // FIXME - make sure numbers here are little-endian

    crc_root = calc_crc32c(0xffffffff, (UINT8*)&root, sizeof(UINT64));

    crc_rest = calc_crc32c(0xffffffff, (UINT8*)&objid, sizeof(UINT64));
    crc_rest = calc_crc32c(crc_rest, (UINT8*)&offset, sizeof(UINT64));

    return ((UINT64)crc_root << 31) ^ (UINT64)crc_rest;
}
/*
 * Adds a data reference (subvol, inode, offset) to the extent at
 * (address, size) in the extent tree.
 *
 *  - If no EXTENT_ITEM exists for the extent, one is created with a
 *    refcount of 1 and a single inline EXTENT_DATA_REF.
 *  - An old-format (EXTENT_ITEM_V0) or shared extent is converted first and
 *    then looked up again; if it has vanished after conversion the function
 *    warns and returns STATUS_SUCCESS.
 *  - Otherwise a copy of the item is built with the new EXTENT_DATA_REF
 *    inserted in front of the first inline ref whose hash is not smaller
 *    than the new ref's, refcount is incremented, and the copy replaces
 *    the original item.
 *
 * Returns STATUS_SUCCESS, a failure status from find_item or a conversion
 * helper, STATUS_INSUFFICIENT_RESOURCES on allocation failure, or
 * STATUS_INTERNAL_ERROR for an undersized item, a non-data extent, an
 * unrecognised inline ref type or a failed insert.
 */
NTSTATUS STDCALL add_extent_ref(device_extension* Vcb, UINT64 address, UINT64 size, root* subvol, UINT64 inode, UINT64 offset, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp;
    EXTENT_ITEM* ei;
    UINT8 *siptr, *type;
    ULONG len;
    UINT64 hash;
    EXTENT_DATA_REF* edr;
    NTSTATUS Status;

    TRACE("(%p, %llx, %llx, %llx, %llx, %llx)\n", Vcb, address, size, subvol->id, inode, offset);

    searchkey.obj_id = address;
    searchkey.obj_type = TYPE_EXTENT_ITEM;
    searchkey.offset = size;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return Status;
    }

    if (keycmp(&tp.item->key, &searchkey)) {
        // create new entry

        len = sizeof(EXTENT_ITEM) + sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
        free_traverse_ptr(&tp);

        ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
        if (!ei) {
            ERR("out of memory\n");
            return STATUS_INSUFFICIENT_RESOURCES;
        }

        ei->refcount = 1;
        ei->generation = Vcb->superblock.generation;
        ei->flags = EXTENT_ITEM_DATA;

        // layout: EXTENT_ITEM, one type byte, then the EXTENT_DATA_REF
        type = (UINT8*)&ei[1];
        *type = TYPE_EXTENT_DATA_REF;

        edr = (EXTENT_DATA_REF*)&type[1];
        edr->root = subvol->id;
        edr->objid = inode;
        edr->offset = offset;
        edr->count = 1;

        if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
            ERR("error - failed to insert item\n");
            // the buffer was not taken over by the tree, so release it here
            ExFreePool(ei);
            return STATUS_INTERNAL_ERROR;
        }

        // FIXME - update free space in superblock and CHUNK_ITEM

        return STATUS_SUCCESS;
    }

    if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
        Status = convert_old_data_extent(Vcb, address, size, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("convert_old_data_extent returned %08x\n", Status);
            free_traverse_ptr(&tp);
            return Status;
        }

        free_traverse_ptr(&tp);

        // the conversion replaced the item, so look it up again
        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) {
            ERR("error - find_item returned %08x\n", Status);
            return Status;
        }

        if (keycmp(&tp.item->key, &searchkey)) {
            WARN("extent item not found for address %llx, size %llx\n", address, size);
            free_traverse_ptr(&tp);
            return STATUS_SUCCESS;
        }
    }

    ei = (EXTENT_ITEM*)tp.item->data;

    if (tp.item->size < sizeof(EXTENT_ITEM)) {
        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
        free_traverse_ptr(&tp);
        return STATUS_INTERNAL_ERROR;
    }

    if (extent_item_is_shared(ei, tp.item->size - sizeof(EXTENT_ITEM))) {
        Status = convert_shared_data_extent(Vcb, address, size, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("convert_shared_data_extent returned %08x\n", Status);
            free_traverse_ptr(&tp);
            return Status;
        }

        free_traverse_ptr(&tp);

        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) {
            ERR("error - find_item returned %08x\n", Status);
            return Status;
        }

        if (keycmp(&tp.item->key, &searchkey)) {
            WARN("extent item not found for address %llx, size %llx\n", address, size);
            free_traverse_ptr(&tp);
            return STATUS_SUCCESS;
        }

        ei = (EXTENT_ITEM*)tp.item->data;
    }

    if (ei->flags != EXTENT_ITEM_DATA) {
        ERR("error - flag was not EXTENT_ITEM_DATA\n");
        free_traverse_ptr(&tp);
        return STATUS_INTERNAL_ERROR;
    }

    // FIXME - is ei->refcount definitely the number of items, or is it the sum of the subitem refcounts?

    hash = get_extent_data_ref_hash(subvol->id, inode, offset);

    len = tp.item->size - sizeof(EXTENT_ITEM);
    siptr = (UINT8*)&ei[1];

    // FIXME - increase subitem refcount if there already?

    // Find the insertion point among the inline refs. An item without any
    // inline refs (len == 0) is not examined at all: the new ref is then
    // simply appended after the EXTENT_ITEM.
    while (len > 0) {
        if (*siptr == TYPE_EXTENT_DATA_REF) {
            UINT64 sihash;

            edr = (EXTENT_DATA_REF*)&siptr[1];
            sihash = get_extent_data_ref_hash(edr->root, edr->objid, edr->offset);

            if (sihash >= hash)
                break;

            siptr += sizeof(UINT8) + sizeof(EXTENT_DATA_REF);

            if (len > sizeof(EXTENT_DATA_REF) + sizeof(UINT8)) {
                len -= sizeof(EXTENT_DATA_REF) + sizeof(UINT8);
            } else
                break;
        // FIXME - TYPE_TREE_BLOCK_REF 0xB0
        } else {
            ERR("unrecognized extent subitem %x\n", *siptr);
            free_traverse_ptr(&tp);
            return STATUS_INTERNAL_ERROR;
        }
    }

    len = tp.item->size + sizeof(UINT8) + sizeof(EXTENT_DATA_REF); // FIXME - die if too big
    ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
    if (!ei) {
        ERR("out of memory\n");
        free_traverse_ptr(&tp);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    // copy everything in front of the insertion point, ...
    RtlCopyMemory(ei, tp.item->data, siptr - tp.item->data);
    ei->refcount++;

    // ... write the new ref, ...
    type = (UINT8*)ei + (siptr - tp.item->data);
    *type = TYPE_EXTENT_DATA_REF;

    edr = (EXTENT_DATA_REF*)&type[1];
    edr->root = subvol->id;
    edr->objid = inode;
    edr->offset = offset;
    edr->count = 1;

    // ... and then whatever followed the insertion point
    if (siptr < tp.item->data + tp.item->size)
        RtlCopyMemory(&edr[1], siptr, tp.item->data + tp.item->size - siptr);

    delete_tree_item(Vcb, &tp, rollback);

    free_traverse_ptr(&tp);

    if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
        ERR("error - failed to insert item\n");
        ExFreePool(ei);
        return STATUS_INTERNAL_ERROR;
    }

    return STATUS_SUCCESS;
}
// One extent data reference held on a temporary doubly linked list
// (filled by add_data_ref, released by free_data_refs).
typedef struct {
    EXTENT_DATA_REF edr;    // the reference: root, objid, offset and count
    LIST_ENTRY list_entry;  // linkage into the list
} data_ref;
/*
 * Appends a new data_ref for (root, objid, offset), with a count of 1, to
 * the tail of `data_refs`. On allocation failure an error is logged and the
 * list is left unchanged; the caller is not told.
 */
static void add_data_ref(LIST_ENTRY* data_refs, UINT64 root, UINT64 objid, UINT64 offset) {
    data_ref* entry;
    EXTENT_DATA_REF* ref;

    entry = ExAllocatePoolWithTag(PagedPool, sizeof(data_ref), ALLOC_TAG);
    if (entry == NULL) {
        ERR("out of memory\n");
        return;
    }

    // FIXME - increase count if entry there already
    // FIXME - put in order?

    ref = &entry->edr;
    ref->root = root;
    ref->objid = objid;
    ref->offset = offset;
    ref->count = 1;

    InsertTailList(data_refs, &entry->list_entry);
}
/*
 * Unlinks and frees every data_ref on `data_refs`, leaving the list empty.
 */
static void free_data_refs(LIST_ENTRY* data_refs) {
    for (;;) {
        LIST_ENTRY* head;

        if (IsListEmpty(data_refs))
            break;

        head = RemoveHeadList(data_refs);
        ExFreePool(CONTAINING_RECORD(head, data_ref, list_entry));
    }
}
/*
 * Converts an old-format (EXTENT_ITEM_V0) data extent at (address, size)
 * into a current EXTENT_ITEM with inline EXTENT_DATA_REFs.
 *
 * The old item is deleted, then every EXTENT_REF_V0 item for the address is
 * visited: the tree it names (by key offset) is loaded and, if it is a leaf,
 * scanned for regular or preallocated EXTENT_DATA entries pointing at
 * `address`. Each hit becomes one data ref, and the EXTENT_REF_V0 item is
 * deleted. Finally a new EXTENT_ITEM holding all collected refs is inserted.
 *
 * Returns STATUS_SUCCESS (also when the extent is missing, is not in the old
 * format, or no refs were found), a find_item / load_tree failure status,
 * STATUS_INSUFFICIENT_RESOURCES, or STATUS_INTERNAL_ERROR if the insert
 * fails. The temporary ref list is released on every path.
 */
static NTSTATUS convert_old_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp, next_tp;
    BOOL b;
    LIST_ENTRY data_refs;
    LIST_ENTRY* le;
    UINT64 refcount;
    EXTENT_ITEM* ei;
    ULONG eisize;
    UINT8* type;
    NTSTATUS Status;

    searchkey.obj_id = address;
    searchkey.obj_type = TYPE_EXTENT_ITEM;
    searchkey.offset = size;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return Status;
    }

    if (keycmp(&tp.item->key, &searchkey)) {
        WARN("extent item not found for address %llx, size %llx\n", address, size);
        free_traverse_ptr(&tp);
        return STATUS_SUCCESS;
    }

    if (tp.item->size != sizeof(EXTENT_ITEM_V0)) {
        TRACE("extent does not appear to be old - returning STATUS_SUCCESS\n");
        free_traverse_ptr(&tp);
        return STATUS_SUCCESS;
    }

    delete_tree_item(Vcb, &tp, rollback);

    free_traverse_ptr(&tp);

    // now walk the old-style back references for this address
    searchkey.obj_id = address;
    searchkey.obj_type = TYPE_EXTENT_REF_V0;
    searchkey.offset = 0;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return Status;
    }

    InitializeListHead(&data_refs);

    do {
        // fetch the successor first: tp's item may be deleted below
        b = find_next_item(Vcb, &tp, &next_tp, FALSE);

        if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
            tree* t;

            // normally we'd need to acquire load_tree_lock here, but we're protected by the write tree lock

            Status = load_tree(Vcb, tp.item->key.offset, NULL, &t);
            if (!NT_SUCCESS(Status)) {
                ERR("load tree for address %llx returned %08x\n", tp.item->key.offset, Status);
                free_traverse_ptr(&tp);

                // next_tp is only valid if find_next_item succeeded
                if (b)
                    free_traverse_ptr(&next_tp);

                free_data_refs(&data_refs);
                return Status;
            }

            // only leaves carry EXTENT_DATA items
            if (t->header.level == 0) {
                le = t->itemlist.Flink;
                while (le != &t->itemlist) {
                    tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

                    if (!td->ignore && td->key.obj_type == TYPE_EXTENT_DATA) {
                        EXTENT_DATA* ed = (EXTENT_DATA*)td->data;

                        if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
                            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;

                            if (ed2->address == address)
                                add_data_ref(&data_refs, t->header.tree_id, td->key.obj_id, td->key.offset);
                        }
                    }

                    le = le->Flink;
                }
            }

            free_tree(t);

            delete_tree_item(Vcb, &tp, rollback);
        }

        if (b) {
            free_traverse_ptr(&tp);
            tp = next_tp;

            // stop as soon as we have walked past the items of interest
            if (tp.item->key.obj_id > searchkey.obj_id || tp.item->key.obj_type > searchkey.obj_type)
                break;
        }
    } while (b);

    free_traverse_ptr(&tp);

    if (IsListEmpty(&data_refs)) {
        WARN("no data refs found\n");
        return STATUS_SUCCESS;
    }

    // create new entry

    refcount = 0;

    le = data_refs.Flink;
    while (le != &data_refs) {
        refcount++;
        le = le->Flink;
    }

    // EXTENT_ITEM followed by one (type byte, EXTENT_DATA_REF) pair per ref
    eisize = sizeof(EXTENT_ITEM) + ((sizeof(char) + sizeof(EXTENT_DATA_REF)) * refcount);
    ei = ExAllocatePoolWithTag(PagedPool, eisize, ALLOC_TAG);
    if (!ei) {
        ERR("out of memory\n");
        free_data_refs(&data_refs);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    ei->refcount = refcount;
    ei->generation = Vcb->superblock.generation;
    ei->flags = EXTENT_ITEM_DATA;

    type = (UINT8*)&ei[1];

    le = data_refs.Flink;
    while (le != &data_refs) {
        data_ref* dr = CONTAINING_RECORD(le, data_ref, list_entry);

        type[0] = TYPE_EXTENT_DATA_REF;
        RtlCopyMemory(&type[1], &dr->edr, sizeof(EXTENT_DATA_REF));

        type = &type[1 + sizeof(EXTENT_DATA_REF)];

        le = le->Flink;
    }

    // the refs have been copied into ei; the list is no longer needed
    free_data_refs(&data_refs);

    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, ei, eisize, NULL, rollback)) {
        ERR("error - failed to insert item\n");
        ExFreePool(ei);
        return STATUS_INTERNAL_ERROR;
    }

    return STATUS_SUCCESS;
}
// One inline reference subitem of an EXTENT_ITEM, kept in a temporary list
// while convert_shared_data_extent rebuilds the item.
typedef struct {
    UINT8 type;             // subitem type byte (e.g. TYPE_EXTENT_DATA_REF, TYPE_SHARED_DATA_REF)
    void* data;             // payload that follows the type byte
    BOOL allocated;         // TRUE if data was pool-allocated and is freed together with this entry
    LIST_ENTRY list_entry;  // linkage in the temporary list
} extent_ref;
// Drains the given list of extent_ref entries. Each entry is unlinked and
// freed; its payload is freed as well when the entry is flagged as owning it.
static void free_extent_refs(LIST_ENTRY* extent_refs) {
    for (;;) {
        LIST_ENTRY* head;
        extent_ref* ref;

        if (IsListEmpty(extent_refs))
            return;

        head = RemoveHeadList(extent_refs);
        ref = CONTAINING_RECORD(head, extent_ref, list_entry);

        if (ref->allocated)
            ExFreePool(ref->data);

        ExFreePool(ref);
    }
}
// Rewrites the EXTENT_ITEM keyed (address, TYPE_EXTENT_ITEM, size) so that it
// no longer contains SHARED_DATA_REF subitems.
//
// Every inline ref of the item is collected into a temporary list. For each
// SHARED_DATA_REF the tree at sdr->offset is loaded and, if it is a leaf, an
// EXTENT_DATA_REF (root = tree id, objid/offset = item key, count = 1) is
// appended for every non-ignored regular/prealloc EXTENT_DATA item in it that
// points at `address`; the shared ref itself is dropped. The old item is then
// deleted and a new one inserted whose refcount is the number of refs kept.
//
// Returns STATUS_SUCCESS if the item does not exist or has no inline refs,
// and also when no refs remain after conversion (the item is then deleted).
// Returns an error status on lookup/load failure, malformed items, or
// allocation failure.
static NTSTATUS convert_shared_data_extent(device_extension* Vcb, UINT64 address, UINT64 size, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp;
    LIST_ENTRY extent_refs;
    LIST_ENTRY *le, *next_le;
    EXTENT_ITEM *ei, *newei;
    UINT8* siptr;
    ULONG len;
    UINT64 count;
    NTSTATUS Status;

    searchkey.obj_id = address;
    searchkey.obj_type = TYPE_EXTENT_ITEM;
    searchkey.offset = size;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return Status;
    }

    if (keycmp(&tp.item->key, &searchkey)) {
        WARN("extent item not found for address %llx, size %llx\n", address, size);
        free_traverse_ptr(&tp);
        return STATUS_SUCCESS;
    }

    if (tp.item->size < sizeof(EXTENT_ITEM)) {
        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
        free_traverse_ptr(&tp);
        return STATUS_INTERNAL_ERROR;
    }

    ei = (EXTENT_ITEM*)tp.item->data;
    len = tp.item->size - sizeof(EXTENT_ITEM);

    // No inline refs at all: there is nothing to convert, and reading a
    // subitem type byte here would run past the end of the item.
    if (len == 0) {
        free_traverse_ptr(&tp);
        return STATUS_SUCCESS;
    }

    InitializeListHead(&extent_refs);

    siptr = (UINT8*)&ei[1];

    // Collect the existing inline refs. Entries created here point into the
    // tree item's own buffer (allocated == FALSE), so tp must stay alive
    // until the new item has been built.
    while (len > 0) {
        extent_ref* er;
        ULONG sublen;

        if (*siptr == TYPE_TREE_BLOCK_REF) {
            sublen = sizeof(TREE_BLOCK_REF);
        } else if (*siptr == TYPE_EXTENT_DATA_REF) {
            sublen = sizeof(EXTENT_DATA_REF);
        } else if (*siptr == TYPE_SHARED_BLOCK_REF) {
            sublen = sizeof(SHARED_BLOCK_REF);
        } else if (*siptr == TYPE_SHARED_DATA_REF) {
            sublen = sizeof(SHARED_DATA_REF);
        } else {
            ERR("unrecognized extent subitem %x\n", *siptr);
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        // len is unsigned: refuse a truncated subitem rather than wrapping.
        if (len < sublen + 1) {
            ERR("extent subitem %x truncated (%u bytes left, expected %u)\n", *siptr, len, sublen + 1);
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        er = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG);
        if (!er) {
            ERR("out of memory\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto end;
        }

        er->type = *siptr;
        er->data = siptr + 1;
        er->allocated = FALSE;

        InsertTailList(&extent_refs, &er->list_entry);

        // Step over the type byte as well as the payload.
        siptr += sublen + 1;
        len -= sublen + 1;
    }

    // Replace each shared data ref by explicit EXTENT_DATA_REFs. New entries
    // are appended to the tail; next_le is captured first so that removing
    // the current entry does not disturb the walk.
    le = extent_refs.Flink;
    while (le != &extent_refs) {
        extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);
        next_le = le->Flink;

        if (er->type == TYPE_SHARED_DATA_REF) {
            // normally we'd need to acquire load_tree_lock here, but we're protected by the write tree lock
            SHARED_DATA_REF* sdr = er->data;
            tree* t;

            Status = load_tree(Vcb, sdr->offset, NULL, &t);
            if (!NT_SUCCESS(Status)) {
                ERR("load_tree for address %llx returned %08x\n", sdr->offset, Status);
                goto end;
            }

            if (t->header.level == 0) {
                LIST_ENTRY* le2 = t->itemlist.Flink;

                while (le2 != &t->itemlist) {
                    tree_data* td = CONTAINING_RECORD(le2, tree_data, list_entry);

                    if (!td->ignore && td->key.obj_type == TYPE_EXTENT_DATA) {
                        EXTENT_DATA* ed = (EXTENT_DATA*)td->data;

                        if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
                            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;

                            if (ed2->address == address) {
                                extent_ref* er2;
                                EXTENT_DATA_REF* edr;

                                er2 = ExAllocatePoolWithTag(PagedPool, sizeof(extent_ref), ALLOC_TAG);
                                if (!er2) {
                                    ERR("out of memory\n");
                                    free_tree(t);
                                    Status = STATUS_INSUFFICIENT_RESOURCES;
                                    goto end;
                                }

                                edr = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA_REF), ALLOC_TAG);
                                if (!edr) {
                                    ERR("out of memory\n");
                                    ExFreePool(er2);
                                    free_tree(t);
                                    Status = STATUS_INSUFFICIENT_RESOURCES;
                                    goto end;
                                }

                                edr->root = t->header.tree_id;
                                edr->objid = td->key.obj_id;
                                edr->offset = td->key.offset;
                                edr->count = 1;

                                er2->type = TYPE_EXTENT_DATA_REF;
                                er2->data = edr;
                                er2->allocated = TRUE;

                                InsertTailList(&extent_refs, &er2->list_entry); // FIXME - list should be in order
                            }
                        }
                    }

                    le2 = le2->Flink;
                }
            }

            free_tree(t);

            RemoveEntryList(&er->list_entry);

            if (er->allocated)
                ExFreePool(er->data);

            ExFreePool(er);
        }
        // FIXME - also do for SHARED_BLOCK_REF?

        le = next_le;
    }

    if (IsListEmpty(&extent_refs)) {
        WARN("no extent refs found\n");
        delete_tree_item(Vcb, &tp, rollback);
        free_traverse_ptr(&tp);
        return STATUS_SUCCESS;
    }

    // Work out the size of the replacement item: one type byte per ref plus
    // its payload.
    len = 0;
    count = 0;
    le = extent_refs.Flink;
    while (le != &extent_refs) {
        extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);

        len++;

        if (er->type == TYPE_TREE_BLOCK_REF) {
            len += sizeof(TREE_BLOCK_REF);
        } else if (er->type == TYPE_EXTENT_DATA_REF) {
            len += sizeof(EXTENT_DATA_REF);
        } else {
            ERR("unexpected extent subitem %x\n", er->type);
        }

        count++;
        le = le->Flink;
    }

    newei = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM) + len, ALLOC_TAG);
    if (!newei) {
        ERR("out of memory\n");
        Status = STATUS_INSUFFICIENT_RESOURCES;
        goto end;
    }

    RtlCopyMemory(newei, ei, sizeof(EXTENT_ITEM));
    newei->refcount = count;

    siptr = (UINT8*)&newei[1];
    le = extent_refs.Flink;
    while (le != &extent_refs) {
        extent_ref* er = CONTAINING_RECORD(le, extent_ref, list_entry);

        *siptr = er->type;
        siptr++;

        // Advance past each payload so that consecutive refs are laid out
        // one after another instead of overwriting each other.
        if (er->type == TYPE_TREE_BLOCK_REF) {
            RtlCopyMemory(siptr, er->data, sizeof(TREE_BLOCK_REF));
            siptr += sizeof(TREE_BLOCK_REF);
        } else if (er->type == TYPE_EXTENT_DATA_REF) {
            RtlCopyMemory(siptr, er->data, sizeof(EXTENT_DATA_REF));
            siptr += sizeof(EXTENT_DATA_REF);
        } else {
            ERR("unexpected extent subitem %x\n", er->type);
        }

        le = le->Flink;
    }

    delete_tree_item(Vcb, &tp, rollback);
    free_traverse_ptr(&tp);

    // On success the tree owns newei; it is only freed here on failure.
    if (!insert_tree_item(Vcb, Vcb->extent_root, address, TYPE_EXTENT_ITEM, size, newei, sizeof(EXTENT_ITEM) + len, NULL, rollback)) {
        ERR("error - failed to insert item\n");
        ExFreePool(newei);
        free_extent_refs(&extent_refs);
        return STATUS_INTERNAL_ERROR;
    }

    free_extent_refs(&extent_refs);

    return STATUS_SUCCESS;

end:
    // Common failure exit: release the temporary ref list and the traverse
    // pointer obtained from find_item.
    free_extent_refs(&extent_refs);
    free_traverse_ptr(&tp);

    return Status;
}
// Scans the inline ref subitems that follow an EXTENT_ITEM and reports whether
// any of them is a shared ref (TYPE_SHARED_BLOCK_REF or TYPE_SHARED_DATA_REF).
//
// ei  - the extent item; the subitems start immediately after it
// len - number of bytes of subitem data following the EXTENT_ITEM
//
// Returns TRUE as soon as a shared ref is seen. Returns FALSE if none is
// found, if len is 0, or if an unrecognized subitem type is encountered
// (which is also logged).
static BOOL extent_item_is_shared(EXTENT_ITEM* ei, ULONG len) {
    UINT8* siptr = (UINT8*)&ei[1];

    // A plain while loop: with len == 0 there is no type byte to read.
    while (len > 0) {
        ULONG sublen;

        if (*siptr == TYPE_TREE_BLOCK_REF) {
            sublen = sizeof(TREE_BLOCK_REF) + 1;
        } else if (*siptr == TYPE_EXTENT_DATA_REF) {
            sublen = sizeof(EXTENT_DATA_REF) + 1;
        } else if (*siptr == TYPE_SHARED_BLOCK_REF) {
            return TRUE;
        } else if (*siptr == TYPE_SHARED_DATA_REF) {
            return TRUE;
        } else {
            ERR("unrecognized extent subitem %x\n", *siptr);
            return FALSE;
        }

        // Stop on the last (or a truncated) subitem instead of letting the
        // unsigned length wrap around.
        if (len <= sublen)
            break;

        siptr += sublen;
        len -= sublen;
    }

    return FALSE;
}
// Removes the EXTENT_DATA_REF for (subvol, inode, offset) from the EXTENT_ITEM
// keyed (address, TYPE_EXTENT_ITEM, size).
//
// Old-format (EXTENT_ITEM_V0) items and items containing shared refs are
// converted first. If the ref being removed is the only one, the item is
// deleted, the sectors are recorded as deleted in changed_sector_list (when
// non-NULL), and the space is handed back to the containing chunk. Otherwise
// the item is replaced by a copy without that ref and with refcount
// decremented.
//
// Returns STATUS_SUCCESS when the item or the ref cannot be found (a warning
// is logged), and an error status on lookup failure, malformed items, or
// allocation failure.
NTSTATUS STDCALL remove_extent_ref(device_extension* Vcb, UINT64 address, UINT64 size, root* subvol, UINT64 inode, UINT64 offset, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp;
    EXTENT_ITEM* ei;
    UINT8* siptr;
    ULONG len;
    EXTENT_DATA_REF* edr;
    BOOL found;
    NTSTATUS Status;

    TRACE("(%p, %llx, %llx, %llx, %llx, %llx)\n", Vcb, address, size, subvol->id, inode, offset);

    searchkey.obj_id = address;
    searchkey.obj_type = TYPE_EXTENT_ITEM;
    searchkey.offset = size;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return Status;
    }

    if (keycmp(&tp.item->key, &searchkey)) {
        WARN("extent item not found for address %llx, size %llx\n", address, size);
        free_traverse_ptr(&tp);
        return STATUS_SUCCESS;
    }

    if (tp.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
        Status = convert_old_data_extent(Vcb, address, size, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("convert_old_data_extent returned %08x\n", Status);
            free_traverse_ptr(&tp);
            return Status;
        }

        // The conversion replaced the item, so look it up again.
        free_traverse_ptr(&tp);

        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) {
            ERR("error - find_item returned %08x\n", Status);
            return Status;
        }

        if (keycmp(&tp.item->key, &searchkey)) {
            WARN("extent item not found for address %llx, size %llx\n", address, size);
            free_traverse_ptr(&tp);
            return STATUS_SUCCESS;
        }
    }

    ei = (EXTENT_ITEM*)tp.item->data;

    if (tp.item->size < sizeof(EXTENT_ITEM)) {
        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
        free_traverse_ptr(&tp);
        return STATUS_INTERNAL_ERROR;
    }

    if (!(ei->flags & EXTENT_ITEM_DATA)) {
        ERR("error - EXTENT_ITEM_DATA flag not set\n");
        free_traverse_ptr(&tp);
        return STATUS_INTERNAL_ERROR;
    }

    // FIXME - is ei->refcount definitely the number of items, or is it the sum of the subitem refcounts?

    if (extent_item_is_shared(ei, tp.item->size - sizeof(EXTENT_ITEM))) {
        Status = convert_shared_data_extent(Vcb, address, size, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("convert_shared_data_extent returned %08x\n", Status);
            free_traverse_ptr(&tp);
            return Status;
        }

        // The conversion replaced the item, so look it up again.
        free_traverse_ptr(&tp);

        Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) {
            ERR("error - find_item returned %08x\n", Status);
            return Status;
        }

        if (keycmp(&tp.item->key, &searchkey)) {
            WARN("extent item not found for address %llx, size %llx\n", address, size);
            free_traverse_ptr(&tp);
            return STATUS_SUCCESS;
        }

        ei = (EXTENT_ITEM*)tp.item->data;
    }

    len = tp.item->size - sizeof(EXTENT_ITEM);
    siptr = (UINT8*)&ei[1];
    found = FALSE;

    // Walk the inline refs looking for ours. A plain while loop is used so
    // that an item with no inline refs is not read past its end.
    while (len > 0) {
        if (*siptr != TYPE_EXTENT_DATA_REF) {
            // FIXME - TYPE_TREE_BLOCK_REF 0xB0
            ERR("unrecognized extent subitem %x\n", *siptr);
            free_traverse_ptr(&tp);
            return STATUS_INTERNAL_ERROR;
        }

        if (len < sizeof(UINT8) + sizeof(EXTENT_DATA_REF)) {
            ERR("extent data ref truncated (%u bytes left, expected %u)\n", len, sizeof(UINT8) + sizeof(EXTENT_DATA_REF));
            free_traverse_ptr(&tp);
            return STATUS_INTERNAL_ERROR;
        }

        edr = (EXTENT_DATA_REF*)&siptr[1];

        if (edr->root == subvol->id && edr->objid == inode && edr->offset == offset) {
            found = TRUE;
            break;
        }

        siptr += sizeof(UINT8) + sizeof(EXTENT_DATA_REF);
        len -= sizeof(EXTENT_DATA_REF) + sizeof(UINT8);
    }

    if (!found) {
        WARN("could not find extent data ref\n");
        free_traverse_ptr(&tp);
        return STATUS_SUCCESS;
    }

    // FIXME - decrease subitem refcount if there already?

    // Size of the item once the matching ref has been cut out.
    len = tp.item->size - sizeof(UINT8) - sizeof(EXTENT_DATA_REF);

    delete_tree_item(Vcb, &tp, rollback);

    if (len == sizeof(EXTENT_ITEM)) { // extent no longer needed
        chunk* c;
        LIST_ENTRY* le2;

        if (changed_sector_list) {
            changed_sector* sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
            if (!sc) {
                ERR("out of memory\n");
                free_traverse_ptr(&tp);
                return STATUS_INSUFFICIENT_RESOURCES;
            }

            sc->ol.key = address;
            sc->checksums = NULL;
            sc->length = size / Vcb->superblock.sector_size;

            sc->deleted = TRUE;

            insert_into_ordered_list(changed_sector_list, &sc->ol);
        }

        c = NULL;
        le2 = Vcb->chunks.Flink;
        while (le2 != &Vcb->chunks) {
            c = CONTAINING_RECORD(le2, chunk, list_entry);

            TRACE("chunk: %llx, %llx\n", c->offset, c->chunk_item->size);

            // <= so that an extent ending exactly at the end of the chunk is
            // still attributed to it.
            if (address >= c->offset && address + size <= c->offset + c->chunk_item->size)
                break;

            le2 = le2->Flink;
        }
        if (le2 == &Vcb->chunks) c = NULL;

        if (c) {
            decrease_chunk_usage(c, size);

            add_to_space_list(c, address, size, SPACE_TYPE_DELETING);
        }

        free_traverse_ptr(&tp);
        return STATUS_SUCCESS;
    }

    ei = ExAllocatePoolWithTag(PagedPool, len, ALLOC_TAG);
    if (!ei) {
        ERR("out of memory\n");
        free_traverse_ptr(&tp);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    // Copy everything before the removed ref...
    RtlCopyMemory(ei, tp.item->data, siptr - tp.item->data);
    ei->refcount--;
    ei->generation = Vcb->superblock.generation;

    // ...and, unless it was the last subitem, everything after it.
    if (tp.item->data + len != siptr)
        RtlCopyMemory((UINT8*)ei + (siptr - tp.item->data), siptr + sizeof(UINT8) + sizeof(EXTENT_DATA_REF), tp.item->size - (siptr - tp.item->data) - sizeof(UINT8) - sizeof(EXTENT_DATA_REF));

    free_traverse_ptr(&tp);

    // On success the tree owns ei; it is only freed here on failure.
    if (!insert_tree_item(Vcb, Vcb->extent_root, searchkey.obj_id, searchkey.obj_type, searchkey.offset, ei, len, NULL, rollback)) {
        ERR("error - failed to insert item\n");
        ExFreePool(ei);
        return STATUS_INTERNAL_ERROR;
    }

    return STATUS_SUCCESS;
}
// Reports whether an entry whose key equals `value` is present in `list`.
// The scan stops at the first entry whose key is greater than `value`, so it
// relies on the list being sorted by ascending key. Entries are treated as
// ordered_list records whose LIST_ENTRY is the first member.
static __inline BOOL entry_in_ordered_list(LIST_ENTRY* list, UINT64 value) {
    LIST_ENTRY* cur;

    for (cur = list->Flink; cur != list; cur = cur->Flink) {
        ordered_list* node = (ordered_list*)cur;

        if (node->key == value)
            return TRUE;

        if (node->key > value)
            break;
    }

    return FALSE;
}
// Removes the byte range [start_data, end_data) from the EXTENT_DATA items of
// the file described by fcb.
//
// Each overlapping EXTENT_DATA item is deleted and, where only part of it
// falls inside the range, re-inserted as one or two trimmed items (remove
// beginning / remove end / remove middle). For regular and prealloc extents
// that are removed entirely, the backing extent's reference is dropped via
// remove_extent_ref. fcb->inode_item.st_blocks is reduced by the number of
// bytes removed (skipped for regular/prealloc extents whose address is 0).
//
// Returns STATUS_NOT_SUPPORTED if an overlapping extent is compressed,
// encrypted or otherwise encoded, STATUS_INTERNAL_ERROR on malformed items or
// insert failure, STATUS_INSUFFICIENT_RESOURCES on allocation failure, and
// STATUS_SUCCESS otherwise.
NTSTATUS excise_extents(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 end_data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp, next_tp;
    NTSTATUS Status;
    BOOL b;
    BOOL have_next = FALSE; // TRUE while next_tp holds a reference that has not been moved into tp

    TRACE("(%p, (%llx, %llx), %llx, %llx, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, end_data, changed_sector_list);

    searchkey.obj_id = fcb->inode;
    searchkey.obj_type = TYPE_EXTENT_DATA;
    searchkey.offset = start_data;

    Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return Status;
    }

    do {
        EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
        EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
        UINT64 len;
        // The item found for the search key need not be an EXTENT_DATA of
        // this inode when there is no exact match; such an item must not be
        // interpreted as one.
        BOOL is_extent = tp.item->key.obj_id == fcb->inode && tp.item->key.obj_type == TYPE_EXTENT_DATA;

        if (is_extent && tp.item->size < sizeof(EXTENT_DATA)) {
            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        if (is_extent && (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        // Fetch the successor before tp's item is modified. From here until
        // it is moved into tp, next_tp must be released on every exit path.
        b = find_next_item(Vcb, &tp, &next_tp, FALSE);
        have_next = b;

        len = 0;
        if (is_extent)
            len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;

        if (is_extent && tp.item->key.offset < end_data && tp.item->key.offset + len >= start_data) {
            if (ed->compression != BTRFS_COMPRESSION_NONE) {
                FIXME("FIXME - compression not supported at present\n");
                Status = STATUS_NOT_SUPPORTED;
                goto end;
            }

            if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
                WARN("root %llx, inode %llx, extent %llx: encryption not supported (type %x)\n", fcb->subvol->id, fcb->inode, tp.item->key.offset, ed->encryption);
                Status = STATUS_NOT_SUPPORTED;
                goto end;
            }

            if (ed->encoding != BTRFS_ENCODING_NONE) {
                WARN("other encodings not supported\n");
                Status = STATUS_NOT_SUPPORTED;
                goto end;
            }

            if (ed->type == EXTENT_TYPE_INLINE) {
                if (start_data <= tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove all
                    delete_tree_item(Vcb, &tp, rollback);

                    fcb->inode_item.st_blocks -= len;
                } else if (start_data <= tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove beginning
                    EXTENT_DATA* ned;
                    UINT64 size;

                    delete_tree_item(Vcb, &tp, rollback);

                    size = len - (end_data - tp.item->key.offset);

                    ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
                    if (!ned) {
                        ERR("out of memory\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto end;
                    }

                    ned->generation = Vcb->superblock.generation;
                    ned->decoded_size = size;
                    ned->compression = ed->compression;
                    ned->encryption = ed->encryption;
                    ned->encoding = ed->encoding;
                    ned->type = ed->type;

                    RtlCopyMemory(&ned->data[0], &ed->data[end_data - tp.item->key.offset], size);

                    if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
                        ERR("insert_tree_item failed\n");
                        ExFreePool(ned);
                        Status = STATUS_INTERNAL_ERROR;
                        goto end;
                    }

                    fcb->inode_item.st_blocks -= end_data - tp.item->key.offset;
                } else if (start_data > tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove end
                    EXTENT_DATA* ned;
                    UINT64 size;

                    delete_tree_item(Vcb, &tp, rollback);

                    size = start_data - tp.item->key.offset;

                    ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
                    if (!ned) {
                        ERR("out of memory\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto end;
                    }

                    ned->generation = Vcb->superblock.generation;
                    ned->decoded_size = size;
                    ned->compression = ed->compression;
                    ned->encryption = ed->encryption;
                    ned->encoding = ed->encoding;
                    ned->type = ed->type;

                    RtlCopyMemory(&ned->data[0], &ed->data[0], size);

                    if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
                        ERR("insert_tree_item failed\n");
                        ExFreePool(ned);
                        Status = STATUS_INTERNAL_ERROR;
                        goto end;
                    }

                    fcb->inode_item.st_blocks -= tp.item->key.offset + len - start_data;
                } else if (start_data > tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove middle
                    EXTENT_DATA* ned;
                    UINT64 size;

                    delete_tree_item(Vcb, &tp, rollback);

                    // First the part before the hole...
                    size = start_data - tp.item->key.offset;

                    ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
                    if (!ned) {
                        ERR("out of memory\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto end;
                    }

                    ned->generation = Vcb->superblock.generation;
                    ned->decoded_size = size;
                    ned->compression = ed->compression;
                    ned->encryption = ed->encryption;
                    ned->encoding = ed->encoding;
                    ned->type = ed->type;

                    RtlCopyMemory(&ned->data[0], &ed->data[0], size);

                    if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
                        ERR("insert_tree_item failed\n");
                        ExFreePool(ned);
                        Status = STATUS_INTERNAL_ERROR;
                        goto end;
                    }

                    // ...then the part after it.
                    size = tp.item->key.offset + len - end_data;

                    ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + size, ALLOC_TAG);
                    if (!ned) {
                        ERR("out of memory\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto end;
                    }

                    ned->generation = Vcb->superblock.generation;
                    ned->decoded_size = size;
                    ned->compression = ed->compression;
                    ned->encryption = ed->encryption;
                    ned->encoding = ed->encoding;
                    ned->type = ed->type;

                    RtlCopyMemory(&ned->data[0], &ed->data[end_data - tp.item->key.offset], size);

                    if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + size, NULL, rollback)) {
                        ERR("insert_tree_item failed\n");
                        ExFreePool(ned);
                        Status = STATUS_INTERNAL_ERROR;
                        goto end;
                    }

                    fcb->inode_item.st_blocks -= end_data - start_data;
                }
            } else if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) {
                if (start_data <= tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove all
                    if (ed2->address != 0) {
                        Status = remove_extent_ref(Vcb, ed2->address, ed2->size, fcb->subvol, fcb->inode, tp.item->key.offset - ed2->offset, changed_sector_list, rollback);
                        if (!NT_SUCCESS(Status)) {
                            ERR("remove_extent_ref returned %08x\n", Status);
                            goto end;
                        }

                        fcb->inode_item.st_blocks -= len;
                    }

                    delete_tree_item(Vcb, &tp, rollback);
                } else if (start_data <= tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove beginning
                    EXTENT_DATA* ned;
                    EXTENT_DATA2* ned2;

                    if (ed2->address != 0)
                        fcb->inode_item.st_blocks -= end_data - tp.item->key.offset;

                    delete_tree_item(Vcb, &tp, rollback);

                    ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
                    if (!ned) {
                        ERR("out of memory\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto end;
                    }

                    ned2 = (EXTENT_DATA2*)&ned->data[0];

                    ned->generation = Vcb->superblock.generation;
                    ned->decoded_size = ed->decoded_size;
                    ned->compression = ed->compression;
                    ned->encryption = ed->encryption;
                    ned->encoding = ed->encoding;
                    ned->type = ed->type;
                    ned2->address = ed2->address;
                    ned2->size = ed2->size;
                    ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - tp.item->key.offset));
                    ned2->num_bytes = ed2->num_bytes - (end_data - tp.item->key.offset);

                    if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
                        ERR("insert_tree_item failed\n");
                        ExFreePool(ned);
                        Status = STATUS_INTERNAL_ERROR;
                        goto end;
                    }
                } else if (start_data > tp.item->key.offset && end_data >= tp.item->key.offset + len) { // remove end
                    EXTENT_DATA* ned;
                    EXTENT_DATA2* ned2;

                    if (ed2->address != 0)
                        fcb->inode_item.st_blocks -= tp.item->key.offset + len - start_data;

                    delete_tree_item(Vcb, &tp, rollback);

                    ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
                    if (!ned) {
                        ERR("out of memory\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto end;
                    }

                    ned2 = (EXTENT_DATA2*)&ned->data[0];

                    ned->generation = Vcb->superblock.generation;
                    ned->decoded_size = ed->decoded_size;
                    ned->compression = ed->compression;
                    ned->encryption = ed->encryption;
                    ned->encoding = ed->encoding;
                    ned->type = ed->type;
                    ned2->address = ed2->address;
                    ned2->size = ed2->size;
                    ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
                    ned2->num_bytes = start_data - tp.item->key.offset;

                    if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
                        ERR("insert_tree_item failed\n");
                        ExFreePool(ned);
                        Status = STATUS_INTERNAL_ERROR;
                        goto end;
                    }
                } else if (start_data > tp.item->key.offset && end_data < tp.item->key.offset + len) { // remove middle
                    EXTENT_DATA* ned;
                    EXTENT_DATA2* ned2;

                    if (ed2->address != 0)
                        fcb->inode_item.st_blocks -= end_data - start_data;

                    delete_tree_item(Vcb, &tp, rollback);

                    // First the part before the hole...
                    ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
                    if (!ned) {
                        ERR("out of memory\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto end;
                    }

                    ned2 = (EXTENT_DATA2*)&ned->data[0];

                    ned->generation = Vcb->superblock.generation;
                    ned->decoded_size = ed->decoded_size;
                    ned->compression = ed->compression;
                    ned->encryption = ed->encryption;
                    ned->encoding = ed->encoding;
                    ned->type = ed->type;
                    ned2->address = ed2->address;
                    ned2->size = ed2->size;
                    ned2->offset = ed2->address == 0 ? 0 : ed2->offset;
                    ned2->num_bytes = start_data - tp.item->key.offset;

                    if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, tp.item->key.offset, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
                        ERR("insert_tree_item failed\n");
                        ExFreePool(ned);
                        Status = STATUS_INTERNAL_ERROR;
                        goto end;
                    }

                    // ...then the part after it.
                    ned = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
                    if (!ned) {
                        ERR("out of memory\n");
                        Status = STATUS_INSUFFICIENT_RESOURCES;
                        goto end;
                    }

                    ned2 = (EXTENT_DATA2*)&ned->data[0];

                    ned->generation = Vcb->superblock.generation;
                    ned->decoded_size = ed->decoded_size;
                    ned->compression = ed->compression;
                    ned->encryption = ed->encryption;
                    ned->encoding = ed->encoding;
                    ned->type = ed->type;
                    ned2->address = ed2->address;
                    ned2->size = ed2->size;
                    ned2->offset = ed2->address == 0 ? 0 : (ed2->offset + (end_data - tp.item->key.offset));
                    ned2->num_bytes = tp.item->key.offset + len - end_data;

                    if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, end_data, ned, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback)) {
                        ERR("insert_tree_item failed\n");
                        ExFreePool(ned);
                        Status = STATUS_INTERNAL_ERROR;
                        goto end;
                    }
                }
            }
        }

        if (b) {
            free_traverse_ptr(&tp);
            tp = next_tp;
            have_next = FALSE; // the reference now lives in tp

            if (tp.item->key.obj_id > fcb->inode || tp.item->key.obj_type > TYPE_EXTENT_DATA || tp.item->key.offset >= end_data)
                break;
        }
    } while (b);

    // FIXME - do bitmap analysis of changed extents, and free what we can

    Status = STATUS_SUCCESS;

end:
    // Release the look-ahead pointer if an error exit left it un-consumed.
    if (have_next)
        free_traverse_ptr(&next_tp);

    free_traverse_ptr(&tp);

    return Status;
}
// Allocates `length` bytes in chunk c and stores `data` there as a new
// regular extent of the file described by fcb, at file offset start_data.
//
// In order: picks an address in the chunk, inserts an EXTENT_ITEM with one
// EXTENT_DATA_REF into the extent tree, writes the data, records per-sector
// CRC32C checksums in changed_sector_list (when non-NULL), inserts the
// EXTENT_DATA item into the file's subvolume tree, then updates the chunk
// usage, the chunk's space list and the inode's st_blocks.
//
// Returns TRUE on success, FALSE if no address could be found or any step
// failed.
static BOOL insert_extent_chunk(device_extension* Vcb, fcb* fcb, chunk* c, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
    const ULONG ed_len = sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2);
    UINT64 disk_addr;
    NTSTATUS Status;
    EXTENT_ITEM_DATA_REF* new_ei;
    EXTENT_DATA* new_ed;
    EXTENT_DATA2* new_ed2;
    traverse_ptr ei_tp;

    TRACE("(%p, (%llx, %llx), %llx, %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, c->offset, start_data, length, data, changed_sector_list);

    if (!find_address_in_chunk(Vcb, c, length, &disk_addr))
        return FALSE;

    // Extent tree entry: a data extent with a single reference from this file.
    new_ei = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_ITEM_DATA_REF), ALLOC_TAG);
    if (new_ei == NULL) {
        ERR("out of memory\n");
        return FALSE;
    }

    new_ei->type = TYPE_EXTENT_DATA_REF;

    new_ei->ei.flags = EXTENT_ITEM_DATA;
    new_ei->ei.generation = Vcb->superblock.generation;
    new_ei->ei.refcount = 1;

    new_ei->edr.count = 1;
    new_ei->edr.offset = start_data;
    new_ei->edr.objid = fcb->inode;
    new_ei->edr.root = fcb->subvol->id;

    if (!insert_tree_item(Vcb, Vcb->extent_root, disk_addr, TYPE_EXTENT_ITEM, length, new_ei, sizeof(EXTENT_ITEM_DATA_REF), &ei_tp, rollback)) {
        ERR("insert_tree_item failed\n");
        ExFreePool(new_ei);
        return FALSE;
    }

    ei_tp.tree->header.generation = new_ei->ei.generation;

    free_traverse_ptr(&ei_tp);

    Status = write_data(Vcb, disk_addr, data, length);
    if (!NT_SUCCESS(Status)) {
        ERR("write_data returned %08x\n", Status);
        return FALSE;
    }

    if (changed_sector_list != NULL) {
        int idx = 0;
        changed_sector* cs = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);

        if (cs == NULL) {
            ERR("out of memory\n");
            return FALSE;
        }

        cs->ol.key = disk_addr;
        cs->length = length / Vcb->superblock.sector_size;
        cs->deleted = FALSE;

        cs->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * cs->length, ALLOC_TAG);
        if (cs->checksums == NULL) {
            ERR("out of memory\n");
            ExFreePool(cs);
            return FALSE;
        }

        // One checksum per sector of the data just written.
        while (idx < cs->length) {
            cs->checksums[idx] = ~calc_crc32c(0xffffffff, (UINT8*)data + (idx * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
            idx++;
        }

        insert_into_ordered_list(changed_sector_list, &cs->ol);
    }

    // add extent data to inode
    new_ed = ExAllocatePoolWithTag(PagedPool, ed_len, ALLOC_TAG);
    if (new_ed == NULL) {
        ERR("out of memory\n");
        return FALSE;
    }

    new_ed->type = EXTENT_TYPE_REGULAR;
    new_ed->encoding = BTRFS_ENCODING_NONE;
    new_ed->encryption = BTRFS_ENCRYPTION_NONE;
    new_ed->compression = BTRFS_COMPRESSION_NONE;
    new_ed->decoded_size = length;
    new_ed->generation = Vcb->superblock.generation;

    new_ed2 = (EXTENT_DATA2*)new_ed->data;
    new_ed2->address = disk_addr;
    new_ed2->size = length;
    new_ed2->offset = 0;
    new_ed2->num_bytes = length;

    if (!insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, start_data, new_ed, ed_len, NULL, rollback)) {
        ERR("insert_tree_item failed\n");
        ExFreePool(new_ed);
        return FALSE;
    }

    increase_chunk_usage(c, length);
    add_to_space_list(c, disk_addr, length, SPACE_TYPE_WRITING);

    fcb->inode_item.st_blocks += length;

    return TRUE;
}
// Grows an existing extent in place by `length` bytes and writes `data` into
// the newly covered area.
//
// edtp - traverse pointer to the file's EXTENT_DATA item being extended
// eitp - traverse pointer to the matching EXTENT_ITEM in the extent tree
//
// The EXTENT_DATA item is replaced by a copy whose decoded_size, size and
// num_bytes are each increased by length; the EXTENT_ITEM is re-inserted
// under a key whose offset is increased by length and the old one deleted.
// The data is written at (extent address + old extent size), per-sector
// CRC32C checksums are recorded in changed_sector_list (when non-NULL), and
// chunk usage, the chunk's space list and the inode's st_blocks are updated.
//
// Returns TRUE on success, FALSE on allocation, insert or write failure.
static BOOL extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
                        LIST_ENTRY* changed_sector_list, traverse_ptr* edtp, traverse_ptr* eitp, LIST_ENTRY* rollback) {
    EXTENT_DATA* ed;
    EXTENT_DATA2* ed2;
    EXTENT_ITEM* ei;
    NTSTATUS Status;
    changed_sector* sc;
    chunk* c;
    int i;

    TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data,
                                                               length, data, changed_sector_list, edtp, eitp);

    ed = ExAllocatePoolWithTag(PagedPool, edtp->item->size, ALLOC_TAG);
    if (!ed) {
        ERR("out of memory\n");
        return FALSE;
    }

    RtlCopyMemory(ed, edtp->item->data, edtp->item->size);

    ed->decoded_size += length;
    ed2 = (EXTENT_DATA2*)ed->data;
    ed2->size += length;
    ed2->num_bytes += length;

    delete_tree_item(Vcb, edtp, rollback);

    if (!insert_tree_item(Vcb, fcb->subvol, edtp->item->key.obj_id, edtp->item->key.obj_type, edtp->item->key.offset, ed, edtp->item->size, NULL, rollback)) {
        TRACE("insert_tree_item failed\n");

        ExFreePool(ed);
        return FALSE;
    }

    ei = ExAllocatePoolWithTag(PagedPool, eitp->item->size, ALLOC_TAG);
    if (!ei) {
        // ed must NOT be freed here: the insert above succeeded, so the tree
        // now owns that buffer.
        ERR("out of memory\n");
        return FALSE;
    }

    RtlCopyMemory(ei, eitp->item->data, eitp->item->size);

    if (!insert_tree_item(Vcb, Vcb->extent_root, eitp->item->key.obj_id, eitp->item->key.obj_type, eitp->item->key.offset + length, ei, eitp->item->size, NULL, rollback)) {
        ERR("insert_tree_item failed\n");

        ExFreePool(ei);
        return FALSE;
    }

    delete_tree_item(Vcb, eitp, rollback);

    // The new bytes go directly after the end of the old extent.
    Status = write_data(Vcb, eitp->item->key.obj_id + eitp->item->key.offset, data, length);
    if (!NT_SUCCESS(Status)) {
        ERR("write_data returned %08x\n", Status);
        return FALSE;
    }

    if (changed_sector_list) {
        sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
        if (!sc) {
            ERR("out of memory\n");
            return FALSE;
        }

        sc->ol.key = eitp->item->key.obj_id + eitp->item->key.offset;
        sc->length = length / Vcb->superblock.sector_size;
        sc->deleted = FALSE;

        sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
        if (!sc->checksums) {
            ERR("out of memory\n");
            ExFreePool(sc);
            return FALSE;
        }

        for (i = 0; i < sc->length; i++) {
            sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
        }
        insert_into_ordered_list(changed_sector_list, &sc->ol);
    }

    c = get_chunk_from_address(Vcb, eitp->item->key.obj_id);

    if (c) {
        increase_chunk_usage(c, length);

        add_to_space_list(c, eitp->item->key.obj_id + eitp->item->key.offset, length, SPACE_TYPE_WRITING);
    }

    fcb->inode_item.st_blocks += length;

    return TRUE;
}
// Tries to store `length` bytes of `data` at file offset start_data by growing
// the file's preceding extent in place rather than allocating a new one.
//
// The extension is attempted only if all of the following hold:
//  - the item found for (inode, EXTENT_DATA, start_data) is an EXTENT_DATA of
//    this inode that starts before start_data and ends exactly at it;
//  - it is EXTENT_TYPE_REGULAR with no compression, encryption or encoding;
//  - it runs to the end of its backing extent;
//  - the backing EXTENT_ITEM (converted from old/shared form if necessary)
//    has refcount 1 and flags equal to EXTENT_ITEM_DATA;
//  - the chunk's space list has an entry starting right after the extent that
//    is SPACE_TYPE_FREE and at least `length` bytes long.
//
// Returns the result of extend_data when it is called, FALSE otherwise.
static BOOL try_extend_data(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data,
                            LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp, tp2;
    BOOL success = FALSE;
    EXTENT_DATA* ed;
    EXTENT_DATA2* ed2;
    EXTENT_ITEM* ei;
    chunk* c;
    LIST_ENTRY* le;
    space* s;
    NTSTATUS Status;

    searchkey.obj_id = fcb->inode;
    searchkey.obj_type = TYPE_EXTENT_DATA;
    searchkey.offset = start_data;

    Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return FALSE;
    }

    if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data) {
        WARN("previous EXTENT_DATA not found\n");
        goto end;
    }

    ed = (EXTENT_DATA*)tp.item->data;

    if (tp.item->size < sizeof(EXTENT_DATA)) {
        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
        goto end;
    }

    if (ed->type != EXTENT_TYPE_REGULAR) {
        TRACE("not extending extent which is not EXTENT_TYPE_REGULAR\n");
        goto end;
    }

    ed2 = (EXTENT_DATA2*)ed->data;

    if (tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
        ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
        goto end;
    }

    if (tp.item->key.offset + ed2->num_bytes != start_data) {
        TRACE("last EXTENT_DATA does not run up to start_data (%llx + %llx != %llx)\n", tp.item->key.offset, ed2->num_bytes, start_data);
        goto end;
    }

    if (ed->compression != BTRFS_COMPRESSION_NONE) {
        FIXME("FIXME: compression not yet supported\n");
        goto end;
    }

    if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
        WARN("encryption not supported\n");
        goto end;
    }

    if (ed->encoding != BTRFS_ENCODING_NONE) {
        WARN("other encodings not supported\n");
        goto end;
    }

    if (ed2->size - ed2->offset != ed2->num_bytes) {
        TRACE("last EXTENT_DATA does not run all the way to the end of the extent\n");
        goto end;
    }

    searchkey.obj_id = ed2->address;
    searchkey.obj_type = TYPE_EXTENT_ITEM;
    searchkey.offset = ed2->size;

    Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        goto end;
    }

    if (keycmp(&tp2.item->key, &searchkey)) {
        ERR("error - extent %llx,%llx not found in tree\n", ed2->address, ed2->size);
        int3; // TESTING
        goto end2;
    }

    if (tp2.item->size == sizeof(EXTENT_ITEM_V0)) { // old extent ref, convert
        Status = convert_old_data_extent(Vcb, ed2->address, ed2->size, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("convert_old_data_extent returned %08x\n", Status);
            goto end2;
        }

        // The conversion replaced the item, so look it up again. tp2 is not
        // held while find_item runs, hence "goto end" on failure.
        free_traverse_ptr(&tp2);

        Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) {
            ERR("error - find_item returned %08x\n", Status);
            goto end;
        }

        if (keycmp(&tp2.item->key, &searchkey)) {
            WARN("extent item not found for address %llx, size %llx\n", ed2->address, ed2->size);
            goto end2;
        }
    }

    ei = (EXTENT_ITEM*)tp2.item->data;

    // Validate the EXTENT_ITEM (tp2), not the file's EXTENT_DATA (tp).
    if (tp2.item->size < sizeof(EXTENT_ITEM)) {
        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, tp2.item->size, sizeof(EXTENT_ITEM));
        goto end2;
    }

    // FIXME - test this
    if (extent_item_is_shared(ei, tp2.item->size - sizeof(EXTENT_ITEM))) {
        Status = convert_shared_data_extent(Vcb, ed2->address, ed2->size, rollback);
        if (!NT_SUCCESS(Status)) {
            ERR("convert_shared_data_extent returned %08x\n", Status);
            goto end2;
        }

        free_traverse_ptr(&tp2);

        Status = find_item(Vcb, Vcb->extent_root, &tp2, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) {
            ERR("error - find_item returned %08x\n", Status);
            goto end;
        }

        if (keycmp(&tp2.item->key, &searchkey)) {
            WARN("extent item not found for address %llx, size %llx\n", ed2->address, ed2->size);
            goto end2;
        }

        ei = (EXTENT_ITEM*)tp2.item->data;

        if (tp2.item->size < sizeof(EXTENT_ITEM)) {
            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp2.item->key.obj_id, tp2.item->key.obj_type, tp2.item->key.offset, tp2.item->size, sizeof(EXTENT_ITEM));
            goto end2;
        }
    }

    if (ei->refcount != 1) {
        TRACE("extent refcount was not 1\n");
        goto end2;
    }

    if (ei->flags != EXTENT_ITEM_DATA) {
        ERR("error - extent was not a data extent\n");
        goto end2;
    }

    c = get_chunk_from_address(Vcb, ed2->address);

    // extend_data treats a NULL result from this lookup as possible, so do
    // not dereference it unchecked here either.
    if (!c) {
        ERR("could not find chunk for address %llx\n", ed2->address);
        goto end2;
    }

    // Look for a space entry that begins exactly where the extent ends.
    le = c->space.Flink;
    while (le != &c->space) {
        s = CONTAINING_RECORD(le, space, list_entry);

        if (s->offset == ed2->address + ed2->size) {
            if (s->type == SPACE_TYPE_FREE && s->size >= length) {
                success = extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, &tp, &tp2, rollback);
            }
            break;
        } else if (s->offset > ed2->address + ed2->size)
            break;

        le = le->Flink;
    }

end2:
    free_traverse_ptr(&tp2);

end:
    free_traverse_ptr(&tp);

    return success;
}
// Inserts an EXTENT_DATA item of type EXTENT_TYPE_REGULAR for `inode` in root `r`,
// keyed at file offset `start`. Its EXTENT_DATA2 has address, size and offset all
// set to zero and num_bytes set to `length`, i.e. it refers to no on-disk extent.
//
// Returns STATUS_SUCCESS on success, STATUS_INSUFFICIENT_RESOURCES if the item
// buffer cannot be allocated, or STATUS_INTERNAL_ERROR if insert_tree_item fails
// (the buffer is freed here in that case; on success it is not freed by this
// function - presumably the tree takes ownership, confirm in insert_tree_item).
NTSTATUS insert_sparse_extent(device_extension* Vcb, root* r, UINT64 inode, UINT64 start, UINT64 length, LIST_ENTRY* rollback) {
    EXTENT_DATA* item;
    EXTENT_DATA2* ref;

    TRACE("(%p, %llx, %llx, %llx, %llx)\n", Vcb, r->id, inode, start, length);

    item = ExAllocatePoolWithTag(PagedPool, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), ALLOC_TAG);
    if (!item) {
        ERR("out of memory\n");
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    // the EXTENT_DATA2 lives in the trailing data[] of the EXTENT_DATA
    ref = (EXTENT_DATA2*)item->data;
    ref->num_bytes = length;
    ref->offset = 0;
    ref->size = 0;
    ref->address = 0;

    item->type = EXTENT_TYPE_REGULAR;
    item->encoding = BTRFS_ENCODING_NONE;
    item->encryption = BTRFS_ENCRYPTION_NONE;
    item->compression = BTRFS_COMPRESSION_NONE;
    item->decoded_size = length;
    item->generation = Vcb->superblock.generation;

    if (insert_tree_item(Vcb, r, inode, TYPE_EXTENT_DATA, start, item, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2), NULL, rollback))
        return STATUS_SUCCESS;

    ERR("insert_tree_item failed\n");
    ExFreePool(item);

    return STATUS_INTERNAL_ERROR;
}
// static void print_tree(tree* t) {
// LIST_ENTRY* le = t->itemlist.Flink;
// while (le != &t->itemlist) {
// tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);
// ERR("%llx,%x,%llx (ignore = %s)\n", td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");
// le = le->Flink;
// }
// }
// Writes `length` bytes of `data` at file offset `start_data` of `fcb` as a data extent.
//
// Steps:
//  1. If start_data > 0, first try to grow the preceding extent in place
//     (try_extend_data); on success nothing else is needed.
//  2. If start_data > 0, look up the EXTENT_DATA item at or before start_data and,
//     if it does not reach start_data (or the inode has no such item), plug the
//     gap with a sparse extent.
//  3. Try insert_extent_chunk on every existing chunk whose type equals
//     BLOCK_FLAG_DATA and which has at least `length` bytes unused, then on a
//     freshly allocated chunk.
//
// Returns STATUS_SUCCESS once the data has been placed, STATUS_DISK_FULL if no
// chunk could take it, STATUS_INTERNAL_ERROR if the preceding EXTENT_DATA item
// is too short to be valid, or the failing status of find_item /
// insert_sparse_extent.
static NTSTATUS insert_extent(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
    LIST_ENTRY* le = Vcb->chunks.Flink;
    chunk* c;
    KEY searchkey;
    UINT64 flags;

    TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);

    // FIXME - split data up if not enough space for just one extent

    if (start_data > 0 && try_extend_data(Vcb, fcb, start_data, length, data, changed_sector_list, rollback))
        return STATUS_SUCCESS;

    // if there is a gap before start_data, plug it with a sparse extent
    if (start_data > 0) {
        traverse_ptr tp;
        NTSTATUS Status;

        searchkey.obj_id = fcb->inode;
        searchkey.obj_type = TYPE_EXTENT_DATA;
        searchkey.offset = start_data;

        Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) {
            ERR("error - find_item returned %08x\n", Status);
            return Status;
        }

        if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA) {
            // no EXTENT_DATA for this inode before start_data: the gap starts at 0
            Status = insert_sparse_extent(Vcb, fcb->subvol, fcb->inode, 0, start_data, rollback);
        } else {
            EXTENT_DATA* ed;
            EXTENT_DATA2* ed2;
            UINT64 len;

            // A truncated item used to be logged and then ignored, letting the
            // write proceed on top of inconsistent metadata. Fail instead.
            if (tp.item->size < sizeof(EXTENT_DATA)) {
                ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            }

            ed = (EXTENT_DATA*)tp.item->data;
            ed2 = (EXTENT_DATA2*)ed->data;

            // num_bytes is only readable if the item actually contains an EXTENT_DATA2
            if (ed->type != EXTENT_TYPE_INLINE && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
                ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            }

            len = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes;

            // previous extent stops short of start_data: fill the hole in between
            if (tp.item->key.offset + len < start_data) {
                Status = insert_sparse_extent(Vcb, fcb->subvol, fcb->inode, tp.item->key.offset + len,
                                              start_data - tp.item->key.offset - len, rollback);
            }
        }

        if (!NT_SUCCESS(Status)) {
            ERR("insert_sparse_extent returned %08x\n", Status);
            free_traverse_ptr(&tp);
            return Status;
        }

        free_traverse_ptr(&tp);
    }

    // FIXME - how do we know which RAID level to put this to?
    flags = BLOCK_FLAG_DATA; // SINGLE

    // first try every existing data chunk with enough unused space
    while (le != &Vcb->chunks) {
        c = CONTAINING_RECORD(le, chunk, list_entry);

        if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
            if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list, rollback))
                return STATUS_SUCCESS;
        }

        le = le->Flink;
    }

    // otherwise allocate a new chunk and try that
    if ((c = alloc_chunk(Vcb, flags, rollback))) {
        if (c->chunk_item->type == flags && (c->chunk_item->size - c->used) >= length) {
            if (insert_extent_chunk(Vcb, fcb, c, start_data, length, data, changed_sector_list, rollback))
                return STATUS_SUCCESS;
        }
    }

    // FIXME - rebalance chunks if free space elsewhere?

    WARN("couldn't find any data chunks with %llx bytes free\n", length);

    return STATUS_DISK_FULL;
}
// Applies every entry of `changed_sector_list` to the checksum tree, then empties
// and frees the list (this also happens on every error path, via `exit`).
//
// For each changed_sector (cs->ol.key is its start address, cs->length its sector
// count):
//  - if the initial find_item fails, the tree is treated as empty and, unless the
//    entry is a deletion, its checksums are inserted as a single item;
//  - otherwise the address range [startaddr, endaddr) that covers the entry and
//    any neighbouring checksum items is determined, the existing items in that
//    range are copied into a scratch array and deleted, the entry is applied on
//    top (set or cleared), and the resulting runs are re-inserted in pieces of at
//    most MAX_CSUM_SIZE bytes.
//
// A bitmap tracks which sectors in the range have a checksum:
// set bit = no checksum, cleared bit = checksum present.
//
// Errors are logged and abort processing; nothing is returned to the caller.
void update_checksum_tree(device_extension* Vcb, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
    LIST_ENTRY* le = changed_sector_list->Flink;
    changed_sector* cs;
    traverse_ptr tp, next_tp;
    KEY searchkey;
    UINT32* data;
    NTSTATUS Status;

    if (!Vcb->checksum_root) {
        ERR("no checksum root\n");
        goto exit;
    }

    while (le != changed_sector_list) {
        UINT64 startaddr, endaddr;
        ULONG len;
        UINT32* checksums;
        RTL_BITMAP bmp;
        ULONG* bmparr;
        ULONG runlength, index;

        // the ordered_list (and its LIST_ENTRY) is the first member of changed_sector
        cs = (changed_sector*)le;

        searchkey.obj_id = EXTENT_CSUM_ID;
        searchkey.obj_type = TYPE_EXTENT_CSUM;
        searchkey.offset = cs->ol.key;

        // FIXME - create checksum_root if it doesn't exist at all

        Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) { // tree is completely empty
            // FIXME - do proper check here that tree is empty
            if (!cs->deleted) {
                checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * cs->length, ALLOC_TAG);
                if (!checksums) {
                    ERR("out of memory\n");
                    goto exit;
                }

                RtlCopyMemory(checksums, cs->checksums, sizeof(UINT32) * cs->length);

                if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, cs->ol.key, checksums, sizeof(UINT32) * cs->length, NULL, rollback)) {
                    ERR("insert_tree_item failed\n");
                    ExFreePool(checksums);
                    goto exit;
                }
            }
        } else {
            UINT32 tplen;

            // FIXME - check entry is TYPE_EXTENT_CSUM?

            // start of the range: the preceding item if it reaches the changed sectors
            if (tp.item->key.offset < cs->ol.key && tp.item->key.offset + (tp.item->size * Vcb->superblock.sector_size / sizeof(UINT32)) >= cs->ol.key)
                startaddr = tp.item->key.offset;
            else
                startaddr = cs->ol.key;

            free_traverse_ptr(&tp);

            searchkey.obj_id = EXTENT_CSUM_ID;
            searchkey.obj_type = TYPE_EXTENT_CSUM;
            searchkey.offset = cs->ol.key + (cs->length * Vcb->superblock.sector_size);

            Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
            if (!NT_SUCCESS(Status)) {
                ERR("error - find_item returned %08x\n", Status);
                goto exit;
            }

            tplen = tp.item->size / sizeof(UINT32);

            // end of the range: the end of the item found at the tail if it extends further
            if (tp.item->key.offset + (tplen * Vcb->superblock.sector_size) >= cs->ol.key + (cs->length * Vcb->superblock.sector_size))
                endaddr = tp.item->key.offset + (tplen * Vcb->superblock.sector_size);
            else
                endaddr = cs->ol.key + (cs->length * Vcb->superblock.sector_size);

            free_traverse_ptr(&tp);

            TRACE("cs starts at %llx (%x sectors)\n", cs->ol.key, cs->length);
            TRACE("startaddr = %llx\n", startaddr);
            TRACE("endaddr = %llx\n", endaddr);

            len = (endaddr - startaddr) / Vcb->superblock.sector_size;

            checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * len, ALLOC_TAG);
            if (!checksums) {
                ERR("out of memory\n");
                goto exit;
            }

            bmparr = ExAllocatePoolWithTag(PagedPool, sizeof(ULONG) * ((len/8)+1), ALLOC_TAG);
            if (!bmparr) {
                ERR("out of memory\n");
                ExFreePool(checksums);
                goto exit;
            }

            RtlInitializeBitMap(&bmp, bmparr, len);
            RtlSetAllBits(&bmp);

            searchkey.obj_id = EXTENT_CSUM_ID;
            searchkey.obj_type = TYPE_EXTENT_CSUM;
            searchkey.offset = cs->ol.key;

            Status = find_item(Vcb, Vcb->checksum_root, &tp, &searchkey, FALSE);
            if (!NT_SUCCESS(Status)) {
                ERR("error - find_item returned %08x\n", Status);
                // the scratch buffers are already allocated here; release them before bailing out
                ExFreePool(bmparr);
                ExFreePool(checksums);
                goto exit;
            }

            // set bit = free space, cleared bit = allocated sector

            // pull every existing item inside the range into the scratch array and delete it
            while (tp.item->key.offset < endaddr) {
                if (tp.item->key.offset >= startaddr) {
                    if (tp.item->size > 0) {
                        RtlCopyMemory(&checksums[(tp.item->key.offset - startaddr) / Vcb->superblock.sector_size], tp.item->data, tp.item->size);
                        RtlClearBits(&bmp, (tp.item->key.offset - startaddr) / Vcb->superblock.sector_size, tp.item->size / sizeof(UINT32));
                    }

                    delete_tree_item(Vcb, &tp, rollback);
                }

                if (find_next_item(Vcb, &tp, &next_tp, FALSE)) {
                    free_traverse_ptr(&tp);
                    tp = next_tp;
                } else
                    break;
            }

            free_traverse_ptr(&tp);

            // apply the change itself on top of what was on disk
            if (cs->deleted) {
                RtlSetBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
            } else {
                RtlCopyMemory(&checksums[(cs->ol.key - startaddr) / Vcb->superblock.sector_size], cs->checksums, cs->length * sizeof(UINT32));
                RtlClearBits(&bmp, (cs->ol.key - startaddr) / Vcb->superblock.sector_size, cs->length);
            }

            // re-insert each contiguous run of checksummed sectors, split so that
            // no single item exceeds MAX_CSUM_SIZE bytes
            runlength = RtlFindFirstRunClear(&bmp, &index);

            while (runlength != 0) {
                do {
                    ULONG rl;

                    if (runlength * sizeof(UINT32) > MAX_CSUM_SIZE)
                        rl = MAX_CSUM_SIZE / sizeof(UINT32);
                    else
                        rl = runlength;

                    data = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * rl, ALLOC_TAG);
                    if (!data) {
                        ERR("out of memory\n");
                        ExFreePool(bmparr);
                        ExFreePool(checksums);
                        goto exit;
                    }

                    RtlCopyMemory(data, &checksums[index], sizeof(UINT32) * rl);

                    if (!insert_tree_item(Vcb, Vcb->checksum_root, EXTENT_CSUM_ID, TYPE_EXTENT_CSUM, startaddr + (index * Vcb->superblock.sector_size), data, sizeof(UINT32) * rl, NULL, rollback)) {
                        ERR("insert_tree_item failed\n");
                        ExFreePool(data);
                        ExFreePool(bmparr);
                        ExFreePool(checksums);
                        goto exit;
                    }

                    runlength -= rl;
                    index += rl;
                } while (runlength > 0);

                runlength = RtlFindNextForwardRunClear(&bmp, index, &index);
            }

            ExFreePool(bmparr);
            ExFreePool(checksums);
        }

        le = le->Flink;
    }

exit:
    // the list is consumed regardless of outcome
    while (!IsListEmpty(changed_sector_list)) {
        le = RemoveHeadList(changed_sector_list);
        cs = (changed_sector*)le;

        if (cs->checksums)
            ExFreePool(cs->checksums);

        ExFreePool(cs);
    }
}
// Shrinks `fcb` to `end` bytes.
//
// Excises the extents between the sector-aligned new end and the sector-aligned
// current st_size, then updates st_size and the AllocationSize / FileSize /
// ValidDataLength fields of the FCB header. Unless the inode has
// BTRFS_INODE_NODATASUM set, the sectors recorded during excision are passed to
// update_checksum_tree.
//
// Returns STATUS_SUCCESS, or the status returned by excise_extents on failure.
NTSTATUS truncate_file(fcb* fcb, UINT64 end, LIST_ENTRY* rollback) {
    LIST_ENTRY changed_sector_list;
    NTSTATUS Status;
    BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM;

    if (!nocsum)
        InitializeListHead(&changed_sector_list);

    // FIXME - convert into inline extent if short enough

    Status = excise_extents(fcb->Vcb, fcb, sector_align(end, fcb->Vcb->superblock.sector_size),
                            sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size), nocsum ? NULL : &changed_sector_list, rollback);
    if (!NT_SUCCESS(Status)) {
        ERR("error - excise_extents failed\n");

        // excise_extents may already have queued entries on our local list before
        // failing; nothing else will ever see that list, so release them here
        // (same teardown as update_checksum_tree performs).
        if (!nocsum) {
            while (!IsListEmpty(&changed_sector_list)) {
                changed_sector* cs = (changed_sector*)RemoveHeadList(&changed_sector_list);

                if (cs->checksums)
                    ExFreePool(cs->checksums);

                ExFreePool(cs);
            }
        }

        return Status;
    }

    fcb->inode_item.st_size = end;
    TRACE("setting st_size to %llx\n", end);

    fcb->Header.AllocationSize.QuadPart = sector_align(fcb->inode_item.st_size, fcb->Vcb->superblock.sector_size);
    fcb->Header.FileSize.QuadPart = fcb->inode_item.st_size;
    fcb->Header.ValidDataLength.QuadPart = fcb->inode_item.st_size;
    // FIXME - inform cache manager of this

    TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);

    // consumes and frees changed_sector_list
    if (!nocsum)
        update_checksum_tree(fcb->Vcb, &changed_sector_list, rollback);

    return STATUS_SUCCESS;
}
// Grows `fcb` so that its size becomes `end` bytes.
//
// Alternate data streams are not supported (STATUS_NOT_IMPLEMENTED).
//
// Otherwise the last EXTENT_DATA item of the inode is looked up and one of the
// following happens:
//  - last extent is inline and `end` exceeds Vcb->max_inline: the inline item is
//    deleted and its contents are rewritten as a regular extent (padded with
//    zeroes to a sector boundary), then the file is extended as a regular file;
//  - last extent is inline and `end` still fits inline: the inline item is
//    replaced by a larger, zero-padded one (only if `end` is beyond its current end);
//  - last extent is not inline: a sparse extent is appended from the old
//    allocation end up to the sector-aligned `end`, if that is larger;
//  - the inode has no EXTENT_DATA at all: either a sparse extent covering
//    [0, sector_align(end)) or a zero-filled inline extent of `end` bytes is
//    created, depending on whether `end` exceeds Vcb->max_inline.
//
// In every successful case st_size and the FCB header sizes are updated.
//
// Returns STATUS_SUCCESS, STATUS_NOT_IMPLEMENTED, STATUS_INSUFFICIENT_RESOURCES,
// STATUS_INTERNAL_ERROR, or the failing status of a helper.
NTSTATUS extend_file(fcb* fcb, UINT64 end, LIST_ENTRY* rollback) {
    UINT64 oldalloc, newalloc;
    KEY searchkey;
    traverse_ptr tp;
    BOOL cur_inline;
    NTSTATUS Status;

    // `end` is 64-bit: it must be printed with %llx or the following %p is misread
    TRACE("(%p, %llx, %p)\n", fcb, end, rollback);

    if (fcb->ads) {
        FIXME("FIXME - support streams here\n"); // FIXME
        return STATUS_NOT_IMPLEMENTED;
    } else {
        // highest possible offset: finds the last EXTENT_DATA of this inode, if any
        searchkey.obj_id = fcb->inode;
        searchkey.obj_type = TYPE_EXTENT_DATA;
        searchkey.offset = 0xffffffffffffffff;

        Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
        if (!NT_SUCCESS(Status)) {
            ERR("error - find_item returned %08x\n", Status);
            return Status;
        }

        oldalloc = 0;
        if (tp.item->key.obj_id == fcb->inode && tp.item->key.obj_type == TYPE_EXTENT_DATA) {
            EXTENT_DATA* ed = (EXTENT_DATA*)tp.item->data;
            EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;

            if (tp.item->size < sizeof(EXTENT_DATA)) {
                ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
                free_traverse_ptr(&tp);
                return STATUS_INTERNAL_ERROR;
            }

            oldalloc = tp.item->key.offset + (ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : ed2->num_bytes);
            cur_inline = ed->type == EXTENT_TYPE_INLINE;

            if (cur_inline && end > fcb->Vcb->max_inline) {
                LIST_ENTRY changed_sector_list;
                BOOL nocsum = fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
                UINT64 origlength, length;
                UINT8* data;

                TRACE("giving inline file proper extents\n");

                origlength = ed->decoded_size;

                cur_inline = FALSE;

                if (!nocsum)
                    InitializeListHead(&changed_sector_list);

                delete_tree_item(fcb->Vcb, &tp, rollback);

                length = sector_align(origlength, fcb->Vcb->superblock.sector_size);

                data = ExAllocatePoolWithTag(PagedPool, length, ALLOC_TAG);
                if (!data) {
                    ERR("could not allocate %llx bytes for data\n", length);
                    free_traverse_ptr(&tp);
                    return STATUS_INSUFFICIENT_RESOURCES;
                }

                // zero the padding up to the sector boundary, then copy the inline payload
                if (length > origlength)
                    RtlZeroMemory(data + origlength, length - origlength);

                RtlCopyMemory(data, ed->data, origlength);

                fcb->inode_item.st_blocks -= origlength;

                Status = insert_extent(fcb->Vcb, fcb, tp.item->key.offset, length, data, nocsum ? NULL : &changed_sector_list, rollback);
                if (!NT_SUCCESS(Status)) {
                    ERR("insert_extent returned %08x\n", Status);

                    // insert_extent may have queued entries on our local list before
                    // failing; release them (same teardown as update_checksum_tree).
                    if (!nocsum) {
                        while (!IsListEmpty(&changed_sector_list)) {
                            changed_sector* cs = (changed_sector*)RemoveHeadList(&changed_sector_list);

                            if (cs->checksums)
                                ExFreePool(cs->checksums);

                            ExFreePool(cs);
                        }
                    }

                    free_traverse_ptr(&tp);
                    ExFreePool(data);
                    return Status;
                }

                oldalloc = tp.item->key.offset + length;

                ExFreePool(data);

                // consumes and frees changed_sector_list
                if (!nocsum)
                    update_checksum_tree(fcb->Vcb, &changed_sector_list, rollback);
            }

            if (cur_inline) {
                ULONG edsize;

                if (end > oldalloc) {
                    // replace the inline item with a larger, zero-padded copy
                    edsize = sizeof(EXTENT_DATA) - 1 + end - tp.item->key.offset;

                    ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
                    if (!ed) {
                        ERR("out of memory\n");
                        free_traverse_ptr(&tp);
                        return STATUS_INSUFFICIENT_RESOURCES;
                    }

                    RtlZeroMemory(ed, edsize);
                    RtlCopyMemory(ed, tp.item->data, tp.item->size);

                    ed->decoded_size = end - tp.item->key.offset;

                    delete_tree_item(fcb->Vcb, &tp, rollback);

                    if (!insert_tree_item(fcb->Vcb, fcb->subvol, tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, ed, edsize, NULL, rollback)) {
                        ERR("error - failed to insert item\n");
                        ExFreePool(ed);
                        free_traverse_ptr(&tp);
                        return STATUS_INTERNAL_ERROR;
                    }
                }

                TRACE("extending inline file (oldalloc = %llx, end = %llx)\n", oldalloc, end);

                fcb->inode_item.st_size = end;
                TRACE("setting st_size to %llx\n", end);

                fcb->inode_item.st_blocks = end;

                fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
            } else {
                newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);

                if (newalloc > oldalloc) {
                    Status = insert_sparse_extent(fcb->Vcb, fcb->subvol, fcb->inode, oldalloc, newalloc - oldalloc, rollback);

                    if (!NT_SUCCESS(Status)) {
                        ERR("insert_sparse_extent returned %08x\n", Status);
                        free_traverse_ptr(&tp);
                        return Status;
                    }
                }

                fcb->inode_item.st_size = end;
                TRACE("setting st_size to %llx\n", end);

                TRACE("newalloc = %llx\n", newalloc);

                fcb->Header.AllocationSize.QuadPart = newalloc;
                fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
            }
        } else {
            // the inode has no EXTENT_DATA items yet
            if (end > fcb->Vcb->max_inline) {
                newalloc = sector_align(end, fcb->Vcb->superblock.sector_size);

                Status = insert_sparse_extent(fcb->Vcb, fcb->subvol, fcb->inode, 0, newalloc, rollback);

                if (!NT_SUCCESS(Status)) {
                    ERR("insert_sparse_extent returned %08x\n", Status);
                    free_traverse_ptr(&tp);
                    return Status;
                }

                fcb->inode_item.st_size = end;
                TRACE("setting st_size to %llx\n", end);

                TRACE("newalloc = %llx\n", newalloc);

                fcb->Header.AllocationSize.QuadPart = newalloc;
                fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
            } else {
                EXTENT_DATA* ed;
                ULONG edsize;

                edsize = sizeof(EXTENT_DATA) - 1 + end;

                ed = ExAllocatePoolWithTag(PagedPool, edsize, ALLOC_TAG);
                if (!ed) {
                    ERR("out of memory\n");
                    free_traverse_ptr(&tp);
                    return STATUS_INSUFFICIENT_RESOURCES;
                }

                ed->generation = fcb->Vcb->superblock.generation;
                ed->decoded_size = end;
                ed->compression = BTRFS_COMPRESSION_NONE;
                ed->encryption = BTRFS_ENCRYPTION_NONE;
                ed->encoding = BTRFS_ENCODING_NONE;
                ed->type = EXTENT_TYPE_INLINE;

                RtlZeroMemory(ed->data, end);

                if (!insert_tree_item(fcb->Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, ed, edsize, NULL, rollback)) {
                    ERR("error - failed to insert item\n");
                    ExFreePool(ed);
                    free_traverse_ptr(&tp);
                    return STATUS_INTERNAL_ERROR;
                }

                fcb->inode_item.st_size = end;
                TRACE("setting st_size to %llx\n", end);

                fcb->inode_item.st_blocks = end;

                fcb->Header.AllocationSize.QuadPart = fcb->Header.FileSize.QuadPart = fcb->Header.ValidDataLength.QuadPart = end;
            }
        }

        free_traverse_ptr(&tp);
    }

    return STATUS_SUCCESS;
}
// Looks up the EXTENT_ITEM whose key obj_id is `address` in the extent tree and
// returns its refcount field.
//
// Returns 0 if the lookup fails, if the item found is not an EXTENT_ITEM for
// `address`, or if it is smaller than an EXTENT_ITEM; each of these is logged.
static UINT64 get_extent_item_refcount(device_extension* Vcb, UINT64 address) {
    traverse_ptr tp;
    KEY searchkey;
    NTSTATUS Status;
    UINT64 refs = 0;

    // maximum offset, so the search lands on the item for this address whatever its size key is
    searchkey.offset = 0xffffffffffffffff;
    searchkey.obj_type = TYPE_EXTENT_ITEM;
    searchkey.obj_id = address;

    Status = find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return 0;
    }

    if (tp.item->key.obj_id == searchkey.obj_id && tp.item->key.obj_type == searchkey.obj_type) {
        if (tp.item->size >= sizeof(EXTENT_ITEM))
            refs = ((EXTENT_ITEM*)tp.item->data)->refcount;
        else
            ERR("(%llx,%x,%llx) was %u bytes, expected %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_ITEM));
    } else {
        ERR("error - could not find EXTENT_ITEM for %llx\n", address);
    }

    free_traverse_ptr(&tp);

    return refs;
}
// Writes `length` bytes of `data` at file offset `start_data` for a file that
// should be overwritten in place where possible.
//
// Walks the inode's EXTENT_DATA items that overlap [start_data, start_data + length).
// For each one, the overlapping part [new_start, new_end) is handled as follows:
//  - inline extents, and regular extents whose EXTENT_ITEM refcount is above 1,
//    are replaced: the range is excised and the new data inserted as a new extent;
//  - regular extents with refcount 1 are overwritten directly with write_data,
//    and, if `changed_sector_list` is non-NULL, a changed_sector entry with
//    freshly computed CRC32C checksums is queued for the written range.
// Any part of the write beyond the last extent visited is inserted as a new extent.
//
// Prealloc extents, unknown extent types and compressed extents give
// STATUS_NOT_SUPPORTED; encrypted or otherwise encoded extents and malformed
// items give STATUS_INTERNAL_ERROR.
static NTSTATUS do_nocow_write(device_extension* Vcb, fcb* fcb, UINT64 start_data, UINT64 length, void* data, LIST_ENTRY* changed_sector_list, LIST_ENTRY* rollback) {
    KEY searchkey;
    traverse_ptr tp, next_tp;
    NTSTATUS Status;
    EXTENT_DATA* ed;
    BOOL b, do_cow;
    BOOL next_held = FALSE; // TRUE while next_tp holds a reference not yet moved into tp
    EXTENT_DATA2* eds;
    UINT64 size, new_start, new_end, last_write = 0;

    TRACE("(%p, (%llx, %llx), %llx, %llx, %p, %p)\n", Vcb, fcb->subvol->id, fcb->inode, start_data, length, data, changed_sector_list);

    searchkey.obj_id = fcb->inode;
    searchkey.obj_type = TYPE_EXTENT_DATA;
    searchkey.offset = start_data;

    Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        return Status;
    }

    if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset > start_data) {
        ERR("previous EXTENT_DATA not found (found %llx,%x,%llx)\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset);
        Status = STATUS_INTERNAL_ERROR;
        goto end;
    }

    do {
        if (tp.item->size < sizeof(EXTENT_DATA)) {
            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        ed = (EXTENT_DATA*)tp.item->data;

        if ((ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC) && tp.item->size < sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2)) {
            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA) - 1 + sizeof(EXTENT_DATA2));
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        eds = (EXTENT_DATA2*)&ed->data[0];

        // fetch the successor before this extent is modified below
        b = find_next_item(Vcb, &tp, &next_tp, TRUE);
        next_held = b;

        switch (ed->type) {
            case EXTENT_TYPE_REGULAR:
            {
                UINT64 rc = get_extent_item_refcount(Vcb, eds->address);

                if (rc == 0) {
                    ERR("get_extent_item_refcount failed\n");
                    Status = STATUS_INTERNAL_ERROR;
                    goto end;
                }

                // an extent referenced more than once must not be overwritten in place
                do_cow = rc > 1;
                break;
            }

            case EXTENT_TYPE_INLINE:
                do_cow = TRUE;
                break;

            case EXTENT_TYPE_PREALLOC:
                FIXME("FIXME - handle prealloc extents\n"); // FIXME
                Status = STATUS_NOT_SUPPORTED;
                goto end;

            default:
                ERR("error - unknown extent type %x\n", ed->type);
                Status = STATUS_NOT_SUPPORTED;
                goto end;
        }

        if (ed->compression != BTRFS_COMPRESSION_NONE) {
            FIXME("FIXME: compression not yet supported\n");
            Status = STATUS_NOT_SUPPORTED;
            goto end;
        }

        if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
            WARN("encryption not supported\n");
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        if (ed->encoding != BTRFS_ENCODING_NONE) {
            WARN("other encodings not supported\n");
            Status = STATUS_INTERNAL_ERROR;
            goto end;
        }

        size = ed->type == EXTENT_TYPE_INLINE ? ed->decoded_size : eds->num_bytes;

        TRACE("extent: start = %llx, length = %llx\n", tp.item->key.offset, size);

        // intersection of this extent with the write, as absolute file offsets
        new_start = tp.item->key.offset < start_data ? start_data : tp.item->key.offset;
        new_end = tp.item->key.offset + size > start_data + length ? (start_data + length) : (tp.item->key.offset + size);

        TRACE("new_start = %llx\n", new_start);
        TRACE("new_end = %llx\n", new_end);

        if (do_cow) {
            TRACE("doing COW write\n");

            // excise_extents takes a start and an END offset; new_end is already
            // absolute, so it must not be added to new_start (that would excise
            // far past the range being rewritten)
            Status = excise_extents(Vcb, fcb, new_start, new_end, changed_sector_list, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("error - excise_extents returned %08x\n", Status);
                goto end;
            }

            Status = insert_extent(Vcb, fcb, new_start, new_end - new_start, (UINT8*)data + new_start - start_data, changed_sector_list, rollback);

            if (!NT_SUCCESS(Status)) {
                ERR("error - insert_extent returned %08x\n", Status);
                goto end;
            }
        } else {
            UINT64 writeaddr;

            writeaddr = eds->address + eds->offset + new_start - tp.item->key.offset;
            TRACE("doing non-COW write to %llx\n", writeaddr);

            Status = write_data(Vcb, writeaddr, (UINT8*)data + new_start - start_data, new_end - new_start);

            if (!NT_SUCCESS(Status)) {
                ERR("error - write_data returned %08x\n", Status);
                goto end;
            }

            if (changed_sector_list) {
                unsigned int i;
                changed_sector* sc;

                sc = ExAllocatePoolWithTag(PagedPool, sizeof(changed_sector), ALLOC_TAG);
                if (!sc) {
                    ERR("out of memory\n");
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto end;
                }

                sc->ol.key = writeaddr;
                sc->length = (new_end - new_start) / Vcb->superblock.sector_size;
                sc->deleted = FALSE;

                sc->checksums = ExAllocatePoolWithTag(PagedPool, sizeof(UINT32) * sc->length, ALLOC_TAG);
                if (!sc->checksums) {
                    ERR("out of memory\n");
                    ExFreePool(sc);
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto end;
                }

                // one checksum per sector written
                for (i = 0; i < sc->length; i++) {
                    sc->checksums[i] = ~calc_crc32c(0xffffffff, (UINT8*)data + new_start - start_data + (i * Vcb->superblock.sector_size), Vcb->superblock.sector_size);
                }

                insert_into_ordered_list(changed_sector_list, &sc->ol);
            }
        }

        last_write = new_end;

        if (b) {
            free_traverse_ptr(&tp);
            tp = next_tp;
            next_held = FALSE; // ownership moved into tp

            if (tp.item->key.obj_id != fcb->inode || tp.item->key.obj_type != TYPE_EXTENT_DATA || tp.item->key.offset >= start_data + length)
                b = FALSE;
        }
    } while (b);

    // whatever lies beyond the last extent visited is written as a new extent
    if (last_write < start_data + length) {
        new_start = last_write;
        new_end = start_data + length;

        TRACE("new_start = %llx\n", new_start);
        TRACE("new_end = %llx\n", new_end);

        Status = insert_extent(Vcb, fcb, new_start, new_end - new_start, (UINT8*)data + new_start - start_data, changed_sector_list, rollback);

        if (!NT_SUCCESS(Status)) {
            ERR("error - insert_extent returned %08x\n", Status);
            goto end;
        }
    }

    Status = STATUS_SUCCESS;

end:
    // an error exit taken between find_next_item and the hand-over to tp still owns next_tp
    if (next_held)
        free_traverse_ptr(&next_tp);

    free_traverse_ptr(&tp);

    return Status;
}
#ifdef DEBUG_PARANOID
// Debug helper: logs every item of tree `t` via ERR, prefixed by `spaces` spaces,
// and recurses into the child tree of each item when the tree's header level is
// above 0. A NULL `t` is reported as "(not loaded)".
//
// The prefix buffer holds at most 9 spaces, so the indent is clamped to that
// (and to 0 for negative values) rather than writing past the buffer.
static void print_loaded_trees(tree* t, int spaces) {
    char pref[10];
    int i, indent;
    LIST_ENTRY* le;

    indent = spaces;
    if (indent < 0)
        indent = 0;
    else if (indent > (int)sizeof(pref) - 1)
        indent = (int)sizeof(pref) - 1;

    for (i = 0; i < indent; i++) {
        pref[i] = ' ';
    }

    pref[indent] = 0;

    if (!t) {
        ERR("%s(not loaded)\n", pref);
        return;
    }

    le = t->itemlist.Flink;
    while (le != &t->itemlist) {
        tree_data* td = CONTAINING_RECORD(le, tree_data, list_entry);

        ERR("%s%llx,%x,%llx ignore=%s\n", pref, td->key.obj_id, td->key.obj_type, td->key.offset, td->ignore ? "TRUE" : "FALSE");

        // items of a tree above level 0 hold child trees
        if (t->header.level > 0) {
            print_loaded_trees(td->treeholder.tree, spaces+1);
        }

        le = le->Flink;
    }
}
// Debug check (DEBUG_PARANOID builds only) that the EXTENT_DATA items of `fcb`
// are consistent:
//  - an item must exist at offset 0;
//  - each following item must start exactly where the previous one ended;
//  - the summed length of all items, excluding regular extents whose address
//    is 0, must equal inode_item.st_blocks.
//
// Streams, empty files and deleted files are skipped. On any inconsistency the
// loaded trees of the subvolume are dumped and int3 is executed.
static void check_extents_consistent(device_extension* Vcb, fcb* fcb) {
    KEY searchkey;
    traverse_ptr tp, next_tp;
    UINT64 length, expected_off, alloc = 0;
    NTSTATUS Status;
    EXTENT_DATA* ed;
    EXTENT_DATA2* ed2;

    if (fcb->ads || fcb->inode_item.st_size == 0 || fcb->deleted)
        return;

    TRACE("inode = %llx, subvol = %llx\n", fcb->inode, fcb->subvol->id);

    searchkey.obj_id = fcb->inode;
    searchkey.obj_type = TYPE_EXTENT_DATA;
    searchkey.offset = 0;

    Status = find_item(Vcb, fcb->subvol, &tp, &searchkey, FALSE);
    if (!NT_SUCCESS(Status)) {
        ERR("error - find_item returned %08x\n", Status);
        goto failure2;
    }

    if (keycmp(&searchkey, &tp.item->key)) {
        ERR("could not find EXTENT_DATA at offset 0\n");
        goto failure;
    }

    if (tp.item->size < sizeof(EXTENT_DATA)) {
        ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
        goto failure;
    }

    // first extent
    ed = (EXTENT_DATA*)tp.item->data;
    ed2 = (EXTENT_DATA2*)&ed->data[0];

    if (ed->type == EXTENT_TYPE_INLINE)
        length = ed->decoded_size;
    else
        length = ed2->num_bytes;

    TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);

    // regular extents with address 0 do not count towards the allocation
    if (!(ed->type == EXTENT_TYPE_REGULAR && ed2->address == 0))
        alloc += length;

    // where the next extent has to begin
    expected_off = tp.item->key.offset + length;

    for (;;) {
        if (!find_next_item(Vcb, &tp, &next_tp, FALSE))
            break;

        // stop at the first item that is not an EXTENT_DATA of this inode
        if (!(next_tp.item->key.obj_id == searchkey.obj_id && next_tp.item->key.obj_type == searchkey.obj_type)) {
            free_traverse_ptr(&next_tp);
            break;
        }

        free_traverse_ptr(&tp);
        tp = next_tp;

        if (tp.item->size < sizeof(EXTENT_DATA)) {
            ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, sizeof(EXTENT_DATA));
            goto failure;
        }

        ed = (EXTENT_DATA*)tp.item->data;
        ed2 = (EXTENT_DATA2*)&ed->data[0];

        if (ed->type == EXTENT_TYPE_INLINE)
            length = ed->decoded_size;
        else
            length = ed2->num_bytes;

        TRACE("(%llx,%x,%llx) length = %llx\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, length);

        if (tp.item->key.offset != expected_off) {
            ERR("EXTENT_DATA in %llx,%llx was at %llx, expected %llx\n", fcb->subvol->id, fcb->inode, tp.item->key.offset, expected_off);
            goto failure;
        }

        if (!(ed->type == EXTENT_TYPE_REGULAR && ed2->address == 0))
            alloc += length;

        expected_off = tp.item->key.offset + length;
    }

    if (alloc != fcb->inode_item.st_blocks) {
        ERR("allocation size was %llx, expected %llx\n", alloc, fcb->inode_item.st_blocks);
        goto failure;
    }

    free_traverse_ptr(&tp);

    return;

failure:
    free_traverse_ptr(&tp);

failure2:
    if (fcb->subvol->treeholder.tree)
        print_loaded_trees(fcb->subvol->treeholder.tree, 0);

    int3;
}
// static void check_extent_tree_consistent(device_extension* Vcb) {
// KEY searchkey;
// traverse_ptr tp, next_tp;
// UINT64 lastaddr;
// BOOL b, inconsistency;
//
// searchkey.obj_id = 0;
// searchkey.obj_type = 0;
// searchkey.offset = 0;
//
// if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
// ERR("error - could not find any entries in extent_root\n");
// int3;
// }
//
// lastaddr = 0;
// inconsistency = FALSE;
//
// do {
// if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
// // ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
//
// if (tp.item->key.obj_id < lastaddr) {
// // ERR("inconsistency!\n");
// // int3;
// inconsistency = TRUE;
// }
//
// lastaddr = tp.item->key.obj_id + tp.item->key.offset;
// }
//
// b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
// if (b) {
// free_traverse_ptr(&tp);
// tp = next_tp;
// }
// } while (b);
//
// free_traverse_ptr(&tp);
//
// if (!inconsistency)
// return;
//
// ERR("Inconsistency detected:\n");
//
// if (!find_item(Vcb, Vcb->extent_root, &tp, &searchkey, FALSE)) {
// ERR("error - could not find any entries in extent_root\n");
// int3;
// }
//
// do {
// if (tp.item->key.obj_type == TYPE_EXTENT_ITEM) {
// ERR("%x,%x,%x\n", (UINT32)tp.item->key.obj_id, tp.item->key.obj_type, (UINT32)tp.item->key.offset);
//
// if (tp.item->key.obj_id < lastaddr) {
// ERR("inconsistency!\n");
// }
//
// lastaddr = tp.item->key.obj_id + tp.item->key.offset;
// }
//
// b = find_next_item(Vcb, &tp, &next_tp, NULL, FALSE);
// if (b) {
// free_traverse_ptr(&tp);
// tp = next_tp;
// }
// } while (b);
//
// free_traverse_ptr(&tp);
//
// int3;
// }
#endif
NTSTATUS write_file2(device_extension* Vcb, PIRP Irp, LARGE_INTEGER offset, void* buf, ULONG* length, BOOL paging_io, BOOL no_cache, LIST_ENTRY* rollback) {
PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
PFILE_OBJECT FileObject = IrpSp->FileObject;
KEY searchkey;
traverse_ptr tp;
EXTENT_DATA* ed2;
UINT64 newlength, start_data, end_data;
UINT32 bufhead;
BOOL make_inline;
UINT8* data;
LIST_ENTRY changed_sector_list;
INODE_ITEM *ii, *origii;
BOOL changed_length = FALSE, nocsum, nocow/*, lazy_writer = FALSE, write_eof = FALSE*/;
NTSTATUS Status;
LARGE_INTEGER time;
BTRFS_TIME now;
fcb* fcb;
BOOL paging_lock = FALSE;
TRACE("(%p, %p, %llx, %p, %x, %u, %u)\n", Vcb, FileObject, offset.QuadPart, buf, *length, paging_io, no_cache);
if (*length == 0) {
WARN("returning success for zero-length write\n");
return STATUS_SUCCESS;
}
if (!FileObject) {
ERR("error - FileObject was NULL\n");
return STATUS_ACCESS_DENIED;
}
fcb = FileObject->FsContext;
if (fcb->type != BTRFS_TYPE_FILE && fcb->type != BTRFS_TYPE_SYMLINK) {
WARN("tried to write to something other than a file or symlink (inode %llx, type %u, %p, %p)\n", fcb->inode, fcb->type, &fcb->type, fcb);
return STATUS_ACCESS_DENIED;
}
if (offset.LowPart == FILE_WRITE_TO_END_OF_FILE && offset.HighPart == -1) {
offset = fcb->Header.FileSize;
// write_eof = TRUE;
}
TRACE("fcb->Header.Flags = %x\n", fcb->Header.Flags);
if (no_cache && !paging_io && FileObject->SectionObjectPointer->DataSectionObject) {
IO_STATUS_BLOCK iosb;
ExAcquireResourceExclusiveLite(fcb->Header.PagingIoResource, TRUE);
CcFlushCache(FileObject->SectionObjectPointer, &offset, *length, &iosb);
if (!NT_SUCCESS(iosb.Status)) {
ExReleaseResourceLite(fcb->Header.PagingIoResource);
ERR("CcFlushCache returned %08x\n", iosb.Status);
return iosb.Status;
}
paging_lock = TRUE;
CcPurgeCacheSection(FileObject->SectionObjectPointer, &offset, *length, FALSE);
}
if (paging_io) {
ExAcquireResourceSharedLite(fcb->Header.PagingIoResource, TRUE);
paging_lock = TRUE;
}
nocsum = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATASUM;
nocow = fcb->ads ? TRUE : fcb->inode_item.flags & BTRFS_INODE_NODATACOW;
newlength = fcb->ads ? fcb->adssize : fcb->inode_item.st_size;
if (fcb->deleted)
newlength = 0;
TRACE("newlength = %llx\n", newlength);
// if (KeGetCurrentThread() == fcb->lazy_writer_thread) {
// ERR("lazy writer on the TV\n");
// lazy_writer = TRUE;
// }
if (offset.QuadPart + *length > newlength) {
if (paging_io) {
if (offset.QuadPart >= newlength) {
TRACE("paging IO tried to write beyond end of file (file size = %llx, offset = %llx, length = %x)\n", newlength, offset.QuadPart, *length);
TRACE("filename %.*S\n", fcb->full_filename.Length / sizeof(WCHAR), fcb->full_filename.Buffer);
TRACE("FileObject: AllocationSize = %llx, FileSize = %llx, ValidDataLength = %llx\n",
fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
Status = STATUS_SUCCESS;
goto end;
}
*length = newlength - offset.QuadPart;
} else {
newlength = offset.QuadPart + *length;
changed_length = TRUE;
TRACE("extending length to %llx\n", newlength);
}
}
make_inline = fcb->ads ? FALSE : newlength <= fcb->Vcb->max_inline;
if (changed_length) {
if (newlength > fcb->Header.AllocationSize.QuadPart) {
Status = extend_file(fcb, newlength, rollback);
if (!NT_SUCCESS(Status)) {
ERR("extend_file returned %08x\n", Status);
goto end;
}
} else if (fcb->ads)
fcb->adssize = newlength;
else
fcb->inode_item.st_size = newlength;
fcb->Header.FileSize.QuadPart = newlength;
fcb->Header.ValidDataLength.QuadPart = newlength;
TRACE("AllocationSize = %llx\n", fcb->Header.AllocationSize.QuadPart);
TRACE("FileSize = %llx\n", fcb->Header.FileSize.QuadPart);
TRACE("ValidDataLength = %llx\n", fcb->Header.ValidDataLength.QuadPart);
}
if (!no_cache) {
BOOL wait;
if (!FileObject->PrivateCacheMap || changed_length) {
CC_FILE_SIZES ccfs;
ccfs.AllocationSize = fcb->Header.AllocationSize;
ccfs.FileSize = fcb->Header.FileSize;
ccfs.ValidDataLength = fcb->Header.ValidDataLength;
if (!FileObject->PrivateCacheMap) {
TRACE("calling CcInitializeCacheMap...\n");
CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, FileObject);
CcSetReadAheadGranularity(FileObject, READ_AHEAD_GRANULARITY);
} else {
CcSetFileSizes(FileObject, &ccfs);
}
}
// FIXME - uncomment this when async is working
// wait = IoIsOperationSynchronous(Irp) ? TRUE : FALSE;
wait = TRUE;
TRACE("CcCopyWrite(%p, %llx, %x, %u, %p)\n", FileObject, offset.QuadPart, *length, wait, buf);
if (!CcCopyWrite(FileObject, &offset, *length, wait, buf)) {
TRACE("CcCopyWrite failed.\n");
IoMarkIrpPending(Irp);
Status = STATUS_PENDING;
goto end;
}
TRACE("CcCopyWrite finished\n");
Status = STATUS_SUCCESS;
goto end;
}
if (fcb->ads) {
UINT16 datalen;
UINT8* data2;
UINT32 maxlen;
if (!get_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, &data, &datalen)) {
ERR("get_xattr failed\n");
Status = STATUS_INTERNAL_ERROR;
goto end;
}
if (changed_length) {
// find maximum length of xattr
maxlen = Vcb->superblock.node_size - sizeof(tree_header) - sizeof(leaf_node);
searchkey.obj_id = fcb->inode;
searchkey.obj_type = TYPE_XATTR_ITEM;
searchkey.offset = fcb->adshash;
Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
if (!NT_SUCCESS(Status)) {
ERR("error - find_item returned %08x\n", Status);
goto end;
}
if (keycmp(&tp.item->key, &searchkey)) {
ERR("error - could not find key for xattr\n");
free_traverse_ptr(&tp);
Status = STATUS_INTERNAL_ERROR;
goto end;
}
if (tp.item->size < datalen) {
ERR("(%llx,%x,%llx) was %u bytes, expected at least %u\n", tp.item->key.obj_id, tp.item->key.obj_type, tp.item->key.offset, tp.item->size, datalen);
free_traverse_ptr(&tp);
Status = STATUS_INTERNAL_ERROR;
goto end;
}
maxlen -= tp.item->size - datalen; // subtract XATTR_ITEM overhead
free_traverse_ptr(&tp);
if (newlength > maxlen) {
ERR("error - xattr too long (%llu > %u)\n", newlength, maxlen);
Status = STATUS_DISK_FULL;
goto end;
}
fcb->adssize = newlength;
data2 = ExAllocatePoolWithTag(PagedPool, newlength, ALLOC_TAG);
if (!data2) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
RtlCopyMemory(data2, data, datalen);
if (offset.QuadPart > datalen)
RtlZeroMemory(&data2[datalen], offset.QuadPart - datalen);
} else
data2 = data;
if (*length > 0)
RtlCopyMemory(&data2[offset.QuadPart], buf, *length);
Status = set_xattr(fcb->Vcb, fcb->subvol, fcb->inode, fcb->adsxattr.Buffer, fcb->adshash, data2, newlength, rollback);
if (!NT_SUCCESS(Status)) {
ERR("set_xattr returned %08x\n", Status);
goto end;
}
if (data) ExFreePool(data);
if (data2 != data) ExFreePool(data2);
fcb->Header.ValidDataLength.QuadPart = newlength;
} else {
if (make_inline) {
start_data = 0;
end_data = sector_align(newlength, fcb->Vcb->superblock.sector_size);
bufhead = sizeof(EXTENT_DATA) - 1;
} else {
start_data = offset.QuadPart & ~(fcb->Vcb->superblock.sector_size - 1);
end_data = sector_align(offset.QuadPart + *length, fcb->Vcb->superblock.sector_size);
bufhead = 0;
}
fcb->Header.ValidDataLength.QuadPart = newlength;
TRACE("fcb %p FileSize = %llx\n", fcb, fcb->Header.FileSize.QuadPart);
data = ExAllocatePoolWithTag(PagedPool, end_data - start_data + bufhead, ALLOC_TAG);
if (!data) {
ERR("out of memory\n");
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
RtlZeroMemory(data + bufhead, end_data - start_data);
TRACE("start_data = %llx\n", start_data);
TRACE("end_data = %llx\n", end_data);
if (offset.QuadPart > start_data || offset.QuadPart + *length < end_data) {
if (changed_length) {
if (fcb->inode_item.st_size > start_data)
Status = read_file(Vcb, fcb->subvol, fcb->inode, data + bufhead, start_data, fcb->inode_item.st_size - start_data, NULL);
else
Status = STATUS_SUCCESS;
} else
Status = read_file(Vcb, fcb->subvol, fcb->inode, data + bufhead, start_data, end_data - start_data, NULL);
if (!NT_SUCCESS(Status)) {
ERR("read_file returned %08x\n", Status);
ExFreePool(data);
goto end;
}
}
RtlCopyMemory(data + bufhead + offset.QuadPart - start_data, buf, *length);
if (!nocsum)
InitializeListHead(&changed_sector_list);
if (make_inline || !nocow) {
Status = excise_extents(fcb->Vcb, fcb, start_data, end_data, nocsum ? NULL : &changed_sector_list, rollback);
if (!NT_SUCCESS(Status)) {
ERR("error - excise_extents returned %08x\n", Status);
ExFreePool(data);
goto end;
}
if (!make_inline) {
Status = insert_extent(fcb->Vcb, fcb, start_data, end_data - start_data, data, nocsum ? NULL : &changed_sector_list, rollback);
if (!NT_SUCCESS(Status)) {
ERR("error - insert_extent returned %08x\n", Status);
ExFreePool(data);
goto end;
}
ExFreePool(data);
} else {
ed2 = (EXTENT_DATA*)data;
ed2->generation = fcb->Vcb->superblock.generation;
ed2->decoded_size = newlength;
ed2->compression = BTRFS_COMPRESSION_NONE;
ed2->encryption = BTRFS_ENCRYPTION_NONE;
ed2->encoding = BTRFS_ENCODING_NONE;
ed2->type = EXTENT_TYPE_INLINE;
insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_EXTENT_DATA, 0, ed2, sizeof(EXTENT_DATA) - 1 + newlength, NULL, rollback);
fcb->inode_item.st_blocks += newlength;
}
} else {
Status = do_nocow_write(fcb->Vcb, fcb, start_data, end_data - start_data, data, nocsum ? NULL : &changed_sector_list, rollback);
if (!NT_SUCCESS(Status)) {
ERR("error - do_nocow_write returned %08x\n", Status);
ExFreePool(data);
goto end;
}
ExFreePool(data);
}
}
KeQuerySystemTime(&time);
win_time_to_unix(time, &now);
// ERR("no_cache = %s, FileObject->PrivateCacheMap = %p\n", no_cache ? "TRUE" : "FALSE", FileObject->PrivateCacheMap);
// if (!no_cache) {
// if (!FileObject->PrivateCacheMap) {
// CC_FILE_SIZES ccfs;
//
// ccfs.AllocationSize = fcb->Header.AllocationSize;
// ccfs.FileSize = fcb->Header.FileSize;
// ccfs.ValidDataLength = fcb->Header.ValidDataLength;
//
// TRACE("calling CcInitializeCacheMap...\n");
// CcInitializeCacheMap(FileObject, &ccfs, FALSE, cache_callbacks, fcb);
//
// changed_length = FALSE;
// }
// }
if (fcb->ads)
origii = &fcb->par->inode_item;
else
origii = &fcb->inode_item;
origii->transid = Vcb->superblock.generation;
origii->sequence++;
origii->st_ctime = now;
if (!fcb->ads) {
TRACE("setting st_size to %llx\n", newlength);
origii->st_size = newlength;
origii->st_mtime = now;
}
searchkey.obj_id = fcb->inode;
searchkey.obj_type = TYPE_INODE_ITEM;
searchkey.offset = 0;
Status = find_item(fcb->Vcb, fcb->subvol, &tp, &searchkey, FALSE);
if (!NT_SUCCESS(Status)) {
ERR("error - find_item returned %08x\n", Status);
goto end;
}
if (!keycmp(&tp.item->key, &searchkey))
delete_tree_item(Vcb, &tp, rollback);
else
WARN("couldn't find existing INODE_ITEM\n");
ii = ExAllocatePoolWithTag(PagedPool, sizeof(INODE_ITEM), ALLOC_TAG);
if (!ii) {
ERR("out of memory\n");
free_traverse_ptr(&tp);
Status = STATUS_INSUFFICIENT_RESOURCES;
goto end;
}
RtlCopyMemory(ii, origii, sizeof(INODE_ITEM));
insert_tree_item(Vcb, fcb->subvol, fcb->inode, TYPE_INODE_ITEM, 0, ii, sizeof(INODE_ITEM), NULL, rollback);
free_traverse_ptr(&tp);
// FIXME - update inode_item of open FCBs pointing to the same inode (i.e. hardlinked files)
if (!nocsum)
update_checksum_tree(Vcb, &changed_sector_list, rollback);
if (changed_length) {
CC_FILE_SIZES ccfs;
ccfs.AllocationSize = fcb->Header.AllocationSize;
ccfs.FileSize = fcb->Header.FileSize;
ccfs.ValidDataLength = fcb->Header.ValidDataLength;
CcSetFileSizes(FileObject, &ccfs);
}
// FIXME - make sure this still called if STATUS_PENDING and async
// if (!no_cache) {
// if (!CcCopyWrite(FileObject, &offset, *length, TRUE, buf)) {
// ERR("CcCopyWrite failed.\n");
// }
// }
fcb->subvol->root_item.ctransid = Vcb->superblock.generation;
fcb->subvol->root_item.ctime = now;
Status = STATUS_SUCCESS;
end:
if (FileObject->Flags & FO_SYNCHRONOUS_IO && !paging_io) {
TRACE("CurrentByteOffset was: %llx\n", FileObject->CurrentByteOffset.QuadPart);
FileObject->CurrentByteOffset.QuadPart = offset.QuadPart + (NT_SUCCESS(Status) ? *length : 0);
TRACE("CurrentByteOffset now: %llx\n", FileObject->CurrentByteOffset.QuadPart);
}
if (paging_lock)
ExReleaseResourceLite(fcb->Header.PagingIoResource);
return Status;
}
/* Services a write IRP: validates the request, obtains the caller's data
 * buffer, and hands the actual work to write_file2() under the tree lock.
 *
 * DeviceObject - device whose DeviceExtension is the volume's device_extension.
 * Irp          - the write request; offset and length are read from the current
 *                stack location's Parameters.Write.
 *
 * Returns:
 *   STATUS_MEDIA_WRITE_PROTECTED  if Vcb->readonly is set;
 *   STATUS_ACCESS_DENIED          if the file's subvolume has BTRFS_SUBVOL_READONLY set;
 *   STATUS_INSUFFICIENT_RESOURCES if the request has an MDL but the buffer could not be mapped;
 *   STATUS_FILE_LOCK_CONFLICT     if FsRtlCheckLockForWriteAccess refuses a non-paging write;
 *   otherwise whatever write_file2() / consider_write() return.
 *
 * On success Irp->IoStatus.Information is set to the (possibly adjusted)
 * Parameters.Write.Length; on every other path it is left at 0.
 *
 * This function never calls IoCompleteRequest itself - presumably the caller
 * completes the IRP using the returned status (confirm against the dispatch routine). */
NTSTATUS write_file(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
    device_extension* Vcb = DeviceObject->DeviceExtension;
    void* buf;
    NTSTATUS Status;
    LARGE_INTEGER offset = IrpSp->Parameters.Write.ByteOffset;
    PFILE_OBJECT FileObject = IrpSp->FileObject;
    fcb* fcb = FileObject ? FileObject->FsContext : NULL;
    BOOL locked = FALSE;
    LIST_ENTRY rollback;

    InitializeListHead(&rollback);

    // Nothing has been written yet. Set this before the early returns below so
    // that they do not leave a stale byte count in the IRP.
    Irp->IoStatus.Information = 0;

    if (Vcb->readonly)
        return STATUS_MEDIA_WRITE_PROTECTED;

    if (fcb && fcb->subvol->root_item.flags & BTRFS_SUBVOL_READONLY)
        return STATUS_ACCESS_DENIED;

    TRACE("write\n");

    // The minor function is only logged; none of the variants changes the
    // processing that follows.
    switch (IrpSp->MinorFunction) {
        case IRP_MN_COMPLETE:
            FIXME("unsupported - IRP_MN_COMPLETE\n");
            break;

        case IRP_MN_COMPLETE_MDL:
            FIXME("unsupported - IRP_MN_COMPLETE_MDL\n");
            break;

        case IRP_MN_COMPLETE_MDL_DPC:
            FIXME("unsupported - IRP_MN_COMPLETE_MDL_DPC\n");
            break;

        case IRP_MN_COMPRESSED:
            FIXME("unsupported - IRP_MN_COMPRESSED\n");
            break;

        case IRP_MN_DPC:
            FIXME("unsupported - IRP_MN_DPC\n");
            break;

        case IRP_MN_MDL:
            FIXME("unsupported - IRP_MN_MDL\n");
            break;

        case IRP_MN_MDL_DPC:
            FIXME("unsupported - IRP_MN_MDL_DPC\n");
            break;

        case IRP_MN_NORMAL:
            TRACE("IRP_MN_NORMAL\n");
            break;

        default:
            WARN("unknown minor function %x\n", IrpSp->MinorFunction);
            break;
    }

    TRACE("offset = %llx\n", offset.QuadPart);
    TRACE("length = %x\n", IrpSp->Parameters.Write.Length);

    // Prefer the system buffer when one is attached; otherwise ask
    // map_user_buffer() for an address. A NULL result is only treated as a
    // failure when the IRP actually carries an MDL.
    if (!Irp->AssociatedIrp.SystemBuffer) {
        buf = map_user_buffer(Irp);

        if (Irp->MdlAddress && !buf) {
            ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
            Status = STATUS_INSUFFICIENT_RESOURCES;
            goto exit;
        }
    } else
        buf = Irp->AssociatedIrp.SystemBuffer;

    TRACE("buf = %p\n", buf);

    // NOTE(review): the TRUE argument presumably requests exclusive access to
    // the tree lock - confirm against acquire_tree_lock().
    acquire_tree_lock(Vcb, TRUE);
    locked = TRUE;

    // Byte-range locks are not checked for paging I/O.
    if (fcb && !(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForWriteAccess(&fcb->lock, Irp)) {
        WARN("tried to write to locked region\n");
        Status = STATUS_FILE_LOCK_CONFLICT;
        goto exit;
    }

    Status = write_file2(Vcb, Irp, offset, buf, &IrpSp->Parameters.Write.Length, Irp->Flags & IRP_PAGING_IO, Irp->Flags & IRP_NOCACHE, &rollback);

    // STATUS_PENDING satisfies NT_SUCCESS(), so it has to be tested for
    // explicitly: the write has not happened yet, so we must neither run
    // consider_write() nor report any bytes as written.
    if (Status == STATUS_PENDING)
        goto exit;
    else if (!NT_SUCCESS(Status)) {
        ERR("write_file2 returned %08x\n", Status);
        goto exit;
    }

    Status = consider_write(Vcb);

    if (NT_SUCCESS(Status)) {
        // write_file2() may have adjusted the length in place.
        Irp->IoStatus.Information = IrpSp->Parameters.Write.Length;

#ifdef DEBUG_PARANOID
        check_extents_consistent(Vcb, FileObject->FsContext); // TESTING
#endif
    }

exit:
    if (locked) {
        // Drop the rollback list for any status that satisfies NT_SUCCESS()
        // (which includes STATUS_PENDING); otherwise run it.
        if (NT_SUCCESS(Status))
            clear_rollback(&rollback);
        else
            do_rollback(Vcb, &rollback);

        release_tree_lock(Vcb, TRUE);
    }

    return Status;
}