mirror of
https://github.com/reactos/reactos.git
synced 2024-12-28 10:04:49 +00:00
6e0cf03d92
v1.8 (2022-03-12): - Added minimal support for fs-verity - ~~Added test suite~~ Not in ReactOS - Fixed incorrect disk usage statistics - Fixed potential crashes when renaming stream to file or file to stream - Fixed potential crashes when querying hard links on file - Fixed potential hang when opening oplocked file - Fixed minor issues also uncovered by test suite
3635 lines
143 KiB
C
3635 lines
143 KiB
C
/* Copyright (c) Mark Harmstone 2016-17
|
|
*
|
|
* This file is part of WinBtrfs.
|
|
*
|
|
* WinBtrfs is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public Licence as published by
|
|
* the Free Software Foundation, either version 3 of the Licence, or
|
|
* (at your option) any later version.
|
|
*
|
|
* WinBtrfs is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Lesser General Public Licence for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public Licence
|
|
* along with WinBtrfs. If not, see <http://www.gnu.org/licenses/>. */
|
|
|
|
#include "btrfs_drv.h"
|
|
#include "xxhash.h"
|
|
#include "crc32c.h"
|
|
|
|
enum read_data_status {
|
|
ReadDataStatus_Pending,
|
|
ReadDataStatus_Success,
|
|
ReadDataStatus_Error,
|
|
ReadDataStatus_MissingDevice,
|
|
ReadDataStatus_Skip
|
|
};
|
|
|
|
struct read_data_context;
|
|
|
|
typedef struct {
|
|
struct read_data_context* context;
|
|
uint16_t stripenum;
|
|
bool rewrite;
|
|
PIRP Irp;
|
|
IO_STATUS_BLOCK iosb;
|
|
enum read_data_status status;
|
|
PMDL mdl;
|
|
uint64_t stripestart;
|
|
uint64_t stripeend;
|
|
} read_data_stripe;
|
|
|
|
typedef struct {
|
|
KEVENT Event;
|
|
NTSTATUS Status;
|
|
chunk* c;
|
|
uint64_t address;
|
|
uint32_t buflen;
|
|
LONG num_stripes, stripes_left;
|
|
uint64_t type;
|
|
uint32_t sector_size;
|
|
uint16_t firstoff, startoffstripe, sectors_per_stripe;
|
|
void* csum;
|
|
bool tree;
|
|
read_data_stripe* stripes;
|
|
uint8_t* va;
|
|
} read_data_context;
|
|
|
|
extern bool diskacc;
|
|
extern tPsUpdateDiskCounters fPsUpdateDiskCounters;
|
|
extern tCcCopyReadEx fCcCopyReadEx;
|
|
extern tFsRtlUpdateDiskCounters fFsRtlUpdateDiskCounters;
|
|
|
|
#define LZO_PAGE_SIZE 4096
|
|
|
|
_Function_class_(IO_COMPLETION_ROUTINE)
|
|
static NTSTATUS __stdcall read_data_completion(PDEVICE_OBJECT DeviceObject, PIRP Irp, PVOID conptr) {
|
|
read_data_stripe* stripe = conptr;
|
|
read_data_context* context = (read_data_context*)stripe->context;
|
|
|
|
UNUSED(DeviceObject);
|
|
|
|
stripe->iosb = Irp->IoStatus;
|
|
|
|
if (NT_SUCCESS(Irp->IoStatus.Status))
|
|
stripe->status = ReadDataStatus_Success;
|
|
else
|
|
stripe->status = ReadDataStatus_Error;
|
|
|
|
if (InterlockedDecrement(&context->stripes_left) == 0)
|
|
KeSetEvent(&context->Event, 0, false);
|
|
|
|
return STATUS_MORE_PROCESSING_REQUIRED;
|
|
}
|
|
|
|
NTSTATUS check_csum(device_extension* Vcb, uint8_t* data, uint32_t sectors, void* csum) {
|
|
void* csum2;
|
|
|
|
csum2 = ExAllocatePoolWithTag(PagedPool, Vcb->csum_size * sectors, ALLOC_TAG);
|
|
if (!csum2) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
do_calc_job(Vcb, data, sectors, csum2);
|
|
|
|
if (RtlCompareMemory(csum2, csum, sectors * Vcb->csum_size) != sectors * Vcb->csum_size) {
|
|
ExFreePool(csum2);
|
|
return STATUS_CRC_ERROR;
|
|
}
|
|
|
|
ExFreePool(csum2);
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
void get_tree_checksum(device_extension* Vcb, tree_header* th, void* csum) {
|
|
switch (Vcb->superblock.csum_type) {
|
|
case CSUM_TYPE_CRC32C:
|
|
*(uint32_t*)csum = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
|
|
break;
|
|
|
|
case CSUM_TYPE_XXHASH:
|
|
*(uint64_t*)csum = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
|
|
break;
|
|
|
|
case CSUM_TYPE_SHA256:
|
|
calc_sha256(csum, &th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
|
|
break;
|
|
|
|
case CSUM_TYPE_BLAKE2:
|
|
blake2b(csum, BLAKE2_HASH_SIZE, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
|
|
break;
|
|
}
|
|
}
|
|
|
|
bool check_tree_checksum(device_extension* Vcb, tree_header* th) {
|
|
switch (Vcb->superblock.csum_type) {
|
|
case CSUM_TYPE_CRC32C: {
|
|
uint32_t crc32 = ~calc_crc32c(0xffffffff, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
|
|
|
|
if (crc32 == *((uint32_t*)th->csum))
|
|
return true;
|
|
|
|
WARN("hash was %08x, expected %08x\n", crc32, *((uint32_t*)th->csum));
|
|
|
|
break;
|
|
}
|
|
|
|
case CSUM_TYPE_XXHASH: {
|
|
uint64_t hash = XXH64((uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum), 0);
|
|
|
|
if (hash == *((uint64_t*)th->csum))
|
|
return true;
|
|
|
|
WARN("hash was %I64x, expected %I64x\n", hash, *((uint64_t*)th->csum));
|
|
|
|
break;
|
|
}
|
|
|
|
case CSUM_TYPE_SHA256: {
|
|
uint8_t hash[SHA256_HASH_SIZE];
|
|
|
|
calc_sha256(hash, (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
|
|
|
|
if (RtlCompareMemory(hash, th, SHA256_HASH_SIZE) == SHA256_HASH_SIZE)
|
|
return true;
|
|
|
|
WARN("hash was invalid\n");
|
|
|
|
break;
|
|
}
|
|
|
|
case CSUM_TYPE_BLAKE2: {
|
|
uint8_t hash[BLAKE2_HASH_SIZE];
|
|
|
|
blake2b(hash, sizeof(hash), (uint8_t*)&th->fs_uuid, Vcb->superblock.node_size - sizeof(th->csum));
|
|
|
|
if (RtlCompareMemory(hash, th, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE)
|
|
return true;
|
|
|
|
WARN("hash was invalid\n");
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
void get_sector_csum(device_extension* Vcb, void* buf, void* csum) {
|
|
switch (Vcb->superblock.csum_type) {
|
|
case CSUM_TYPE_CRC32C:
|
|
*(uint32_t*)csum = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
|
|
break;
|
|
|
|
case CSUM_TYPE_XXHASH:
|
|
*(uint64_t*)csum = XXH64(buf, Vcb->superblock.sector_size, 0);
|
|
break;
|
|
|
|
case CSUM_TYPE_SHA256:
|
|
calc_sha256(csum, buf, Vcb->superblock.sector_size);
|
|
break;
|
|
|
|
case CSUM_TYPE_BLAKE2:
|
|
blake2b(csum, BLAKE2_HASH_SIZE, buf, Vcb->superblock.sector_size);
|
|
break;
|
|
}
|
|
}
|
|
|
|
bool check_sector_csum(device_extension* Vcb, void* buf, void* csum) {
|
|
switch (Vcb->superblock.csum_type) {
|
|
case CSUM_TYPE_CRC32C: {
|
|
uint32_t crc32 = ~calc_crc32c(0xffffffff, buf, Vcb->superblock.sector_size);
|
|
|
|
return *(uint32_t*)csum == crc32;
|
|
}
|
|
|
|
case CSUM_TYPE_XXHASH: {
|
|
uint64_t hash = XXH64(buf, Vcb->superblock.sector_size, 0);
|
|
|
|
return *(uint64_t*)csum == hash;
|
|
}
|
|
|
|
case CSUM_TYPE_SHA256: {
|
|
uint8_t hash[SHA256_HASH_SIZE];
|
|
|
|
calc_sha256(hash, buf, Vcb->superblock.sector_size);
|
|
|
|
return RtlCompareMemory(hash, csum, SHA256_HASH_SIZE) == SHA256_HASH_SIZE;
|
|
}
|
|
|
|
case CSUM_TYPE_BLAKE2: {
|
|
uint8_t hash[BLAKE2_HASH_SIZE];
|
|
|
|
blake2b(hash, sizeof(hash), buf, Vcb->superblock.sector_size);
|
|
|
|
return RtlCompareMemory(hash, csum, BLAKE2_HASH_SIZE) == BLAKE2_HASH_SIZE;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static NTSTATUS read_data_dup(device_extension* Vcb, uint8_t* buf, uint64_t addr, read_data_context* context, CHUNK_ITEM* ci,
|
|
device** devices, uint64_t generation) {
|
|
bool checksum_error = false;
|
|
uint16_t j, stripe = 0;
|
|
NTSTATUS Status;
|
|
CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
|
|
|
|
for (j = 0; j < ci->num_stripes; j++) {
|
|
if (context->stripes[j].status == ReadDataStatus_Error) {
|
|
WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
return context->stripes[j].iosb.Status;
|
|
} else if (context->stripes[j].status == ReadDataStatus_Success) {
|
|
stripe = j;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (context->stripes[stripe].status != ReadDataStatus_Success)
|
|
return STATUS_INTERNAL_ERROR;
|
|
|
|
if (context->tree) {
|
|
tree_header* th = (tree_header*)buf;
|
|
|
|
if (th->address != context->address || !check_tree_checksum(Vcb, th)) {
|
|
checksum_error = true;
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
} else if (generation != 0 && th->generation != generation) {
|
|
checksum_error = true;
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
|
|
}
|
|
} else if (context->csum) {
|
|
Status = check_csum(Vcb, buf, (ULONG)context->stripes[stripe].Irp->IoStatus.Information / context->sector_size, context->csum);
|
|
|
|
if (Status == STATUS_CRC_ERROR) {
|
|
checksum_error = true;
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
} else if (!NT_SUCCESS(Status)) {
|
|
ERR("check_csum returned %08lx\n", Status);
|
|
return Status;
|
|
}
|
|
}
|
|
|
|
if (!checksum_error)
|
|
return STATUS_SUCCESS;
|
|
|
|
if (ci->num_stripes == 1)
|
|
return STATUS_CRC_ERROR;
|
|
|
|
if (context->tree) {
|
|
tree_header* t2;
|
|
bool recovered = false;
|
|
|
|
t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
|
|
if (!t2) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
for (j = 0; j < ci->num_stripes; j++) {
|
|
if (j != stripe && devices[j] && devices[j]->devobj) {
|
|
Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + context->stripes[stripe].stripestart,
|
|
Vcb->superblock.node_size, (uint8_t*)t2, false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("sync_read_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
} else {
|
|
bool checksum_error = !check_tree_checksum(Vcb, t2);
|
|
|
|
if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
|
|
RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
|
|
recovered = true;
|
|
|
|
if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
|
|
Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + context->stripes[stripe].stripestart,
|
|
t2, Vcb->superblock.node_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
|
|
}
|
|
}
|
|
|
|
break;
|
|
} else if (t2->address != addr || checksum_error)
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
else
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_GENERATION_ERRORS);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!recovered) {
|
|
ERR("unrecoverable checksum error at %I64x\n", addr);
|
|
ExFreePool(t2);
|
|
return STATUS_CRC_ERROR;
|
|
}
|
|
|
|
ExFreePool(t2);
|
|
} else {
|
|
ULONG sectors = (ULONG)context->stripes[stripe].Irp->IoStatus.Information >> Vcb->sector_shift;
|
|
uint8_t* sector;
|
|
void* ptr = context->csum;
|
|
|
|
sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
|
|
if (!sector) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
for (ULONG i = 0; i < sectors; i++) {
|
|
if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
|
|
bool recovered = false;
|
|
|
|
for (j = 0; j < ci->num_stripes; j++) {
|
|
if (j != stripe && devices[j] && devices[j]->devobj) {
|
|
Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj,
|
|
cis[j].offset + context->stripes[stripe].stripestart + ((uint64_t)i << Vcb->sector_shift),
|
|
Vcb->superblock.sector_size, sector, false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("sync_read_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
} else {
|
|
if (check_sector_csum(Vcb, sector, ptr)) {
|
|
RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe]->devitem.dev_id);
|
|
recovered = true;
|
|
|
|
if (!Vcb->readonly && !devices[stripe]->readonly) { // write good data over bad
|
|
Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj,
|
|
cis[stripe].offset + context->stripes[stripe].stripestart + ((uint64_t)i << Vcb->sector_shift),
|
|
sector, Vcb->superblock.sector_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
|
|
}
|
|
}
|
|
|
|
break;
|
|
} else
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!recovered) {
|
|
ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
|
|
ExFreePool(sector);
|
|
return STATUS_CRC_ERROR;
|
|
}
|
|
}
|
|
|
|
ptr = (uint8_t*)ptr + Vcb->csum_size;
|
|
}
|
|
|
|
ExFreePool(sector);
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static NTSTATUS read_data_raid0(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
|
|
CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
|
|
for (uint16_t i = 0; i < ci->num_stripes; i++) {
|
|
if (context->stripes[i].status == ReadDataStatus_Error) {
|
|
WARN("stripe %u returned error %08lx\n", i, context->stripes[i].iosb.Status);
|
|
log_device_error(Vcb, devices[i], BTRFS_DEV_STAT_READ_ERRORS);
|
|
return context->stripes[i].iosb.Status;
|
|
}
|
|
}
|
|
|
|
if (context->tree) { // shouldn't happen, as trees shouldn't cross stripe boundaries
|
|
tree_header* th = (tree_header*)buf;
|
|
bool checksum_error = !check_tree_checksum(Vcb, th);
|
|
|
|
if (checksum_error || addr != th->address || (generation != 0 && generation != th->generation)) {
|
|
uint64_t off;
|
|
uint16_t stripe;
|
|
|
|
get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &off, &stripe);
|
|
|
|
ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
|
|
|
|
if (checksum_error) {
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
return STATUS_CRC_ERROR;
|
|
} else if (addr != th->address) {
|
|
WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
return STATUS_CRC_ERROR;
|
|
} else if (generation != 0 && generation != th->generation) {
|
|
WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
|
|
return STATUS_CRC_ERROR;
|
|
}
|
|
}
|
|
} else if (context->csum) {
|
|
NTSTATUS Status;
|
|
|
|
Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
|
|
|
|
if (Status == STATUS_CRC_ERROR) {
|
|
void* ptr = context->csum;
|
|
|
|
for (uint32_t i = 0; i < length >> Vcb->sector_shift; i++) {
|
|
if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
|
|
uint64_t off;
|
|
uint16_t stripe;
|
|
|
|
get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length, ci->num_stripes, &off, &stripe);
|
|
|
|
ERR("unrecoverable checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
|
|
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
|
|
return Status;
|
|
}
|
|
|
|
ptr = (uint8_t*)ptr + Vcb->csum_size;
|
|
}
|
|
|
|
return Status;
|
|
} else if (!NT_SUCCESS(Status)) {
|
|
ERR("check_csum returned %08lx\n", Status);
|
|
return Status;
|
|
}
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static NTSTATUS read_data_raid10(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context,
|
|
CHUNK_ITEM* ci, device** devices, uint64_t generation, uint64_t offset) {
|
|
uint16_t stripe = 0;
|
|
NTSTATUS Status;
|
|
bool checksum_error = false;
|
|
CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
|
|
|
|
for (uint16_t j = 0; j < ci->num_stripes; j++) {
|
|
if (context->stripes[j].status == ReadDataStatus_Error) {
|
|
WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
return context->stripes[j].iosb.Status;
|
|
} else if (context->stripes[j].status == ReadDataStatus_Success)
|
|
stripe = j;
|
|
}
|
|
|
|
if (context->tree) {
|
|
tree_header* th = (tree_header*)buf;
|
|
|
|
if (!check_tree_checksum(Vcb, th)) {
|
|
checksum_error = true;
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
} else if (addr != th->address) {
|
|
WARN("address of tree was %I64x, not %I64x as expected\n", th->address, addr);
|
|
checksum_error = true;
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
} else if (generation != 0 && generation != th->generation) {
|
|
WARN("generation of tree was %I64x, not %I64x as expected\n", th->generation, generation);
|
|
checksum_error = true;
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
|
|
}
|
|
} else if (context->csum) {
|
|
Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
|
|
|
|
if (Status == STATUS_CRC_ERROR)
|
|
checksum_error = true;
|
|
else if (!NT_SUCCESS(Status)) {
|
|
ERR("check_csum returned %08lx\n", Status);
|
|
return Status;
|
|
}
|
|
}
|
|
|
|
if (!checksum_error)
|
|
return STATUS_SUCCESS;
|
|
|
|
if (context->tree) {
|
|
tree_header* t2;
|
|
uint64_t off;
|
|
uint16_t badsubstripe = 0;
|
|
bool recovered = false;
|
|
|
|
t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size, ALLOC_TAG);
|
|
if (!t2) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &off, &stripe);
|
|
|
|
stripe *= ci->sub_stripes;
|
|
|
|
for (uint16_t j = 0; j < ci->sub_stripes; j++) {
|
|
if (context->stripes[stripe + j].status == ReadDataStatus_Success) {
|
|
badsubstripe = j;
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (uint16_t j = 0; j < ci->sub_stripes; j++) {
|
|
if (context->stripes[stripe + j].status != ReadDataStatus_Success && devices[stripe + j] && devices[stripe + j]->devobj) {
|
|
Status = sync_read_phys(devices[stripe + j]->devobj, devices[stripe + j]->fileobj, cis[stripe + j].offset + off,
|
|
Vcb->superblock.node_size, (uint8_t*)t2, false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("sync_read_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
} else {
|
|
bool checksum_error = !check_tree_checksum(Vcb, t2);
|
|
|
|
if (t2->address == addr && !checksum_error && (generation == 0 || t2->generation == generation)) {
|
|
RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe + j]->devitem.dev_id);
|
|
recovered = true;
|
|
|
|
if (!Vcb->readonly && !devices[stripe + badsubstripe]->readonly && devices[stripe + badsubstripe]->devobj) { // write good data over bad
|
|
Status = write_data_phys(devices[stripe + badsubstripe]->devobj, devices[stripe + badsubstripe]->fileobj,
|
|
cis[stripe + badsubstripe].offset + off, t2, Vcb->superblock.node_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[stripe + badsubstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
|
|
}
|
|
}
|
|
|
|
break;
|
|
} else if (t2->address != addr || checksum_error)
|
|
log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
else
|
|
log_device_error(Vcb, devices[stripe + j], BTRFS_DEV_STAT_GENERATION_ERRORS);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!recovered) {
|
|
ERR("unrecoverable checksum error at %I64x\n", addr);
|
|
ExFreePool(t2);
|
|
return STATUS_CRC_ERROR;
|
|
}
|
|
|
|
ExFreePool(t2);
|
|
} else {
|
|
ULONG sectors = length >> Vcb->sector_shift;
|
|
uint8_t* sector;
|
|
void* ptr = context->csum;
|
|
|
|
sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size, ALLOC_TAG);
|
|
if (!sector) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
for (ULONG i = 0; i < sectors; i++) {
|
|
if (!check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr)) {
|
|
uint64_t off;
|
|
uint16_t stripe2, badsubstripe = 0;
|
|
bool recovered = false;
|
|
|
|
get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
|
|
ci->num_stripes / ci->sub_stripes, &off, &stripe2);
|
|
|
|
stripe2 *= ci->sub_stripes;
|
|
|
|
for (uint16_t j = 0; j < ci->sub_stripes; j++) {
|
|
if (context->stripes[stripe2 + j].status == ReadDataStatus_Success) {
|
|
badsubstripe = j;
|
|
break;
|
|
}
|
|
}
|
|
|
|
log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
|
|
for (uint16_t j = 0; j < ci->sub_stripes; j++) {
|
|
if (context->stripes[stripe2 + j].status != ReadDataStatus_Success && devices[stripe2 + j] && devices[stripe2 + j]->devobj) {
|
|
Status = sync_read_phys(devices[stripe2 + j]->devobj, devices[stripe2 + j]->fileobj, cis[stripe2 + j].offset + off,
|
|
Vcb->superblock.sector_size, sector, false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("sync_read_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
} else {
|
|
if (check_sector_csum(Vcb, sector, ptr)) {
|
|
RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe2 + j]->devitem.dev_id);
|
|
recovered = true;
|
|
|
|
if (!Vcb->readonly && !devices[stripe2 + badsubstripe]->readonly && devices[stripe2 + badsubstripe]->devobj) { // write good data over bad
|
|
Status = write_data_phys(devices[stripe2 + badsubstripe]->devobj, devices[stripe2 + badsubstripe]->fileobj,
|
|
cis[stripe2 + badsubstripe].offset + off, sector, Vcb->superblock.sector_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[stripe2 + badsubstripe], BTRFS_DEV_STAT_READ_ERRORS);
|
|
}
|
|
}
|
|
|
|
break;
|
|
} else
|
|
log_device_error(Vcb, devices[stripe2 + j], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!recovered) {
|
|
ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
|
|
ExFreePool(sector);
|
|
return STATUS_CRC_ERROR;
|
|
}
|
|
}
|
|
|
|
ptr = (uint8_t*)ptr + Vcb->csum_size;
|
|
}
|
|
|
|
ExFreePool(sector);
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
static NTSTATUS read_data_raid5(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
|
|
device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
|
|
NTSTATUS Status;
|
|
bool checksum_error = false;
|
|
CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
|
|
uint16_t j, stripe = 0;
|
|
bool no_success = true;
|
|
|
|
for (j = 0; j < ci->num_stripes; j++) {
|
|
if (context->stripes[j].status == ReadDataStatus_Error) {
|
|
WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
return context->stripes[j].iosb.Status;
|
|
} else if (context->stripes[j].status == ReadDataStatus_Success) {
|
|
stripe = j;
|
|
no_success = false;
|
|
}
|
|
}
|
|
|
|
if (c) { // check partial stripes
|
|
LIST_ENTRY* le;
|
|
uint64_t ps_length = (ci->num_stripes - 1) * ci->stripe_length;
|
|
|
|
ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
|
|
|
|
le = c->partial_stripes.Flink;
|
|
while (le != &c->partial_stripes) {
|
|
partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
|
|
|
|
if (ps->address + ps_length > addr && ps->address < addr + length) {
|
|
ULONG runlength, index;
|
|
|
|
runlength = RtlFindFirstRunClear(&ps->bmp, &index);
|
|
|
|
while (runlength != 0) {
|
|
if (index >= ps->bmplen)
|
|
break;
|
|
|
|
if (index + runlength >= ps->bmplen) {
|
|
runlength = ps->bmplen - index;
|
|
|
|
if (runlength == 0)
|
|
break;
|
|
}
|
|
|
|
uint64_t runstart = ps->address + (index << Vcb->sector_shift);
|
|
uint64_t runend = runstart + (runlength << Vcb->sector_shift);
|
|
uint64_t start = max(runstart, addr);
|
|
uint64_t end = min(runend, addr + length);
|
|
|
|
if (end > start)
|
|
RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
|
|
|
|
runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
|
|
}
|
|
} else if (ps->address >= addr + length)
|
|
break;
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
ExReleaseResourceLite(&c->partial_stripes_lock);
|
|
}
|
|
|
|
if (context->tree) {
|
|
tree_header* th = (tree_header*)buf;
|
|
|
|
if (addr != th->address || !check_tree_checksum(Vcb, th)) {
|
|
checksum_error = true;
|
|
if (!no_success && !degraded)
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
} else if (generation != 0 && generation != th->generation) {
|
|
checksum_error = true;
|
|
if (!no_success && !degraded)
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
|
|
}
|
|
} else if (context->csum) {
|
|
Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
|
|
|
|
if (Status == STATUS_CRC_ERROR) {
|
|
if (!degraded)
|
|
WARN("checksum error\n");
|
|
checksum_error = true;
|
|
} else if (!NT_SUCCESS(Status)) {
|
|
ERR("check_csum returned %08lx\n", Status);
|
|
return Status;
|
|
}
|
|
} else if (degraded)
|
|
checksum_error = true;
|
|
|
|
if (!checksum_error)
|
|
return STATUS_SUCCESS;
|
|
|
|
if (context->tree) {
|
|
uint16_t parity;
|
|
uint64_t off;
|
|
bool recovered = false, first = true, failed = false;
|
|
uint8_t* t2;
|
|
|
|
t2 = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * 2, ALLOC_TAG);
|
|
if (!t2) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &off, &stripe);
|
|
|
|
parity = (((addr - offset) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
|
|
|
|
stripe = (parity + stripe + 1) % ci->num_stripes;
|
|
|
|
for (j = 0; j < ci->num_stripes; j++) {
|
|
if (j != stripe) {
|
|
if (devices[j] && devices[j]->devobj) {
|
|
if (first) {
|
|
Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2, false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("sync_read_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
failed = true;
|
|
break;
|
|
}
|
|
|
|
first = false;
|
|
} else {
|
|
Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size, t2 + Vcb->superblock.node_size, false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("sync_read_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
failed = true;
|
|
break;
|
|
}
|
|
|
|
do_xor(t2, t2 + Vcb->superblock.node_size, Vcb->superblock.node_size);
|
|
}
|
|
} else {
|
|
failed = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!failed) {
|
|
tree_header* t3 = (tree_header*)t2;
|
|
|
|
if (t3->address == addr && check_tree_checksum(Vcb, t3) && (generation == 0 || t3->generation == generation)) {
|
|
RtlCopyMemory(buf, t2, Vcb->superblock.node_size);
|
|
|
|
if (!degraded)
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[stripe]->devitem.dev_id);
|
|
|
|
recovered = true;
|
|
|
|
if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
|
|
Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off, t2, Vcb->superblock.node_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!recovered) {
|
|
ERR("unrecoverable checksum error at %I64x\n", addr);
|
|
ExFreePool(t2);
|
|
return STATUS_CRC_ERROR;
|
|
}
|
|
|
|
ExFreePool(t2);
|
|
} else {
|
|
ULONG sectors = length >> Vcb->sector_shift;
|
|
uint8_t* sector;
|
|
void* ptr = context->csum;
|
|
|
|
sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.sector_size * 2, ALLOC_TAG);
|
|
if (!sector) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
for (ULONG i = 0; i < sectors; i++) {
|
|
uint16_t parity;
|
|
uint64_t off;
|
|
|
|
get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
|
|
ci->num_stripes - 1, &off, &stripe);
|
|
|
|
parity = (((addr - offset + ((uint64_t)i << Vcb->sector_shift)) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
|
|
|
|
stripe = (parity + stripe + 1) % ci->num_stripes;
|
|
|
|
if (!devices[stripe] || !devices[stripe]->devobj || (ptr && !check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr))) {
|
|
bool recovered = false, first = true, failed = false;
|
|
|
|
if (devices[stripe] && devices[stripe]->devobj)
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_READ_ERRORS);
|
|
|
|
for (j = 0; j < ci->num_stripes; j++) {
|
|
if (j != stripe) {
|
|
if (devices[j] && devices[j]->devobj) {
|
|
if (first) {
|
|
Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size, sector, false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("sync_read_phys returned %08lx\n", Status);
|
|
failed = true;
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
break;
|
|
}
|
|
|
|
first = false;
|
|
} else {
|
|
Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
|
|
sector + Vcb->superblock.sector_size, false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("sync_read_phys returned %08lx\n", Status);
|
|
failed = true;
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
break;
|
|
}
|
|
|
|
do_xor(sector, sector + Vcb->superblock.sector_size, Vcb->superblock.sector_size);
|
|
}
|
|
} else {
|
|
failed = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!failed) {
|
|
if (!ptr || check_sector_csum(Vcb, sector, ptr)) {
|
|
RtlCopyMemory(buf + (i << Vcb->sector_shift), sector, Vcb->superblock.sector_size);
|
|
|
|
if (!degraded)
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift), devices[stripe]->devitem.dev_id);
|
|
|
|
recovered = true;
|
|
|
|
if (!Vcb->readonly && devices[stripe] && !devices[stripe]->readonly && devices[stripe]->devobj) { // write good data over bad
|
|
Status = write_data_phys(devices[stripe]->devobj, devices[stripe]->fileobj, cis[stripe].offset + off,
|
|
sector, Vcb->superblock.sector_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_WRITE_ERRORS);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!recovered) {
|
|
ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
|
|
ExFreePool(sector);
|
|
return STATUS_CRC_ERROR;
|
|
}
|
|
}
|
|
|
|
if (ptr)
|
|
ptr = (uint8_t*)ptr + Vcb->csum_size;
|
|
}
|
|
|
|
ExFreePool(sector);
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
void raid6_recover2(uint8_t* sectors, uint16_t num_stripes, ULONG sector_size, uint16_t missing1, uint16_t missing2, uint8_t* out) {
|
|
if (missing1 == num_stripes - 2 || missing2 == num_stripes - 2) { // reconstruct from q and data
|
|
uint16_t missing = missing1 == (num_stripes - 2) ? missing2 : missing1;
|
|
uint16_t stripe;
|
|
|
|
stripe = num_stripes - 3;
|
|
|
|
if (stripe == missing)
|
|
RtlZeroMemory(out, sector_size);
|
|
else
|
|
RtlCopyMemory(out, sectors + (stripe * sector_size), sector_size);
|
|
|
|
do {
|
|
stripe--;
|
|
|
|
galois_double(out, sector_size);
|
|
|
|
if (stripe != missing)
|
|
do_xor(out, sectors + (stripe * sector_size), sector_size);
|
|
} while (stripe > 0);
|
|
|
|
do_xor(out, sectors + ((num_stripes - 1) * sector_size), sector_size);
|
|
|
|
if (missing != 0)
|
|
galois_divpower(out, (uint8_t)missing, sector_size);
|
|
} else { // reconstruct from p and q
|
|
uint16_t x = missing1, y = missing2, stripe;
|
|
uint8_t gyx, gx, denom, a, b, *p, *q, *pxy, *qxy;
|
|
uint32_t j;
|
|
|
|
stripe = num_stripes - 3;
|
|
|
|
pxy = out + sector_size;
|
|
qxy = out;
|
|
|
|
if (stripe == missing1 || stripe == missing2) {
|
|
RtlZeroMemory(qxy, sector_size);
|
|
RtlZeroMemory(pxy, sector_size);
|
|
} else {
|
|
RtlCopyMemory(qxy, sectors + (stripe * sector_size), sector_size);
|
|
RtlCopyMemory(pxy, sectors + (stripe * sector_size), sector_size);
|
|
}
|
|
|
|
do {
|
|
stripe--;
|
|
|
|
galois_double(qxy, sector_size);
|
|
|
|
if (stripe != missing1 && stripe != missing2) {
|
|
do_xor(qxy, sectors + (stripe * sector_size), sector_size);
|
|
do_xor(pxy, sectors + (stripe * sector_size), sector_size);
|
|
}
|
|
} while (stripe > 0);
|
|
|
|
gyx = gpow2(y > x ? (y-x) : (255-x+y));
|
|
gx = gpow2(255-x);
|
|
|
|
denom = gdiv(1, gyx ^ 1);
|
|
a = gmul(gyx, denom);
|
|
b = gmul(gx, denom);
|
|
|
|
p = sectors + ((num_stripes - 2) * sector_size);
|
|
q = sectors + ((num_stripes - 1) * sector_size);
|
|
|
|
for (j = 0; j < sector_size; j++) {
|
|
*qxy = gmul(a, *p ^ *pxy) ^ gmul(b, *q ^ *qxy);
|
|
|
|
p++;
|
|
q++;
|
|
pxy++;
|
|
qxy++;
|
|
}
|
|
|
|
do_xor(out + sector_size, out, sector_size);
|
|
do_xor(out + sector_size, sectors + ((num_stripes - 2) * sector_size), sector_size);
|
|
}
|
|
}
|
|
|
|
static NTSTATUS read_data_raid6(device_extension* Vcb, uint8_t* buf, uint64_t addr, uint32_t length, read_data_context* context, CHUNK_ITEM* ci,
|
|
device** devices, uint64_t offset, uint64_t generation, chunk* c, bool degraded) {
|
|
NTSTATUS Status;
|
|
bool checksum_error = false;
|
|
CHUNK_ITEM_STRIPE* cis = (CHUNK_ITEM_STRIPE*)&ci[1];
|
|
uint16_t stripe = 0, j;
|
|
bool no_success = true;
|
|
|
|
for (j = 0; j < ci->num_stripes; j++) {
|
|
if (context->stripes[j].status == ReadDataStatus_Error) {
|
|
WARN("stripe %u returned error %08lx\n", j, context->stripes[j].iosb.Status);
|
|
|
|
if (devices[j])
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
return context->stripes[j].iosb.Status;
|
|
} else if (context->stripes[j].status == ReadDataStatus_Success) {
|
|
stripe = j;
|
|
no_success = false;
|
|
}
|
|
}
|
|
|
|
if (c) { // check partial stripes
|
|
LIST_ENTRY* le;
|
|
uint64_t ps_length = (ci->num_stripes - 2) * ci->stripe_length;
|
|
|
|
ExAcquireResourceSharedLite(&c->partial_stripes_lock, true);
|
|
|
|
le = c->partial_stripes.Flink;
|
|
while (le != &c->partial_stripes) {
|
|
partial_stripe* ps = CONTAINING_RECORD(le, partial_stripe, list_entry);
|
|
|
|
if (ps->address + ps_length > addr && ps->address < addr + length) {
|
|
ULONG runlength, index;
|
|
|
|
runlength = RtlFindFirstRunClear(&ps->bmp, &index);
|
|
|
|
while (runlength != 0) {
|
|
if (index >= ps->bmplen)
|
|
break;
|
|
|
|
if (index + runlength >= ps->bmplen) {
|
|
runlength = ps->bmplen - index;
|
|
|
|
if (runlength == 0)
|
|
break;
|
|
}
|
|
|
|
uint64_t runstart = ps->address + (index << Vcb->sector_shift);
|
|
uint64_t runend = runstart + (runlength << Vcb->sector_shift);
|
|
uint64_t start = max(runstart, addr);
|
|
uint64_t end = min(runend, addr + length);
|
|
|
|
if (end > start)
|
|
RtlCopyMemory(buf + start - addr, &ps->data[start - ps->address], (ULONG)(end - start));
|
|
|
|
runlength = RtlFindNextForwardRunClear(&ps->bmp, index + runlength, &index);
|
|
}
|
|
} else if (ps->address >= addr + length)
|
|
break;
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
ExReleaseResourceLite(&c->partial_stripes_lock);
|
|
}
|
|
|
|
if (context->tree) {
|
|
tree_header* th = (tree_header*)buf;
|
|
|
|
if (addr != th->address || !check_tree_checksum(Vcb, th)) {
|
|
checksum_error = true;
|
|
if (!no_success && !degraded && devices[stripe])
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
} else if (generation != 0 && generation != th->generation) {
|
|
checksum_error = true;
|
|
if (!no_success && !degraded && devices[stripe])
|
|
log_device_error(Vcb, devices[stripe], BTRFS_DEV_STAT_GENERATION_ERRORS);
|
|
}
|
|
} else if (context->csum) {
|
|
Status = check_csum(Vcb, buf, length >> Vcb->sector_shift, context->csum);
|
|
|
|
if (Status == STATUS_CRC_ERROR) {
|
|
if (!degraded)
|
|
WARN("checksum error\n");
|
|
checksum_error = true;
|
|
} else if (!NT_SUCCESS(Status)) {
|
|
ERR("check_csum returned %08lx\n", Status);
|
|
return Status;
|
|
}
|
|
} else if (degraded)
|
|
checksum_error = true;
|
|
|
|
if (!checksum_error)
|
|
return STATUS_SUCCESS;
|
|
|
|
if (context->tree) {
|
|
uint8_t* sector;
|
|
uint16_t k, physstripe, parity1, parity2, error_stripe = 0;
|
|
uint64_t off;
|
|
bool recovered = false, failed = false;
|
|
ULONG num_errors = 0;
|
|
|
|
sector = ExAllocatePoolWithTag(NonPagedPool, Vcb->superblock.node_size * (ci->num_stripes + 2), ALLOC_TAG);
|
|
if (!sector) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &off, &stripe);
|
|
|
|
parity1 = (((addr - offset) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
|
|
parity2 = (parity1 + 1) % ci->num_stripes;
|
|
|
|
physstripe = (parity2 + stripe + 1) % ci->num_stripes;
|
|
|
|
j = (parity2 + 1) % ci->num_stripes;
|
|
|
|
for (k = 0; k < ci->num_stripes - 1; k++) {
|
|
if (j != physstripe) {
|
|
if (devices[j] && devices[j]->devobj) {
|
|
Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.node_size,
|
|
sector + (k * Vcb->superblock.node_size), false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("sync_read_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
num_errors++;
|
|
error_stripe = k;
|
|
|
|
if (num_errors > 1) {
|
|
failed = true;
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
num_errors++;
|
|
error_stripe = k;
|
|
|
|
if (num_errors > 1) {
|
|
failed = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
j = (j + 1) % ci->num_stripes;
|
|
}
|
|
|
|
if (!failed) {
|
|
if (num_errors == 0) {
|
|
tree_header* th = (tree_header*)(sector + (stripe * Vcb->superblock.node_size));
|
|
|
|
RtlCopyMemory(sector + (stripe * Vcb->superblock.node_size), sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size),
|
|
Vcb->superblock.node_size);
|
|
|
|
for (j = 0; j < ci->num_stripes - 2; j++) {
|
|
if (j != stripe)
|
|
do_xor(sector + (stripe * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size), Vcb->superblock.node_size);
|
|
}
|
|
|
|
if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
|
|
RtlCopyMemory(buf, sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
|
|
|
|
if (devices[physstripe] && devices[physstripe]->devobj)
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
|
|
|
|
recovered = true;
|
|
|
|
if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
|
|
Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
|
|
sector + (stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!recovered) {
|
|
tree_header* th = (tree_header*)(sector + (ci->num_stripes * Vcb->superblock.node_size));
|
|
bool read_q = false;
|
|
|
|
if (devices[parity2] && devices[parity2]->devobj) {
|
|
Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
|
|
Vcb->superblock.node_size, sector + ((ci->num_stripes - 1) * Vcb->superblock.node_size), false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("sync_read_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
} else
|
|
read_q = true;
|
|
}
|
|
|
|
if (read_q) {
|
|
if (num_errors == 1) {
|
|
raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, error_stripe, sector + (ci->num_stripes * Vcb->superblock.node_size));
|
|
|
|
if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation))
|
|
recovered = true;
|
|
} else {
|
|
for (j = 0; j < ci->num_stripes - 1; j++) {
|
|
if (j != stripe) {
|
|
raid6_recover2(sector, ci->num_stripes, Vcb->superblock.node_size, stripe, j, sector + (ci->num_stripes * Vcb->superblock.node_size));
|
|
|
|
if (th->address == addr && check_tree_checksum(Vcb, th) && (generation == 0 || th->generation == generation)) {
|
|
recovered = true;
|
|
error_stripe = j;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (recovered) {
|
|
uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
|
|
|
|
if (devices[physstripe] && devices[physstripe]->devobj)
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n", addr, devices[physstripe]->devitem.dev_id);
|
|
|
|
RtlCopyMemory(buf, sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
|
|
|
|
if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
|
|
Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
|
|
sector + (ci->num_stripes * Vcb->superblock.node_size), Vcb->superblock.node_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
|
|
}
|
|
}
|
|
|
|
if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
|
|
if (error_stripe == ci->num_stripes - 2) {
|
|
ERR("recovering from parity error at %I64x, device %I64x\n", addr, devices[error_stripe_phys]->devitem.dev_id);
|
|
|
|
log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
|
|
RtlZeroMemory(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), Vcb->superblock.node_size);
|
|
|
|
for (j = 0; j < ci->num_stripes - 2; j++) {
|
|
if (j == stripe) {
|
|
do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (ci->num_stripes * Vcb->superblock.node_size),
|
|
Vcb->superblock.node_size);
|
|
} else {
|
|
do_xor(sector + ((ci->num_stripes - 2) * Vcb->superblock.node_size), sector + (j * Vcb->superblock.node_size),
|
|
Vcb->superblock.node_size);
|
|
}
|
|
}
|
|
} else {
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((error_stripe - stripe) * ci->stripe_length),
|
|
devices[error_stripe_phys]->devitem.dev_id);
|
|
|
|
log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
|
|
RtlCopyMemory(sector + (error_stripe * Vcb->superblock.node_size),
|
|
sector + ((ci->num_stripes + 1) * Vcb->superblock.node_size), Vcb->superblock.node_size);
|
|
}
|
|
}
|
|
|
|
if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
|
|
Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
|
|
sector + (error_stripe * Vcb->superblock.node_size), Vcb->superblock.node_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!recovered) {
|
|
ERR("unrecoverable checksum error at %I64x\n", addr);
|
|
ExFreePool(sector);
|
|
return STATUS_CRC_ERROR;
|
|
}
|
|
|
|
ExFreePool(sector);
|
|
} else {
|
|
ULONG sectors = length >> Vcb->sector_shift;
|
|
uint8_t* sector;
|
|
void* ptr = context->csum;
|
|
|
|
sector = ExAllocatePoolWithTag(NonPagedPool, (ci->num_stripes + 2) << Vcb->sector_shift, ALLOC_TAG);
|
|
if (!sector) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
for (ULONG i = 0; i < sectors; i++) {
|
|
uint64_t off;
|
|
uint16_t physstripe, parity1, parity2;
|
|
|
|
get_raid0_offset(addr - offset + ((uint64_t)i << Vcb->sector_shift), ci->stripe_length,
|
|
ci->num_stripes - 2, &off, &stripe);
|
|
|
|
parity1 = (((addr - offset + ((uint64_t)i << Vcb->sector_shift)) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
|
|
parity2 = (parity1 + 1) % ci->num_stripes;
|
|
|
|
physstripe = (parity2 + stripe + 1) % ci->num_stripes;
|
|
|
|
if (!devices[physstripe] || !devices[physstripe]->devobj || (context->csum && !check_sector_csum(Vcb, buf + (i << Vcb->sector_shift), ptr))) {
|
|
uint16_t error_stripe = 0;
|
|
bool recovered = false, failed = false;
|
|
ULONG num_errors = 0;
|
|
|
|
if (devices[physstripe] && devices[physstripe]->devobj)
|
|
log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_READ_ERRORS);
|
|
|
|
j = (parity2 + 1) % ci->num_stripes;
|
|
|
|
for (uint16_t k = 0; k < ci->num_stripes - 1; k++) {
|
|
if (j != physstripe) {
|
|
if (devices[j] && devices[j]->devobj) {
|
|
Status = sync_read_phys(devices[j]->devobj, devices[j]->fileobj, cis[j].offset + off, Vcb->superblock.sector_size,
|
|
sector + ((ULONG)k << Vcb->sector_shift), false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("sync_read_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[j], BTRFS_DEV_STAT_READ_ERRORS);
|
|
num_errors++;
|
|
error_stripe = k;
|
|
|
|
if (num_errors > 1) {
|
|
failed = true;
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
num_errors++;
|
|
error_stripe = k;
|
|
|
|
if (num_errors > 1) {
|
|
failed = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
j = (j + 1) % ci->num_stripes;
|
|
}
|
|
|
|
if (!failed) {
|
|
if (num_errors == 0) {
|
|
RtlCopyMemory(sector + ((unsigned int)stripe << Vcb->sector_shift), sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), Vcb->superblock.sector_size);
|
|
|
|
for (j = 0; j < ci->num_stripes - 2; j++) {
|
|
if (j != stripe)
|
|
do_xor(sector + ((unsigned int)stripe << Vcb->sector_shift), sector + ((unsigned int)j << Vcb->sector_shift), Vcb->superblock.sector_size);
|
|
}
|
|
|
|
if (!ptr || check_sector_csum(Vcb, sector + ((unsigned int)stripe << Vcb->sector_shift), ptr)) {
|
|
RtlCopyMemory(buf + (i << Vcb->sector_shift), sector + ((unsigned int)stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
|
|
|
|
if (devices[physstripe] && devices[physstripe]->devobj)
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift),
|
|
devices[physstripe]->devitem.dev_id);
|
|
|
|
recovered = true;
|
|
|
|
if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
|
|
Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
|
|
sector + ((unsigned int)stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!recovered) {
|
|
bool read_q = false;
|
|
|
|
if (devices[parity2] && devices[parity2]->devobj) {
|
|
Status = sync_read_phys(devices[parity2]->devobj, devices[parity2]->fileobj, cis[parity2].offset + off,
|
|
Vcb->superblock.sector_size, sector + ((unsigned int)(ci->num_stripes - 1) << Vcb->sector_shift), false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("sync_read_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[parity2], BTRFS_DEV_STAT_READ_ERRORS);
|
|
} else
|
|
read_q = true;
|
|
}
|
|
|
|
if (read_q) {
|
|
if (num_errors == 1) {
|
|
raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, error_stripe, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift));
|
|
|
|
if (!devices[physstripe] || !devices[physstripe]->devobj)
|
|
recovered = true;
|
|
else
|
|
recovered = check_sector_csum(Vcb, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), ptr);
|
|
} else {
|
|
for (j = 0; j < ci->num_stripes - 1; j++) {
|
|
if (j != stripe) {
|
|
raid6_recover2(sector, ci->num_stripes, Vcb->superblock.sector_size, stripe, j, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift));
|
|
|
|
if (check_sector_csum(Vcb, sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), ptr)) {
|
|
recovered = true;
|
|
error_stripe = j;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (recovered) {
|
|
uint16_t error_stripe_phys = (parity2 + error_stripe + 1) % ci->num_stripes;
|
|
|
|
if (devices[physstripe] && devices[physstripe]->devobj)
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n",
|
|
addr + ((uint64_t)i << Vcb->sector_shift), devices[physstripe]->devitem.dev_id);
|
|
|
|
RtlCopyMemory(buf + (i << Vcb->sector_shift), sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), Vcb->superblock.sector_size);
|
|
|
|
if (!Vcb->readonly && devices[physstripe] && devices[physstripe]->devobj && !devices[physstripe]->readonly) { // write good data over bad
|
|
Status = write_data_phys(devices[physstripe]->devobj, devices[physstripe]->fileobj, cis[physstripe].offset + off,
|
|
sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift), Vcb->superblock.sector_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[physstripe], BTRFS_DEV_STAT_WRITE_ERRORS);
|
|
}
|
|
}
|
|
|
|
if (devices[error_stripe_phys] && devices[error_stripe_phys]->devobj) {
|
|
if (error_stripe == ci->num_stripes - 2) {
|
|
ERR("recovering from parity error at %I64x, device %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift),
|
|
devices[error_stripe_phys]->devitem.dev_id);
|
|
|
|
log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
|
|
RtlZeroMemory(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), Vcb->superblock.sector_size);
|
|
|
|
for (j = 0; j < ci->num_stripes - 2; j++) {
|
|
if (j == stripe) {
|
|
do_xor(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), sector + ((unsigned int)ci->num_stripes << Vcb->sector_shift),
|
|
Vcb->superblock.sector_size);
|
|
} else {
|
|
do_xor(sector + ((unsigned int)(ci->num_stripes - 2) << Vcb->sector_shift), sector + ((unsigned int)j << Vcb->sector_shift),
|
|
Vcb->superblock.sector_size);
|
|
}
|
|
}
|
|
} else {
|
|
ERR("recovering from checksum error at %I64x, device %I64x\n",
|
|
addr + ((uint64_t)i << Vcb->sector_shift) + ((error_stripe - stripe) * ci->stripe_length),
|
|
devices[error_stripe_phys]->devitem.dev_id);
|
|
|
|
log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_CORRUPTION_ERRORS);
|
|
|
|
RtlCopyMemory(sector + ((unsigned int)error_stripe << Vcb->sector_shift),
|
|
sector + ((unsigned int)(ci->num_stripes + 1) << Vcb->sector_shift), Vcb->superblock.sector_size);
|
|
}
|
|
}
|
|
|
|
if (!Vcb->readonly && devices[error_stripe_phys] && devices[error_stripe_phys]->devobj && !devices[error_stripe_phys]->readonly) { // write good data over bad
|
|
Status = write_data_phys(devices[error_stripe_phys]->devobj, devices[error_stripe_phys]->fileobj, cis[error_stripe_phys].offset + off,
|
|
sector + ((unsigned int)error_stripe << Vcb->sector_shift), Vcb->superblock.sector_size);
|
|
if (!NT_SUCCESS(Status)) {
|
|
WARN("write_data_phys returned %08lx\n", Status);
|
|
log_device_error(Vcb, devices[error_stripe_phys], BTRFS_DEV_STAT_WRITE_ERRORS);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!recovered) {
|
|
ERR("unrecoverable checksum error at %I64x\n", addr + ((uint64_t)i << Vcb->sector_shift));
|
|
ExFreePool(sector);
|
|
return STATUS_CRC_ERROR;
|
|
}
|
|
}
|
|
|
|
if (ptr)
|
|
ptr = (uint8_t*)ptr + Vcb->csum_size;
|
|
}
|
|
|
|
ExFreePool(sector);
|
|
}
|
|
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
NTSTATUS read_data(_In_ device_extension* Vcb, _In_ uint64_t addr, _In_ uint32_t length, _In_reads_bytes_opt_(length*sizeof(uint32_t)/Vcb->superblock.sector_size) void* csum,
|
|
_In_ bool is_tree, _Out_writes_bytes_(length) uint8_t* buf, _In_opt_ chunk* c, _Out_opt_ chunk** pc, _In_opt_ PIRP Irp, _In_ uint64_t generation, _In_ bool file_read,
|
|
_In_ ULONG priority) {
|
|
CHUNK_ITEM* ci;
|
|
CHUNK_ITEM_STRIPE* cis;
|
|
read_data_context context;
|
|
uint64_t type, offset, total_reading = 0;
|
|
NTSTATUS Status;
|
|
device** devices = NULL;
|
|
uint16_t i, startoffstripe, allowed_missing, missing_devices = 0;
|
|
uint8_t* dummypage = NULL;
|
|
PMDL dummy_mdl = NULL;
|
|
bool need_to_wait;
|
|
uint64_t lockaddr, locklen;
|
|
|
|
if (Vcb->log_to_phys_loaded) {
|
|
if (!c) {
|
|
c = get_chunk_from_address(Vcb, addr);
|
|
|
|
if (!c) {
|
|
ERR("get_chunk_from_address failed\n");
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
}
|
|
|
|
ci = c->chunk_item;
|
|
offset = c->offset;
|
|
devices = c->devices;
|
|
|
|
if (pc)
|
|
*pc = c;
|
|
} else {
|
|
LIST_ENTRY* le = Vcb->sys_chunks.Flink;
|
|
|
|
ci = NULL;
|
|
|
|
c = NULL;
|
|
while (le != &Vcb->sys_chunks) {
|
|
sys_chunk* sc = CONTAINING_RECORD(le, sys_chunk, list_entry);
|
|
|
|
if (sc->key.obj_id == 0x100 && sc->key.obj_type == TYPE_CHUNK_ITEM && sc->key.offset <= addr) {
|
|
CHUNK_ITEM* chunk_item = sc->data;
|
|
|
|
if ((addr - sc->key.offset) < chunk_item->size && chunk_item->num_stripes > 0) {
|
|
ci = chunk_item;
|
|
offset = sc->key.offset;
|
|
cis = (CHUNK_ITEM_STRIPE*)&chunk_item[1];
|
|
|
|
devices = ExAllocatePoolWithTag(NonPagedPool, sizeof(device*) * ci->num_stripes, ALLOC_TAG);
|
|
if (!devices) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
for (i = 0; i < ci->num_stripes; i++) {
|
|
devices[i] = find_device_from_uuid(Vcb, &cis[i].dev_uuid);
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
if (!ci) {
|
|
ERR("could not find chunk for %I64x in bootstrap\n", addr);
|
|
return STATUS_INTERNAL_ERROR;
|
|
}
|
|
|
|
if (pc)
|
|
*pc = NULL;
|
|
}
|
|
|
|
if (ci->type & BLOCK_FLAG_DUPLICATE) {
|
|
type = BLOCK_FLAG_DUPLICATE;
|
|
allowed_missing = ci->num_stripes - 1;
|
|
} else if (ci->type & BLOCK_FLAG_RAID0) {
|
|
type = BLOCK_FLAG_RAID0;
|
|
allowed_missing = 0;
|
|
} else if (ci->type & BLOCK_FLAG_RAID1) {
|
|
type = BLOCK_FLAG_DUPLICATE;
|
|
allowed_missing = 1;
|
|
} else if (ci->type & BLOCK_FLAG_RAID10) {
|
|
type = BLOCK_FLAG_RAID10;
|
|
allowed_missing = 1;
|
|
} else if (ci->type & BLOCK_FLAG_RAID5) {
|
|
type = BLOCK_FLAG_RAID5;
|
|
allowed_missing = 1;
|
|
} else if (ci->type & BLOCK_FLAG_RAID6) {
|
|
type = BLOCK_FLAG_RAID6;
|
|
allowed_missing = 2;
|
|
} else if (ci->type & BLOCK_FLAG_RAID1C3) {
|
|
type = BLOCK_FLAG_DUPLICATE;
|
|
allowed_missing = 2;
|
|
} else if (ci->type & BLOCK_FLAG_RAID1C4) {
|
|
type = BLOCK_FLAG_DUPLICATE;
|
|
allowed_missing = 3;
|
|
} else { // SINGLE
|
|
type = BLOCK_FLAG_DUPLICATE;
|
|
allowed_missing = 0;
|
|
}
|
|
|
|
cis = (CHUNK_ITEM_STRIPE*)&ci[1];
|
|
|
|
RtlZeroMemory(&context, sizeof(read_data_context));
|
|
KeInitializeEvent(&context.Event, NotificationEvent, false);
|
|
|
|
context.stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe) * ci->num_stripes, ALLOC_TAG);
|
|
if (!context.stripes) {
|
|
ERR("out of memory\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6)) {
|
|
get_raid56_lock_range(c, addr, length, &lockaddr, &locklen);
|
|
chunk_lock_range(Vcb, c, lockaddr, locklen);
|
|
}
|
|
|
|
RtlZeroMemory(context.stripes, sizeof(read_data_stripe) * ci->num_stripes);
|
|
|
|
context.buflen = length;
|
|
context.num_stripes = ci->num_stripes;
|
|
context.stripes_left = context.num_stripes;
|
|
context.sector_size = Vcb->superblock.sector_size;
|
|
context.csum = csum;
|
|
context.tree = is_tree;
|
|
context.type = type;
|
|
|
|
if (type == BLOCK_FLAG_RAID0) {
|
|
uint64_t startoff, endoff;
|
|
uint16_t endoffstripe, stripe;
|
|
uint32_t *stripeoff, pos;
|
|
PMDL master_mdl;
|
|
PFN_NUMBER* pfns;
|
|
|
|
// FIXME - test this still works if page size isn't the same as sector size
|
|
|
|
        // This relies on the fact that MDLs are followed in memory by their page frame numbers
        // (PFNs), so with a bit of jiggery-pokery you can trick your disks into deinterlacing
        // your RAID0 data for you without doing a memcpy yourself.
        // MDLs are officially opaque, so this might very well break in future versions of Windows.
|
|
|
|
get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes, &startoff, &startoffstripe);
|
|
get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes, &endoff, &endoffstripe);
|
|
|
|
if (file_read) {
|
|
// Unfortunately we can't avoid doing at least one memcpy, as Windows can give us an MDL
|
|
// with duplicated dummy PFNs, which confuse check_csum. Ah well.
|
|
// See https://msdn.microsoft.com/en-us/library/windows/hardware/Dn614012.aspx if you're interested.
|
|
|
|
context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
|
|
|
|
if (!context.va) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
} else
|
|
context.va = buf;
|
|
|
|
master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
|
|
if (!master_mdl) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
_SEH2_TRY {
|
|
MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
|
|
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
|
|
Status = _SEH2_GetExceptionCode();
|
|
} _SEH2_END;
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
|
|
IoFreeMdl(master_mdl);
|
|
goto exit;
|
|
}
|
|
|
|
pfns = (PFN_NUMBER*)(master_mdl + 1);
|
|
|
|
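        // Work out, for each device, the byte range [stripestart, stripeend) within its stripe
        // that this read touches: devices whose index is below startoffstripe only contribute
        // from the next stripe row onwards, so they begin at the next stripe boundary; the
        // starting device begins at startoff; later devices begin at the current boundary.
        // The end offsets are derived from endoff in the same way.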
for (i = 0; i < ci->num_stripes; i++) {
|
|
if (startoffstripe > i)
|
|
context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
|
|
else if (startoffstripe == i)
|
|
context.stripes[i].stripestart = startoff;
|
|
else
|
|
context.stripes[i].stripestart = startoff - (startoff % ci->stripe_length);
|
|
|
|
if (endoffstripe > i)
|
|
context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
|
|
else if (endoffstripe == i)
|
|
context.stripes[i].stripeend = endoff + 1;
|
|
else
|
|
context.stripes[i].stripeend = endoff - (endoff % ci->stripe_length);
|
|
|
|
if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
|
|
context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
|
|
|
|
if (!context.stripes[i].mdl) {
|
|
ERR("IoAllocateMdl failed\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
}
|
|
}
|
|
|
|
stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
|
|
if (!stripeoff) {
|
|
ERR("out of memory\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
|
|
|
|
pos = 0;
|
|
stripe = startoffstripe;
|
|
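        // Walk the logical range a stripe at a time, copying the relevant page frame numbers
        // from the master MDL into each stripe's MDL, so that every disk reads its part of the
        // data straight into the destination buffer's pages.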
while (pos < length) {
|
|
PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
|
|
|
|
if (pos == 0) {
|
|
uint32_t readlen = (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length));
|
|
|
|
RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
|
|
|
|
stripeoff[stripe] += readlen;
|
|
pos += readlen;
|
|
} else if (length - pos < ci->stripe_length) {
|
|
RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
|
|
|
|
pos = length;
|
|
} else {
|
|
RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
|
|
|
|
stripeoff[stripe] += (uint32_t)ci->stripe_length;
|
|
pos += (uint32_t)ci->stripe_length;
|
|
}
|
|
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
}
|
|
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
|
|
ExFreePool(stripeoff);
|
|
} else if (type == BLOCK_FLAG_RAID10) {
|
|
uint64_t startoff, endoff;
|
|
uint16_t endoffstripe, j, stripe;
|
|
ULONG orig_ls;
|
|
PMDL master_mdl;
|
|
PFN_NUMBER* pfns;
|
|
uint32_t* stripeoff, pos;
|
|
read_data_stripe** stripes;
|
|
|
|
if (c)
|
|
orig_ls = c->last_stripe;
|
|
else
|
|
orig_ls = 0;
|
|
|
|
get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &startoff, &startoffstripe);
|
|
get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes / ci->sub_stripes, &endoff, &endoffstripe);
|
|
|
|
if ((ci->num_stripes % ci->sub_stripes) != 0) {
|
|
ERR("chunk %I64x: num_stripes %x was not a multiple of sub_stripes %x!\n", offset, ci->num_stripes, ci->sub_stripes);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto exit;
|
|
}
|
|
|
|
if (file_read) {
|
|
context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
|
|
|
|
if (!context.va) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
} else
|
|
context.va = buf;
|
|
|
|
context.firstoff = (uint16_t)((startoff % ci->stripe_length) >> Vcb->sector_shift);
|
|
context.startoffstripe = startoffstripe;
|
|
context.sectors_per_stripe = (uint16_t)(ci->stripe_length >> Vcb->sector_shift);
|
|
|
|
startoffstripe *= ci->sub_stripes;
|
|
endoffstripe *= ci->sub_stripes;
|
|
|
|
if (c)
|
|
c->last_stripe = (orig_ls + 1) % ci->sub_stripes;
|
|
|
|
master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
|
|
if (!master_mdl) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
_SEH2_TRY {
|
|
MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
|
|
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
|
|
Status = _SEH2_GetExceptionCode();
|
|
} _SEH2_END;
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
|
|
IoFreeMdl(master_mdl);
|
|
goto exit;
|
|
}
|
|
|
|
pfns = (PFN_NUMBER*)(master_mdl + 1);
|
|
|
|
stripes = ExAllocatePoolWithTag(NonPagedPool, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
|
|
if (!stripes) {
|
|
ERR("out of memory\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
RtlZeroMemory(stripes, sizeof(read_data_stripe*) * ci->num_stripes / ci->sub_stripes);
|
|
|
|
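        // The num_stripes devices form num_stripes / sub_stripes RAID0 stripes, each mirrored
        // across sub_stripes devices. For each group, prefer the mirror selected by the chunk's
        // round-robin counter (orig_ls); if that device is missing, fall back to any present
        // mirror in the group.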
for (i = 0; i < ci->num_stripes; i += ci->sub_stripes) {
|
|
uint64_t sstart, send;
|
|
bool stripeset = false;
|
|
|
|
if (startoffstripe > i)
|
|
sstart = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
|
|
else if (startoffstripe == i)
|
|
sstart = startoff;
|
|
else
|
|
sstart = startoff - (startoff % ci->stripe_length);
|
|
|
|
if (endoffstripe > i)
|
|
send = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
|
|
else if (endoffstripe == i)
|
|
send = endoff + 1;
|
|
else
|
|
send = endoff - (endoff % ci->stripe_length);
|
|
|
|
for (j = 0; j < ci->sub_stripes; j++) {
|
|
if (j == orig_ls && devices[i+j] && devices[i+j]->devobj) {
|
|
context.stripes[i+j].stripestart = sstart;
|
|
context.stripes[i+j].stripeend = send;
|
|
stripes[i / ci->sub_stripes] = &context.stripes[i+j];
|
|
|
|
if (sstart != send) {
|
|
context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
|
|
|
|
if (!context.stripes[i+j].mdl) {
|
|
ERR("IoAllocateMdl failed\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
stripeset = true;
|
|
} else
|
|
context.stripes[i+j].status = ReadDataStatus_Skip;
|
|
}
|
|
|
|
if (!stripeset) {
|
|
for (j = 0; j < ci->sub_stripes; j++) {
|
|
if (devices[i+j] && devices[i+j]->devobj) {
|
|
context.stripes[i+j].stripestart = sstart;
|
|
context.stripes[i+j].stripeend = send;
|
|
context.stripes[i+j].status = ReadDataStatus_Pending;
|
|
stripes[i / ci->sub_stripes] = &context.stripes[i+j];
|
|
|
|
if (sstart != send) {
|
|
context.stripes[i+j].mdl = IoAllocateMdl(context.va, (ULONG)(send - sstart), false, false, NULL);
|
|
|
|
if (!context.stripes[i+j].mdl) {
|
|
ERR("IoAllocateMdl failed\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
stripeset = true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!stripeset) {
|
|
ERR("could not find stripe to read\n");
|
|
Status = STATUS_DEVICE_NOT_READY;
|
|
goto exit;
|
|
}
|
|
}
|
|
}
|
|
|
|
stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes, ALLOC_TAG);
|
|
if (!stripeoff) {
|
|
ERR("out of memory\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes / ci->sub_stripes);
|
|
|
|
pos = 0;
|
|
stripe = startoffstripe / ci->sub_stripes;
|
|
while (pos < length) {
|
|
PFN_NUMBER* stripe_pfns = (PFN_NUMBER*)(stripes[stripe]->mdl + 1);
|
|
|
|
if (pos == 0) {
|
|
uint32_t readlen = (uint32_t)min(stripes[stripe]->stripeend - stripes[stripe]->stripestart,
|
|
ci->stripe_length - (stripes[stripe]->stripestart % ci->stripe_length));
|
|
|
|
RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
|
|
|
|
stripeoff[stripe] += readlen;
|
|
pos += readlen;
|
|
} else if (length - pos < ci->stripe_length) {
|
|
RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (length - pos) * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
|
|
|
|
pos = length;
|
|
} else {
|
|
RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
|
|
|
|
stripeoff[stripe] += (ULONG)ci->stripe_length;
|
|
pos += (ULONG)ci->stripe_length;
|
|
}
|
|
|
|
stripe = (stripe + 1) % (ci->num_stripes / ci->sub_stripes);
|
|
}
|
|
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
|
|
ExFreePool(stripeoff);
|
|
ExFreePool(stripes);
|
|
} else if (type == BLOCK_FLAG_DUPLICATE) {
|
|
uint64_t orig_ls;
|
|
|
|
if (c)
|
|
orig_ls = i = c->last_stripe;
|
|
else
|
|
orig_ls = i = 0;
|
|
|
|
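        // Pick one device holding a complete copy, starting from the chunk's round-robin
        // counter and skipping devices that aren't present; fail if none of them is.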
while (!devices[i] || !devices[i]->devobj) {
|
|
i = (i + 1) % ci->num_stripes;
|
|
|
|
if (i == orig_ls) {
|
|
ERR("no devices available to service request\n");
|
|
Status = STATUS_DEVICE_NOT_READY;
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
if (c)
|
|
c->last_stripe = (i + 1) % ci->num_stripes;
|
|
|
|
context.stripes[i].stripestart = addr - offset;
|
|
context.stripes[i].stripeend = context.stripes[i].stripestart + length;
|
|
|
|
if (file_read) {
|
|
context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
|
|
|
|
if (!context.va) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
context.stripes[i].mdl = IoAllocateMdl(context.va, length, false, false, NULL);
|
|
if (!context.stripes[i].mdl) {
|
|
ERR("IoAllocateMdl failed\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
MmBuildMdlForNonPagedPool(context.stripes[i].mdl);
|
|
} else {
|
|
context.stripes[i].mdl = IoAllocateMdl(buf, length, false, false, NULL);
|
|
|
|
if (!context.stripes[i].mdl) {
|
|
ERR("IoAllocateMdl failed\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
_SEH2_TRY {
|
|
MmProbeAndLockPages(context.stripes[i].mdl, KernelMode, IoWriteAccess);
|
|
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
|
|
Status = _SEH2_GetExceptionCode();
|
|
} _SEH2_END;
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
|
|
goto exit;
|
|
}
|
|
}
|
|
} else if (type == BLOCK_FLAG_RAID5) {
|
|
uint64_t startoff, endoff;
|
|
uint16_t endoffstripe, parity;
|
|
uint32_t *stripeoff, pos;
|
|
PMDL master_mdl;
|
|
PFN_NUMBER *pfns, dummy = 0;
|
|
bool need_dummy = false;
|
|
|
|
get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 1, &startoff, &startoffstripe);
|
|
get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 1, &endoff, &endoffstripe);
|
|
|
|
if (file_read) {
|
|
context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
|
|
|
|
if (!context.va) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
} else
|
|
context.va = buf;
|
|
|
|
master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
|
|
if (!master_mdl) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
_SEH2_TRY {
|
|
MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
|
|
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
|
|
Status = _SEH2_GetExceptionCode();
|
|
} _SEH2_END;
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
|
|
IoFreeMdl(master_mdl);
|
|
goto exit;
|
|
}
|
|
|
|
pfns = (PFN_NUMBER*)(master_mdl + 1);
|
|
|
|
pos = 0;
|
|
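        // First pass: work out each device's byte range. The parity stripe advances by one
        // position per row ((row + num_stripes - 1) % num_stripes), and the data stripes occupy
        // the positions immediately after it.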
while (pos < length) {
|
|
parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
|
|
|
|
if (pos == 0) {
|
|
uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
|
|
ULONG skip, readlen;
|
|
|
|
i = startoffstripe;
|
|
while (stripe != parity) {
|
|
if (i == startoffstripe) {
|
|
readlen = min(length, (ULONG)(ci->stripe_length - (startoff % ci->stripe_length)));
|
|
|
|
context.stripes[stripe].stripestart = startoff;
|
|
context.stripes[stripe].stripeend = startoff + readlen;
|
|
|
|
pos += readlen;
|
|
|
|
if (pos == length)
|
|
break;
|
|
} else {
|
|
readlen = min(length - pos, (ULONG)ci->stripe_length);
|
|
|
|
context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
|
|
context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
|
|
|
|
pos += readlen;
|
|
|
|
if (pos == length)
|
|
break;
|
|
}
|
|
|
|
i++;
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
}
|
|
|
|
if (pos == length)
|
|
break;
|
|
|
|
for (i = 0; i < startoffstripe; i++) {
|
|
uint16_t stripe2 = (parity + i + 1) % ci->num_stripes;
|
|
|
|
context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
|
|
}
|
|
|
|
context.stripes[parity].stripestart = context.stripes[parity].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
|
|
|
|
if (length - pos > ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length) {
|
|
skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 1) * ci->stripe_length)) - 1);
|
|
|
|
for (i = 0; i < ci->num_stripes; i++) {
|
|
context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
|
|
}
|
|
|
|
pos += (uint32_t)(skip * (ci->num_stripes - 1) * ci->num_stripes * ci->stripe_length);
|
|
need_dummy = true;
|
|
}
|
|
} else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
|
|
for (i = 0; i < ci->num_stripes; i++) {
|
|
context.stripes[i].stripeend += ci->stripe_length;
|
|
}
|
|
|
|
pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 1));
|
|
need_dummy = true;
|
|
} else {
|
|
uint16_t stripe = (parity + 1) % ci->num_stripes;
|
|
|
|
i = 0;
|
|
while (stripe != parity) {
|
|
if (endoffstripe == i) {
|
|
context.stripes[stripe].stripeend = endoff + 1;
|
|
break;
|
|
} else if (endoffstripe > i)
|
|
context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
|
|
|
|
i++;
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < ci->num_stripes; i++) {
|
|
if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
|
|
context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart),
|
|
false, false, NULL);
|
|
|
|
if (!context.stripes[i].mdl) {
|
|
ERR("IoAllocateMdl failed\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
}
|
|
}
|
|
|
|
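        // For full-row reads the per-device ranges also cover the parity blocks, so substitute
        // a single throwaway page for the parity positions in the affected stripe MDLs; the
        // parity data is read and discarded, which keeps each device's read contiguous.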
if (need_dummy) {
|
|
dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
|
|
if (!dummypage) {
|
|
ERR("out of memory\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
|
|
if (!dummy_mdl) {
|
|
ERR("IoAllocateMdl failed\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
MmBuildMdlForNonPagedPool(dummy_mdl);
|
|
|
|
dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
|
|
}
|
|
|
|
stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
|
|
if (!stripeoff) {
|
|
ERR("out of memory\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
|
|
|
|
pos = 0;
|
|
|
|
while (pos < length) {
|
|
PFN_NUMBER* stripe_pfns;
|
|
|
|
parity = (((addr - offset + pos) / ((ci->num_stripes - 1) * ci->stripe_length)) + ci->num_stripes - 1) % ci->num_stripes;
|
|
|
|
if (pos == 0) {
|
|
uint16_t stripe = (parity + startoffstripe + 1) % ci->num_stripes;
|
|
uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
|
|
ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
|
|
|
|
stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
|
|
|
|
RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
|
|
|
|
stripeoff[stripe] = readlen;
|
|
pos += readlen;
|
|
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
|
|
while (stripe != parity) {
|
|
stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
|
|
readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
|
|
|
|
if (readlen == 0)
|
|
break;
|
|
|
|
RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
|
|
|
|
stripeoff[stripe] = readlen;
|
|
pos += readlen;
|
|
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
}
|
|
} else if (length - pos >= ci->stripe_length * (ci->num_stripes - 1)) {
|
|
uint16_t stripe = (parity + 1) % ci->num_stripes;
|
|
ULONG k;
|
|
|
|
while (stripe != parity) {
|
|
stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
|
|
|
|
RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
|
|
|
|
stripeoff[stripe] += (uint32_t)ci->stripe_length;
|
|
pos += (uint32_t)ci->stripe_length;
|
|
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
}
|
|
|
|
stripe_pfns = (PFN_NUMBER*)(context.stripes[parity].mdl + 1);
|
|
|
|
for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
|
|
stripe_pfns[stripeoff[parity] >> PAGE_SHIFT] = dummy;
|
|
stripeoff[parity] += PAGE_SIZE;
|
|
}
|
|
} else {
|
|
uint16_t stripe = (parity + 1) % ci->num_stripes;
|
|
uint32_t readlen;
|
|
|
|
while (pos < length) {
|
|
stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
|
|
readlen = min(length - pos, (ULONG)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
|
|
|
|
if (readlen == 0)
|
|
break;
|
|
|
|
RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
|
|
|
|
stripeoff[stripe] += readlen;
|
|
pos += readlen;
|
|
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
}
|
|
}
|
|
}
|
|
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
|
|
ExFreePool(stripeoff);
|
|
} else if (type == BLOCK_FLAG_RAID6) {
|
|
uint64_t startoff, endoff;
|
|
uint16_t endoffstripe, parity1;
|
|
uint32_t *stripeoff, pos;
|
|
PMDL master_mdl;
|
|
PFN_NUMBER *pfns, dummy = 0;
|
|
bool need_dummy = false;
|
|
|
|
get_raid0_offset(addr - offset, ci->stripe_length, ci->num_stripes - 2, &startoff, &startoffstripe);
|
|
get_raid0_offset(addr + length - offset - 1, ci->stripe_length, ci->num_stripes - 2, &endoff, &endoffstripe);
|
|
|
|
if (file_read) {
|
|
context.va = ExAllocatePoolWithTag(NonPagedPool, length, ALLOC_TAG);
|
|
|
|
if (!context.va) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
} else
|
|
context.va = buf;
|
|
|
|
master_mdl = IoAllocateMdl(context.va, length, false, false, NULL);
|
|
if (!master_mdl) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
_SEH2_TRY {
|
|
MmProbeAndLockPages(master_mdl, KernelMode, IoWriteAccess);
|
|
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
|
|
Status = _SEH2_GetExceptionCode();
|
|
} _SEH2_END;
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("MmProbeAndLockPages threw exception %08lx\n", Status);
|
|
IoFreeMdl(master_mdl);
|
|
goto exit;
|
|
}
|
|
|
|
pfns = (PFN_NUMBER*)(master_mdl + 1);
|
|
|
|
pos = 0;
|
|
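        // As for RAID5, but with two parity stripes (P and Q) per row and num_stripes - 2 data
        // stripes: parity1 rotates per row and parity2 immediately follows it.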
while (pos < length) {
|
|
parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
|
|
|
|
if (pos == 0) {
|
|
uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes, parity2;
|
|
ULONG skip, readlen;
|
|
|
|
i = startoffstripe;
|
|
while (stripe != parity1) {
|
|
if (i == startoffstripe) {
|
|
readlen = (ULONG)min(length, ci->stripe_length - (startoff % ci->stripe_length));
|
|
|
|
context.stripes[stripe].stripestart = startoff;
|
|
context.stripes[stripe].stripeend = startoff + readlen;
|
|
|
|
pos += readlen;
|
|
|
|
if (pos == length)
|
|
break;
|
|
} else {
|
|
readlen = min(length - pos, (ULONG)ci->stripe_length);
|
|
|
|
context.stripes[stripe].stripestart = startoff - (startoff % ci->stripe_length);
|
|
context.stripes[stripe].stripeend = context.stripes[stripe].stripestart + readlen;
|
|
|
|
pos += readlen;
|
|
|
|
if (pos == length)
|
|
break;
|
|
}
|
|
|
|
i++;
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
}
|
|
|
|
if (pos == length)
|
|
break;
|
|
|
|
for (i = 0; i < startoffstripe; i++) {
|
|
uint16_t stripe2 = (parity1 + i + 2) % ci->num_stripes;
|
|
|
|
context.stripes[stripe2].stripestart = context.stripes[stripe2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
|
|
}
|
|
|
|
context.stripes[parity1].stripestart = context.stripes[parity1].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
|
|
|
|
parity2 = (parity1 + 1) % ci->num_stripes;
|
|
context.stripes[parity2].stripestart = context.stripes[parity2].stripeend = startoff - (startoff % ci->stripe_length) + ci->stripe_length;
|
|
|
|
if (length - pos > ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length) {
|
|
skip = (ULONG)(((length - pos) / (ci->num_stripes * (ci->num_stripes - 2) * ci->stripe_length)) - 1);
|
|
|
|
for (i = 0; i < ci->num_stripes; i++) {
|
|
context.stripes[i].stripeend += skip * ci->num_stripes * ci->stripe_length;
|
|
}
|
|
|
|
pos += (uint32_t)(skip * (ci->num_stripes - 2) * ci->num_stripes * ci->stripe_length);
|
|
need_dummy = true;
|
|
}
|
|
} else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
|
|
for (i = 0; i < ci->num_stripes; i++) {
|
|
context.stripes[i].stripeend += ci->stripe_length;
|
|
}
|
|
|
|
pos += (uint32_t)(ci->stripe_length * (ci->num_stripes - 2));
|
|
need_dummy = true;
|
|
} else {
|
|
uint16_t stripe = (parity1 + 2) % ci->num_stripes;
|
|
|
|
i = 0;
|
|
while (stripe != parity1) {
|
|
if (endoffstripe == i) {
|
|
context.stripes[stripe].stripeend = endoff + 1;
|
|
break;
|
|
} else if (endoffstripe > i)
|
|
context.stripes[stripe].stripeend = endoff - (endoff % ci->stripe_length) + ci->stripe_length;
|
|
|
|
i++;
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < ci->num_stripes; i++) {
|
|
if (context.stripes[i].stripestart != context.stripes[i].stripeend) {
|
|
context.stripes[i].mdl = IoAllocateMdl(context.va, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), false, false, NULL);
|
|
|
|
if (!context.stripes[i].mdl) {
|
|
ERR("IoAllocateMdl failed\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (need_dummy) {
|
|
dummypage = ExAllocatePoolWithTag(NonPagedPool, PAGE_SIZE, ALLOC_TAG);
|
|
if (!dummypage) {
|
|
ERR("out of memory\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
dummy_mdl = IoAllocateMdl(dummypage, PAGE_SIZE, false, false, NULL);
|
|
if (!dummy_mdl) {
|
|
ERR("IoAllocateMdl failed\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
MmBuildMdlForNonPagedPool(dummy_mdl);
|
|
|
|
dummy = *(PFN_NUMBER*)(dummy_mdl + 1);
|
|
}
|
|
|
|
stripeoff = ExAllocatePoolWithTag(NonPagedPool, sizeof(uint32_t) * ci->num_stripes, ALLOC_TAG);
|
|
if (!stripeoff) {
|
|
ERR("out of memory\n");
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
RtlZeroMemory(stripeoff, sizeof(uint32_t) * ci->num_stripes);
|
|
|
|
pos = 0;
|
|
|
|
while (pos < length) {
|
|
PFN_NUMBER* stripe_pfns;
|
|
|
|
parity1 = (((addr - offset + pos) / ((ci->num_stripes - 2) * ci->stripe_length)) + ci->num_stripes - 2) % ci->num_stripes;
|
|
|
|
if (pos == 0) {
|
|
uint16_t stripe = (parity1 + startoffstripe + 2) % ci->num_stripes;
|
|
uint32_t readlen = min(length - pos, (uint32_t)min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart,
|
|
ci->stripe_length - (context.stripes[stripe].stripestart % ci->stripe_length)));
|
|
|
|
stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
|
|
|
|
RtlCopyMemory(stripe_pfns, pfns, readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
|
|
|
|
stripeoff[stripe] = readlen;
|
|
pos += readlen;
|
|
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
|
|
while (stripe != parity1) {
|
|
stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
|
|
readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
|
|
|
|
if (readlen == 0)
|
|
break;
|
|
|
|
RtlCopyMemory(stripe_pfns, &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
|
|
|
|
stripeoff[stripe] = readlen;
|
|
pos += readlen;
|
|
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
}
|
|
} else if (length - pos >= ci->stripe_length * (ci->num_stripes - 2)) {
|
|
uint16_t stripe = (parity1 + 2) % ci->num_stripes;
|
|
uint16_t parity2 = (parity1 + 1) % ci->num_stripes;
|
|
ULONG k;
|
|
|
|
while (stripe != parity1) {
|
|
stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
|
|
|
|
RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], (ULONG)(ci->stripe_length * sizeof(PFN_NUMBER) >> PAGE_SHIFT));
|
|
|
|
stripeoff[stripe] += (uint32_t)ci->stripe_length;
|
|
pos += (uint32_t)ci->stripe_length;
|
|
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
}
|
|
|
|
stripe_pfns = (PFN_NUMBER*)(context.stripes[parity1].mdl + 1);
|
|
|
|
for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
|
|
stripe_pfns[stripeoff[parity1] >> PAGE_SHIFT] = dummy;
|
|
stripeoff[parity1] += PAGE_SIZE;
|
|
}
|
|
|
|
stripe_pfns = (PFN_NUMBER*)(context.stripes[parity2].mdl + 1);
|
|
|
|
for (k = 0; k < ci->stripe_length >> PAGE_SHIFT; k++) {
|
|
stripe_pfns[stripeoff[parity2] >> PAGE_SHIFT] = dummy;
|
|
stripeoff[parity2] += PAGE_SIZE;
|
|
}
|
|
} else {
|
|
uint16_t stripe = (parity1 + 2) % ci->num_stripes;
|
|
uint32_t readlen;
|
|
|
|
while (pos < length) {
|
|
stripe_pfns = (PFN_NUMBER*)(context.stripes[stripe].mdl + 1);
|
|
readlen = (uint32_t)min(length - pos, min(context.stripes[stripe].stripeend - context.stripes[stripe].stripestart, ci->stripe_length));
|
|
|
|
if (readlen == 0)
|
|
break;
|
|
|
|
RtlCopyMemory(&stripe_pfns[stripeoff[stripe] >> PAGE_SHIFT], &pfns[pos >> PAGE_SHIFT], readlen * sizeof(PFN_NUMBER) >> PAGE_SHIFT);
|
|
|
|
stripeoff[stripe] += readlen;
|
|
pos += readlen;
|
|
|
|
stripe = (stripe + 1) % ci->num_stripes;
|
|
}
|
|
}
|
|
}
|
|
|
|
MmUnlockPages(master_mdl);
|
|
IoFreeMdl(master_mdl);
|
|
|
|
ExFreePool(stripeoff);
|
|
}
|
|
|
|
context.address = addr;
|
|
|
|
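    // Mark stripes with no usable device or an empty byte range as missing so they aren't
    // submitted; the read can still succeed provided no more than allowed_missing devices
    // are actually absent.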
for (i = 0; i < ci->num_stripes; i++) {
|
|
if (!devices[i] || !devices[i]->devobj || context.stripes[i].stripestart == context.stripes[i].stripeend) {
|
|
context.stripes[i].status = ReadDataStatus_MissingDevice;
|
|
context.stripes_left--;
|
|
|
|
if (!devices[i] || !devices[i]->devobj)
|
|
missing_devices++;
|
|
}
|
|
}
|
|
|
|
if (missing_devices > allowed_missing) {
|
|
ERR("not enough devices to service request (%u missing)\n", missing_devices);
|
|
Status = STATUS_UNEXPECTED_IO_ERROR;
|
|
goto exit;
|
|
}
|
|
|
|
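    // Build a read IRP for each remaining stripe - either a standalone IRP or one associated
    // with the caller's - targeting the stripe's physical offset on its device and honouring
    // the device's buffered/direct I/O flags.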
for (i = 0; i < ci->num_stripes; i++) {
|
|
PIO_STACK_LOCATION IrpSp;
|
|
|
|
if (devices[i] && devices[i]->devobj && context.stripes[i].stripestart != context.stripes[i].stripeend && context.stripes[i].status != ReadDataStatus_Skip) {
|
|
context.stripes[i].context = (struct read_data_context*)&context;
|
|
|
|
if (type == BLOCK_FLAG_RAID10) {
|
|
context.stripes[i].stripenum = i / ci->sub_stripes;
|
|
}
|
|
|
|
if (!Irp) {
|
|
context.stripes[i].Irp = IoAllocateIrp(devices[i]->devobj->StackSize, false);
|
|
|
|
if (!context.stripes[i].Irp) {
|
|
ERR("IoAllocateIrp failed\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
} else {
|
|
context.stripes[i].Irp = IoMakeAssociatedIrp(Irp, devices[i]->devobj->StackSize);
|
|
|
|
if (!context.stripes[i].Irp) {
|
|
ERR("IoMakeAssociatedIrp failed\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
IrpSp = IoGetNextIrpStackLocation(context.stripes[i].Irp);
|
|
IrpSp->MajorFunction = IRP_MJ_READ;
|
|
IrpSp->MinorFunction = IRP_MN_NORMAL;
|
|
IrpSp->FileObject = devices[i]->fileobj;
|
|
|
|
if (devices[i]->devobj->Flags & DO_BUFFERED_IO) {
|
|
context.stripes[i].Irp->AssociatedIrp.SystemBuffer = ExAllocatePoolWithTag(NonPagedPool, (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart), ALLOC_TAG);
|
|
if (!context.stripes[i].Irp->AssociatedIrp.SystemBuffer) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
context.stripes[i].Irp->Flags |= IRP_BUFFERED_IO | IRP_DEALLOCATE_BUFFER | IRP_INPUT_OPERATION;
|
|
|
|
context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
|
|
} else if (devices[i]->devobj->Flags & DO_DIRECT_IO)
|
|
context.stripes[i].Irp->MdlAddress = context.stripes[i].mdl;
|
|
else
|
|
context.stripes[i].Irp->UserBuffer = MmGetSystemAddressForMdlSafe(context.stripes[i].mdl, priority);
|
|
|
|
IrpSp->Parameters.Read.Length = (ULONG)(context.stripes[i].stripeend - context.stripes[i].stripestart);
|
|
IrpSp->Parameters.Read.ByteOffset.QuadPart = context.stripes[i].stripestart + cis[i].offset;
|
|
|
|
total_reading += IrpSp->Parameters.Read.Length;
|
|
|
|
context.stripes[i].Irp->UserIosb = &context.stripes[i].iosb;
|
|
|
|
IoSetCompletionRoutine(context.stripes[i].Irp, read_data_completion, &context.stripes[i], true, true, true);
|
|
|
|
context.stripes[i].status = ReadDataStatus_Pending;
|
|
}
|
|
}
|
|
|
|
need_to_wait = false;
|
|
for (i = 0; i < ci->num_stripes; i++) {
|
|
if (context.stripes[i].status != ReadDataStatus_MissingDevice && context.stripes[i].status != ReadDataStatus_Skip) {
|
|
IoCallDriver(devices[i]->devobj, context.stripes[i].Irp);
|
|
need_to_wait = true;
|
|
}
|
|
}
|
|
|
|
if (need_to_wait)
|
|
KeWaitForSingleObject(&context.Event, Executive, KernelMode, false, NULL);
|
|
|
|
if (diskacc)
|
|
fFsRtlUpdateDiskCounters(total_reading, 0);
|
|
|
|
    // check whether any of the devices returned a "user-induced" error (e.g. no media in
    // device, or verify required); if so, pass that status straight back to the caller
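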
|
|
|
|
for (i = 0; i < ci->num_stripes; i++) {
|
|
if (context.stripes[i].status == ReadDataStatus_Error && IoIsErrorUserInduced(context.stripes[i].iosb.Status)) {
|
|
Status = context.stripes[i].iosb.Status;
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
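    // Hand the completed reads to the per-profile handler, which verifies the checksums (and,
    // for tree blocks, the header fields) and, where the profile provides redundancy, tries to
    // recover bad or missing data from another copy or from parity.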
if (type == BLOCK_FLAG_RAID0) {
|
|
Status = read_data_raid0(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("read_data_raid0 returned %08lx\n", Status);
|
|
|
|
if (file_read)
|
|
ExFreePool(context.va);
|
|
|
|
goto exit;
|
|
}
|
|
|
|
if (file_read) {
|
|
RtlCopyMemory(buf, context.va, length);
|
|
ExFreePool(context.va);
|
|
}
|
|
} else if (type == BLOCK_FLAG_RAID10) {
|
|
Status = read_data_raid10(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, generation, offset);
|
|
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("read_data_raid10 returned %08lx\n", Status);
|
|
|
|
if (file_read)
|
|
ExFreePool(context.va);
|
|
|
|
goto exit;
|
|
}
|
|
|
|
if (file_read) {
|
|
RtlCopyMemory(buf, context.va, length);
|
|
ExFreePool(context.va);
|
|
}
|
|
} else if (type == BLOCK_FLAG_DUPLICATE) {
|
|
Status = read_data_dup(Vcb, file_read ? context.va : buf, addr, &context, ci, devices, generation);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("read_data_dup returned %08lx\n", Status);
|
|
|
|
if (file_read)
|
|
ExFreePool(context.va);
|
|
|
|
goto exit;
|
|
}
|
|
|
|
if (file_read) {
|
|
RtlCopyMemory(buf, context.va, length);
|
|
ExFreePool(context.va);
|
|
}
|
|
} else if (type == BLOCK_FLAG_RAID5) {
|
|
Status = read_data_raid5(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("read_data_raid5 returned %08lx\n", Status);
|
|
|
|
if (file_read)
|
|
ExFreePool(context.va);
|
|
|
|
goto exit;
|
|
}
|
|
|
|
if (file_read) {
|
|
RtlCopyMemory(buf, context.va, length);
|
|
ExFreePool(context.va);
|
|
}
|
|
} else if (type == BLOCK_FLAG_RAID6) {
|
|
Status = read_data_raid6(Vcb, file_read ? context.va : buf, addr, length, &context, ci, devices, offset, generation, c, missing_devices > 0 ? true : false);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("read_data_raid6 returned %08lx\n", Status);
|
|
|
|
if (file_read)
|
|
ExFreePool(context.va);
|
|
|
|
goto exit;
|
|
}
|
|
|
|
if (file_read) {
|
|
RtlCopyMemory(buf, context.va, length);
|
|
ExFreePool(context.va);
|
|
}
|
|
}
|
|
|
|
exit:
|
|
if (c && (type == BLOCK_FLAG_RAID5 || type == BLOCK_FLAG_RAID6))
|
|
chunk_unlock_range(Vcb, c, lockaddr, locklen);
|
|
|
|
if (dummy_mdl)
|
|
IoFreeMdl(dummy_mdl);
|
|
|
|
if (dummypage)
|
|
ExFreePool(dummypage);
|
|
|
|
for (i = 0; i < ci->num_stripes; i++) {
|
|
if (context.stripes[i].mdl) {
|
|
if (context.stripes[i].mdl->MdlFlags & MDL_PAGES_LOCKED)
|
|
MmUnlockPages(context.stripes[i].mdl);
|
|
|
|
IoFreeMdl(context.stripes[i].mdl);
|
|
}
|
|
|
|
if (context.stripes[i].Irp)
|
|
IoFreeIrp(context.stripes[i].Irp);
|
|
}
|
|
|
|
ExFreePool(context.stripes);
|
|
|
|
if (!Vcb->log_to_phys_loaded)
|
|
ExFreePool(devices);
|
|
|
|
return Status;
|
|
}
|
|
|
|
__attribute__((nonnull(1, 2)))
NTSTATUS read_stream(fcb* fcb, uint8_t* data, uint64_t start, ULONG length, ULONG* pbr) {
    ULONG readlen;

    TRACE("(%p, %p, %I64x, %lx, %p)\n", fcb, data, start, length, pbr);

    if (pbr) *pbr = 0;

    if (start >= fcb->adsdata.Length) {
        TRACE("tried to read beyond end of stream\n");
        return STATUS_END_OF_FILE;
    }

    if (length == 0) {
        WARN("tried to read zero bytes\n");
        return STATUS_SUCCESS;
    }

    if (start + length < fcb->adsdata.Length)
        readlen = length;
    else
        readlen = fcb->adsdata.Length - (ULONG)start;

    if (readlen > 0)
        RtlCopyMemory(data, fcb->adsdata.Buffer + start, readlen);

    if (pbr) *pbr = readlen;

    return STATUS_SUCCESS;
}
|
|
|
|
typedef struct {
    uint64_t off;
    uint64_t ed_size;
    uint64_t ed_offset;
    uint64_t ed_num_bytes;
} read_part_extent;

typedef struct {
    LIST_ENTRY list_entry;
    uint64_t addr;
    chunk* c;
    uint32_t read;
    uint32_t to_read;
    void* csum;
    bool csum_free;
    uint8_t* buf;
    bool buf_free;
    uint32_t bumpoff;
    bool mdl;
    void* data;
    uint8_t compression;
    unsigned int num_extents;
    read_part_extent extents[1];
} read_part;

typedef struct {
    LIST_ENTRY list_entry;
    calc_job* cj;
    void* decomp;
    void* data;
    unsigned int offset;
    size_t length;
} comp_calc_job;
|
|
|
|
__attribute__((nonnull(1, 2)))
|
|
NTSTATUS read_file(fcb* fcb, uint8_t* data, uint64_t start, uint64_t length, ULONG* pbr, PIRP Irp) {
|
|
NTSTATUS Status;
|
|
uint32_t bytes_read = 0;
|
|
uint64_t last_end;
|
|
LIST_ENTRY* le;
|
|
POOL_TYPE pool_type;
|
|
LIST_ENTRY read_parts, calc_jobs;
|
|
|
|
TRACE("(%p, %p, %I64x, %I64x, %p)\n", fcb, data, start, length, pbr);
|
|
|
|
if (pbr)
|
|
*pbr = 0;
|
|
|
|
if (start >= fcb->inode_item.st_size) {
|
|
WARN("Tried to read beyond end of file\n");
|
|
return STATUS_END_OF_FILE;
|
|
}
|
|
|
|
InitializeListHead(&read_parts);
|
|
InitializeListHead(&calc_jobs);
|
|
|
|
pool_type = fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? NonPagedPool : PagedPool;
|
|
|
|
le = fcb->extents.Flink;
|
|
|
|
last_end = start;
|
|
|
|
while (le != &fcb->extents) {
|
|
extent* ext = CONTAINING_RECORD(le, extent, list_entry);
|
|
|
|
if (!ext->ignore) {
|
|
EXTENT_DATA* ed = &ext->extent_data;
|
|
uint64_t len;
|
|
|
|
if (ed->type == EXTENT_TYPE_REGULAR || ed->type == EXTENT_TYPE_PREALLOC)
|
|
len = ((EXTENT_DATA2*)ed->data)->num_bytes;
|
|
else
|
|
len = ed->decoded_size;
|
|
|
|
if (ext->offset + len <= start) {
|
|
last_end = ext->offset + len;
|
|
goto nextitem;
|
|
}
|
|
|
|
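            // zero-fill any hole between the previous extent and this one (sparse region)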
if (ext->offset > last_end && ext->offset > start + bytes_read) {
|
|
uint32_t read = (uint32_t)min(length, ext->offset - max(start, last_end));
|
|
|
|
RtlZeroMemory(data + bytes_read, read);
|
|
bytes_read += read;
|
|
length -= read;
|
|
}
|
|
|
|
if (length == 0 || ext->offset > start + bytes_read + length)
|
|
break;
|
|
|
|
if (ed->encryption != BTRFS_ENCRYPTION_NONE) {
|
|
WARN("Encryption not supported\n");
|
|
Status = STATUS_NOT_IMPLEMENTED;
|
|
goto exit;
|
|
}
|
|
|
|
if (ed->encoding != BTRFS_ENCODING_NONE) {
|
|
WARN("Other encodings not supported\n");
|
|
Status = STATUS_NOT_IMPLEMENTED;
|
|
goto exit;
|
|
}
|
|
|
|
switch (ed->type) {
|
|
case EXTENT_TYPE_INLINE:
|
|
{
|
|
uint64_t off = start + bytes_read - ext->offset;
|
|
uint32_t read;
|
|
|
|
if (ed->compression == BTRFS_COMPRESSION_NONE) {
|
|
read = (uint32_t)min(min(len, ext->datalen) - off, length);
|
|
|
|
RtlCopyMemory(data + bytes_read, &ed->data[off], read);
|
|
} else if (ed->compression == BTRFS_COMPRESSION_ZLIB || ed->compression == BTRFS_COMPRESSION_LZO || ed->compression == BTRFS_COMPRESSION_ZSTD) {
|
|
uint8_t* decomp;
|
|
bool decomp_alloc;
|
|
uint16_t inlen = ext->datalen - (uint16_t)offsetof(EXTENT_DATA, data[0]);
|
|
|
|
if (ed->decoded_size == 0 || ed->decoded_size > 0xffffffff) {
|
|
ERR("ed->decoded_size was invalid (%I64x)\n", ed->decoded_size);
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto exit;
|
|
}
|
|
|
|
read = (uint32_t)min(ed->decoded_size - off, length);
|
|
|
|
if (off > 0) {
|
|
decomp = ExAllocatePoolWithTag(NonPagedPool, (uint32_t)ed->decoded_size, ALLOC_TAG);
|
|
if (!decomp) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
decomp_alloc = true;
|
|
} else {
|
|
decomp = data + bytes_read;
|
|
decomp_alloc = false;
|
|
}
|
|
|
|
if (ed->compression == BTRFS_COMPRESSION_ZLIB) {
|
|
Status = zlib_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("zlib_decompress returned %08lx\n", Status);
|
|
if (decomp_alloc) ExFreePool(decomp);
|
|
goto exit;
|
|
}
|
|
} else if (ed->compression == BTRFS_COMPRESSION_LZO) {
|
|
if (inlen < sizeof(uint32_t)) {
|
|
ERR("extent data was truncated\n");
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
if (decomp_alloc) ExFreePool(decomp);
|
|
goto exit;
|
|
} else
|
|
inlen -= sizeof(uint32_t);
|
|
|
|
Status = lzo_decompress(ed->data + sizeof(uint32_t), inlen, decomp, (uint32_t)(read + off), sizeof(uint32_t));
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("lzo_decompress returned %08lx\n", Status);
|
|
if (decomp_alloc) ExFreePool(decomp);
|
|
goto exit;
|
|
}
|
|
} else if (ed->compression == BTRFS_COMPRESSION_ZSTD) {
|
|
Status = zstd_decompress(ed->data, inlen, decomp, (uint32_t)(read + off));
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("zstd_decompress returned %08lx\n", Status);
|
|
if (decomp_alloc) ExFreePool(decomp);
|
|
goto exit;
|
|
}
|
|
}
|
|
|
|
if (decomp_alloc) {
|
|
RtlCopyMemory(data + bytes_read, decomp + off, read);
|
|
ExFreePool(decomp);
|
|
}
|
|
} else {
|
|
ERR("unhandled compression type %x\n", ed->compression);
|
|
Status = STATUS_NOT_IMPLEMENTED;
|
|
goto exit;
|
|
}
|
|
|
|
bytes_read += read;
|
|
length -= read;
|
|
|
|
break;
|
|
}
|
|
|
|
case EXTENT_TYPE_REGULAR:
|
|
{
|
|
EXTENT_DATA2* ed2 = (EXTENT_DATA2*)ed->data;
|
|
read_part* rp;
|
|
|
|
rp = ExAllocatePoolWithTag(pool_type, sizeof(read_part), ALLOC_TAG);
|
|
if (!rp) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
rp->mdl = (Irp && Irp->MdlAddress) ? true : false;
|
|
rp->extents[0].off = start + bytes_read - ext->offset;
|
|
rp->bumpoff = 0;
|
|
rp->num_extents = 1;
|
|
rp->csum_free = false;
|
|
|
|
rp->read = (uint32_t)(len - rp->extents[0].off);
|
|
if (rp->read > length) rp->read = (uint32_t)length;
|
|
|
|
if (ed->compression == BTRFS_COMPRESSION_NONE) {
|
|
rp->addr = ed2->address + ed2->offset + rp->extents[0].off;
|
|
rp->to_read = (uint32_t)sector_align(rp->read, fcb->Vcb->superblock.sector_size);
|
|
|
|
if (rp->addr & (fcb->Vcb->superblock.sector_size - 1)) {
|
|
rp->bumpoff = rp->addr & (fcb->Vcb->superblock.sector_size - 1);
|
|
rp->addr -= rp->bumpoff;
|
|
rp->to_read = (uint32_t)sector_align(rp->read + rp->bumpoff, fcb->Vcb->superblock.sector_size);
|
|
}
|
|
} else {
|
|
rp->addr = ed2->address;
|
|
rp->to_read = (uint32_t)sector_align(ed2->size, fcb->Vcb->superblock.sector_size);
|
|
}
|
|
|
|
if (ed->compression == BTRFS_COMPRESSION_NONE && (start & (fcb->Vcb->superblock.sector_size - 1)) == 0 &&
|
|
(length & (fcb->Vcb->superblock.sector_size - 1)) == 0) {
|
|
rp->buf = data + bytes_read;
|
|
rp->buf_free = false;
|
|
} else {
|
|
rp->buf = ExAllocatePoolWithTag(pool_type, rp->to_read, ALLOC_TAG);
|
|
rp->buf_free = true;
|
|
|
|
if (!rp->buf) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
ExFreePool(rp);
|
|
goto exit;
|
|
}
|
|
|
|
rp->mdl = false;
|
|
}
|
|
|
|
rp->c = get_chunk_from_address(fcb->Vcb, rp->addr);
|
|
|
|
if (!rp->c) {
|
|
ERR("get_chunk_from_address(%I64x) failed\n", rp->addr);
|
|
|
|
if (rp->buf_free)
|
|
ExFreePool(rp->buf);
|
|
|
|
ExFreePool(rp);
|
|
|
|
Status = STATUS_INTERNAL_ERROR;
|
|
goto exit;
|
|
}
|
|
|
|
if (ext->csum) {
|
|
if (ed->compression == BTRFS_COMPRESSION_NONE) {
|
|
rp->csum = (uint8_t*)ext->csum + (fcb->Vcb->csum_size * (rp->extents[0].off >> fcb->Vcb->sector_shift));
|
|
} else
|
|
rp->csum = ext->csum;
|
|
} else
|
|
rp->csum = NULL;
|
|
|
|
rp->data = data + bytes_read;
|
|
rp->compression = ed->compression;
|
|
rp->extents[0].ed_offset = ed2->offset;
|
|
rp->extents[0].ed_size = ed2->size;
|
|
rp->extents[0].ed_num_bytes = ed2->num_bytes;
|
|
|
|
InsertTailList(&read_parts, &rp->list_entry);
|
|
|
|
bytes_read += rp->read;
|
|
length -= rp->read;
|
|
|
|
break;
|
|
}
|
|
|
|
case EXTENT_TYPE_PREALLOC:
|
|
{
|
|
uint64_t off = start + bytes_read - ext->offset;
|
|
uint32_t read = (uint32_t)(len - off);
|
|
|
|
if (read > length) read = (uint32_t)length;
|
|
|
|
RtlZeroMemory(data + bytes_read, read);
|
|
|
|
bytes_read += read;
|
|
length -= read;
|
|
|
|
break;
|
|
}
|
|
|
|
default:
|
|
WARN("Unsupported extent data type %u\n", ed->type);
|
|
Status = STATUS_NOT_IMPLEMENTED;
|
|
goto exit;
|
|
}
|
|
|
|
last_end = ext->offset + len;
|
|
|
|
if (length == 0)
|
|
break;
|
|
}
|
|
|
|
nextitem:
|
|
le = le->Flink;
|
|
}
|
|
|
|
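    // Coalesce adjacent compressed read_parts that are contiguous both on disk and in the
    // output buffer (and belong to the same chunk), so they can be fetched with a single
    // read_data call below.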
if (!IsListEmpty(&read_parts) && read_parts.Flink->Flink != &read_parts) { // at least two entries in list
|
|
read_part* last_rp = CONTAINING_RECORD(read_parts.Flink, read_part, list_entry);
|
|
|
|
le = read_parts.Flink->Flink;
|
|
while (le != &read_parts) {
|
|
LIST_ENTRY* le2 = le->Flink;
|
|
read_part* rp = CONTAINING_RECORD(le, read_part, list_entry);
|
|
|
|
// merge together runs
|
|
if (rp->compression != BTRFS_COMPRESSION_NONE && rp->compression == last_rp->compression && rp->addr == last_rp->addr + last_rp->to_read &&
|
|
rp->data == (uint8_t*)last_rp->data + last_rp->read && rp->c == last_rp->c && ((rp->csum && last_rp->csum) || (!rp->csum && !last_rp->csum))) {
|
|
                read_part* rp2;

                rp2 = ExAllocatePoolWithTag(pool_type, offsetof(read_part, extents) + (sizeof(read_part_extent) * (last_rp->num_extents + 1)), ALLOC_TAG);
                if (!rp2) {
                    ERR("out of memory\n");
                    Status = STATUS_INSUFFICIENT_RESOURCES;
                    goto exit;
                }

                rp2->addr = last_rp->addr;
|
|
rp2->c = last_rp->c;
|
|
rp2->read = last_rp->read + rp->read;
|
|
rp2->to_read = last_rp->to_read + rp->to_read;
|
|
rp2->csum_free = false;
|
|
|
|
if (last_rp->csum) {
|
|
uint32_t sectors = (last_rp->to_read + rp->to_read) >> fcb->Vcb->sector_shift;
|
|
|
|
rp2->csum = ExAllocatePoolWithTag(pool_type, sectors * fcb->Vcb->csum_size, ALLOC_TAG);
|
|
if (!rp2->csum) {
|
|
ERR("out of memory\n");
|
|
ExFreePool(rp2);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
RtlCopyMemory(rp2->csum, last_rp->csum, (last_rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift);
|
|
RtlCopyMemory((uint8_t*)rp2->csum + ((last_rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift), rp->csum,
|
|
(rp->to_read * fcb->Vcb->csum_size) >> fcb->Vcb->sector_shift);
|
|
|
|
rp2->csum_free = true;
|
|
} else
|
|
rp2->csum = NULL;
|
|
|
|
rp2->buf = ExAllocatePoolWithTag(pool_type, rp2->to_read, ALLOC_TAG);
|
|
if (!rp2->buf) {
|
|
ERR("out of memory\n");
|
|
|
|
if (rp2->csum)
|
|
ExFreePool(rp2->csum);
|
|
|
|
ExFreePool(rp2);
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
rp2->buf_free = true;
|
|
rp2->bumpoff = 0;
|
|
rp2->mdl = false;
|
|
rp2->data = last_rp->data;
|
|
rp2->compression = last_rp->compression;
|
|
rp2->num_extents = last_rp->num_extents + 1;
|
|
|
|
RtlCopyMemory(rp2->extents, last_rp->extents, last_rp->num_extents * sizeof(read_part_extent));
|
|
RtlCopyMemory(&rp2->extents[last_rp->num_extents], rp->extents, sizeof(read_part_extent));
|
|
|
|
InsertHeadList(le->Blink, &rp2->list_entry);
|
|
|
|
if (rp->buf_free)
|
|
ExFreePool(rp->buf);
|
|
|
|
if (rp->csum_free)
|
|
ExFreePool(rp->csum);
|
|
|
|
RemoveEntryList(&rp->list_entry);
|
|
|
|
ExFreePool(rp);
|
|
|
|
if (last_rp->buf_free)
|
|
ExFreePool(last_rp->buf);
|
|
|
|
if (last_rp->csum_free)
|
|
ExFreePool(last_rp->csum);
|
|
|
|
RemoveEntryList(&last_rp->list_entry);
|
|
|
|
ExFreePool(last_rp);
|
|
|
|
last_rp = rp2;
|
|
} else
|
|
last_rp = rp;
|
|
|
|
le = le2;
|
|
}
|
|
}
|
|
|
|
le = read_parts.Flink;
|
|
while (le != &read_parts) {
|
|
read_part* rp = CONTAINING_RECORD(le, read_part, list_entry);
|
|
|
|
Status = read_data(fcb->Vcb, rp->addr, rp->to_read, rp->csum, false, rp->buf, rp->c, NULL, Irp, 0, rp->mdl,
|
|
fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("read_data returned %08lx\n", Status);
|
|
goto exit;
|
|
}
|
|
|
|
if (rp->compression == BTRFS_COMPRESSION_NONE) {
|
|
if (rp->buf_free)
|
|
RtlCopyMemory(rp->data, rp->buf + rp->bumpoff, rp->read);
|
|
} else {
|
|
uint8_t* buf = rp->buf;
|
|
|
|
for (unsigned int i = 0; i < rp->num_extents; i++) {
|
|
uint8_t *decomp = NULL, *buf2;
|
|
ULONG outlen, inlen, off2;
|
|
uint32_t inpageoff = 0;
|
|
comp_calc_job* ccj;
|
|
|
|
off2 = (ULONG)(rp->extents[i].ed_offset + rp->extents[i].off);
|
|
buf2 = buf;
|
|
inlen = (ULONG)rp->extents[i].ed_size;
|
|
|
|
if (rp->compression == BTRFS_COMPRESSION_LZO) {
|
|
ULONG inoff = sizeof(uint32_t);
|
|
|
|
inlen -= sizeof(uint32_t);
|
|
|
|
// If reading a few sectors in, skip to the interesting bit
|
|
while (off2 > LZO_PAGE_SIZE) {
|
|
uint32_t partlen;
|
|
|
|
if (inlen < sizeof(uint32_t))
|
|
break;
|
|
|
|
partlen = *(uint32_t*)(buf2 + inoff);
|
|
|
|
if (partlen < inlen) {
|
|
off2 -= LZO_PAGE_SIZE;
|
|
inoff += partlen + sizeof(uint32_t);
|
|
inlen -= partlen + sizeof(uint32_t);
|
|
|
|
if (LZO_PAGE_SIZE - (inoff % LZO_PAGE_SIZE) < sizeof(uint32_t))
|
|
inoff = ((inoff / LZO_PAGE_SIZE) + 1) * LZO_PAGE_SIZE;
|
|
} else
|
|
break;
|
|
}
|
|
|
|
buf2 = &buf2[inoff];
|
|
inpageoff = inoff % LZO_PAGE_SIZE;
|
|
}
|
|
|
|
/* Previous versions of this code decompressed directly into the destination buffer,
|
|
* but unfortunately that can't be relied on - Windows likes to use dummy pages sometimes
|
|
* when mmap-ing, which breaks the backtracking used by e.g. zstd. */
|
|
|
|
if (off2 != 0)
|
|
outlen = off2 + min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
|
|
else
|
|
outlen = min(rp->read, (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off));
|
|
|
|
decomp = ExAllocatePoolWithTag(pool_type, outlen, ALLOC_TAG);
|
|
if (!decomp) {
|
|
ERR("out of memory\n");
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
ccj = (comp_calc_job*)ExAllocatePoolWithTag(pool_type, sizeof(comp_calc_job), ALLOC_TAG);
|
|
if (!ccj) {
|
|
ERR("out of memory\n");
|
|
|
|
ExFreePool(decomp);
|
|
|
|
Status = STATUS_INSUFFICIENT_RESOURCES;
|
|
goto exit;
|
|
}
|
|
|
|
ccj->data = rp->data;
|
|
ccj->decomp = decomp;
|
|
|
|
ccj->offset = off2;
|
|
ccj->length = (size_t)min(rp->read, rp->extents[i].ed_num_bytes - rp->extents[i].off);
|
|
|
|
Status = add_calc_job_decomp(fcb->Vcb, rp->compression, buf2, inlen, decomp, outlen,
|
|
inpageoff, &ccj->cj);
|
|
if (!NT_SUCCESS(Status)) {
|
|
ERR("add_calc_job_decomp returned %08lx\n", Status);
|
|
|
|
ExFreePool(decomp);
|
|
ExFreePool(ccj);
|
|
|
|
goto exit;
|
|
}
|
|
|
|
InsertTailList(&calc_jobs, &ccj->list_entry);
|
|
|
|
buf += rp->extents[i].ed_size;
|
|
rp->data = (uint8_t*)rp->data + rp->extents[i].ed_num_bytes - rp->extents[i].off;
|
|
rp->read -= (uint32_t)(rp->extents[i].ed_num_bytes - rp->extents[i].off);
|
|
}
|
|
}
|
|
|
|
le = le->Flink;
|
|
}
|
|
|
|
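    // anything past the last extent but still within st_size is an implicit hole - zero it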
if (length > 0 && start + bytes_read < fcb->inode_item.st_size) {
|
|
uint32_t read = (uint32_t)min(fcb->inode_item.st_size - start - bytes_read, length);
|
|
|
|
RtlZeroMemory(data + bytes_read, read);
|
|
|
|
bytes_read += read;
|
|
length -= read;
|
|
}
|
|
|
|
Status = STATUS_SUCCESS;
|
|
|
|
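    // Run the queued decompression jobs, wait for each to finish, and copy the decompressed
    // data into the caller's buffer.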
while (!IsListEmpty(&calc_jobs)) {
|
|
comp_calc_job* ccj = CONTAINING_RECORD(RemoveTailList(&calc_jobs), comp_calc_job, list_entry);
|
|
|
|
calc_thread_main(fcb->Vcb, ccj->cj);
|
|
|
|
KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL);
|
|
|
|
if (!NT_SUCCESS(ccj->cj->Status))
|
|
Status = ccj->cj->Status;
|
|
|
|
RtlCopyMemory(ccj->data, (uint8_t*)ccj->decomp + ccj->offset, ccj->length);
|
|
ExFreePool(ccj->decomp);
|
|
|
|
ExFreePool(ccj);
|
|
}
|
|
|
|
if (pbr)
|
|
*pbr = bytes_read;
|
|
|
|
exit:
|
|
while (!IsListEmpty(&read_parts)) {
|
|
read_part* rp = CONTAINING_RECORD(RemoveHeadList(&read_parts), read_part, list_entry);
|
|
|
|
if (rp->buf_free)
|
|
ExFreePool(rp->buf);
|
|
|
|
if (rp->csum_free)
|
|
ExFreePool(rp->csum);
|
|
|
|
ExFreePool(rp);
|
|
}
|
|
|
|
while (!IsListEmpty(&calc_jobs)) {
|
|
comp_calc_job* ccj = CONTAINING_RECORD(RemoveHeadList(&calc_jobs), comp_calc_job, list_entry);
|
|
|
|
KeWaitForSingleObject(&ccj->cj->event, Executive, KernelMode, false, NULL);
|
|
|
|
if (ccj->decomp)
|
|
ExFreePool(ccj->decomp);
|
|
|
|
ExFreePool(ccj->cj);
|
|
|
|
ExFreePool(ccj);
|
|
}
|
|
|
|
return Status;
|
|
}
|
|
|
|
NTSTATUS do_read(PIRP Irp, bool wait, ULONG* bytes_read) {
|
|
PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
|
|
PFILE_OBJECT FileObject = IrpSp->FileObject;
|
|
fcb* fcb = FileObject->FsContext;
|
|
uint8_t* data = NULL;
|
|
ULONG length = IrpSp->Parameters.Read.Length, addon = 0;
|
|
uint64_t start = IrpSp->Parameters.Read.ByteOffset.QuadPart;
|
|
|
|
*bytes_read = 0;
|
|
|
|
if (!fcb || !fcb->Vcb || !fcb->subvol)
|
|
return STATUS_INTERNAL_ERROR;
|
|
|
|
TRACE("fcb = %p\n", fcb);
|
|
TRACE("offset = %I64x, length = %lx\n", start, length);
|
|
TRACE("paging_io = %s, no cache = %s\n", Irp->Flags & IRP_PAGING_IO ? "true" : "false", Irp->Flags & IRP_NOCACHE ? "true" : "false");
|
|
|
|
if (!fcb->ads && fcb->type == BTRFS_TYPE_DIRECTORY)
|
|
return STATUS_INVALID_DEVICE_REQUEST;
|
|
|
|
if (!(Irp->Flags & IRP_PAGING_IO) && !FsRtlCheckLockForReadAccess(&fcb->lock, Irp)) {
|
|
WARN("tried to read locked region\n");
|
|
return STATUS_FILE_LOCK_CONFLICT;
|
|
}
|
|
|
|
if (length == 0) {
|
|
TRACE("tried to read zero bytes\n");
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
if (start >= (uint64_t)fcb->Header.FileSize.QuadPart) {
|
|
TRACE("tried to read with offset after file end (%I64x >= %I64x)\n", start, fcb->Header.FileSize.QuadPart);
|
|
return STATUS_END_OF_FILE;
|
|
}
|
|
|
|
TRACE("FileObject %p fcb %p FileSize = %I64x st_size = %I64x (%p)\n", FileObject, fcb, fcb->Header.FileSize.QuadPart, fcb->inode_item.st_size, &fcb->inode_item.st_size);
|
|
|
|
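    // If caching is allowed and the caller asked for an MDL read, hand the request to the
    // cache manager via CcMdlRead, creating the private cache map first if necessary.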
if (!(Irp->Flags & IRP_NOCACHE) && IrpSp->MinorFunction & IRP_MN_MDL) {
|
|
NTSTATUS Status = STATUS_SUCCESS;
|
|
|
|
_SEH2_TRY {
|
|
if (!FileObject->PrivateCacheMap) {
|
|
CC_FILE_SIZES ccfs;
|
|
|
|
ccfs.AllocationSize = fcb->Header.AllocationSize;
|
|
ccfs.FileSize = fcb->Header.FileSize;
|
|
ccfs.ValidDataLength = fcb->Header.ValidDataLength;
|
|
|
|
init_file_cache(FileObject, &ccfs);
|
|
}
|
|
|
|
CcMdlRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, &Irp->MdlAddress, &Irp->IoStatus);
|
|
} _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
|
|
Status = _SEH2_GetExceptionCode();
|
|
} _SEH2_END;
|
|
|
|
if (NT_SUCCESS(Status)) {
|
|
Status = Irp->IoStatus.Status;
|
|
Irp->IoStatus.Information += addon;
|
|
*bytes_read = (ULONG)Irp->IoStatus.Information;
|
|
} else
|
|
ERR("EXCEPTION - %08lx\n", Status);
|
|
|
|
return Status;
|
|
}
|
|
|
|
data = map_user_buffer(Irp, fcb->Header.Flags2 & FSRTL_FLAG2_IS_PAGING_FILE ? HighPagePriority : NormalPagePriority);
|
|
|
|
if (Irp->MdlAddress && !data) {
|
|
ERR("MmGetSystemAddressForMdlSafe returned NULL\n");
|
|
return STATUS_INSUFFICIENT_RESOURCES;
|
|
}
|
|
|
|
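    // Reads at or beyond ValidDataLength return zeroes without touching the disk; reads that
    // straddle it are clipped to ValidDataLength, with the zeroed tail (addon) added back to
    // the byte count afterwards.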
if (start >= (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
|
|
length = (ULONG)min(length, min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
|
|
RtlZeroMemory(data, length);
|
|
Irp->IoStatus.Information = *bytes_read = length;
|
|
return STATUS_SUCCESS;
|
|
}
|
|
|
|
if (length + start > (uint64_t)fcb->Header.ValidDataLength.QuadPart) {
|
|
addon = (ULONG)(min(start + length, (uint64_t)fcb->Header.FileSize.QuadPart) - fcb->Header.ValidDataLength.QuadPart);
|
|
RtlZeroMemory(data + (fcb->Header.ValidDataLength.QuadPart - start), addon);
|
|
length = (ULONG)(fcb->Header.ValidDataLength.QuadPart - start);
|
|
}
|
|
|
|
    if (!(Irp->Flags & IRP_NOCACHE)) {
        NTSTATUS Status = STATUS_SUCCESS;

        _SEH2_TRY {
            if (!FileObject->PrivateCacheMap) {
                CC_FILE_SIZES ccfs;

                ccfs.AllocationSize = fcb->Header.AllocationSize;
                ccfs.FileSize = fcb->Header.FileSize;
                ccfs.ValidDataLength = fcb->Header.ValidDataLength;

                init_file_cache(FileObject, &ccfs);
            }

            if (fCcCopyReadEx) {
                TRACE("CcCopyReadEx(%p, %I64x, %lx, %u, %p, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart,
                      length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread);
                TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
                if (!fCcCopyReadEx(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus, Irp->Tail.Overlay.Thread)) {
                    TRACE("CcCopyReadEx could not wait\n");

                    IoMarkIrpPending(Irp);
                    return STATUS_PENDING;
                }
                TRACE("CcCopyReadEx finished\n");
            } else {
                TRACE("CcCopyRead(%p, %I64x, %lx, %u, %p, %p)\n", FileObject, IrpSp->Parameters.Read.ByteOffset.QuadPart, length, wait, data, &Irp->IoStatus);
                TRACE("sizes = %I64x, %I64x, %I64x\n", fcb->Header.AllocationSize.QuadPart, fcb->Header.FileSize.QuadPart, fcb->Header.ValidDataLength.QuadPart);
                if (!CcCopyRead(FileObject, &IrpSp->Parameters.Read.ByteOffset, length, wait, data, &Irp->IoStatus)) {
                    TRACE("CcCopyRead could not wait\n");

                    IoMarkIrpPending(Irp);
                    return STATUS_PENDING;
                }
                TRACE("CcCopyRead finished\n");
            }
        } _SEH2_EXCEPT (EXCEPTION_EXECUTE_HANDLER) {
            Status = _SEH2_GetExceptionCode();
        } _SEH2_END;

        if (NT_SUCCESS(Status)) {
            Status = Irp->IoStatus.Status;
            Irp->IoStatus.Information += addon;
            *bytes_read = (ULONG)Irp->IoStatus.Information;
        } else
            ERR("EXCEPTION - %08lx\n", Status);

        return Status;
    } else {
        NTSTATUS Status;

        if (!wait) {
            IoMarkIrpPending(Irp);
            return STATUS_PENDING;
        }
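        // Non-cached read: fetch the data directly. Alternate data streams are
        // read via read_stream, ordinary file data via read_file.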
        if (fcb->ads) {
            Status = read_stream(fcb, data, start, length, bytes_read);

            if (!NT_SUCCESS(Status))
                ERR("read_stream returned %08lx\n", Status);
        } else {
            Status = read_file(fcb, data, start, length, bytes_read, Irp);

            if (!NT_SUCCESS(Status))
                ERR("read_file returned %08lx\n", Status);
        }

        *bytes_read += addon;
        TRACE("read %lu bytes\n", *bytes_read);

        Irp->IoStatus.Information = *bytes_read;
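        // If PsUpdateDiskCounters is available, account the bytes read to the
        // process that issued the I/O.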
        if (diskacc && Status != STATUS_PENDING) {
            PETHREAD thread = NULL;

            if (Irp->Tail.Overlay.Thread && !IoIsSystemThread(Irp->Tail.Overlay.Thread))
                thread = Irp->Tail.Overlay.Thread;
            else if (!IoIsSystemThread(PsGetCurrentThread()))
                thread = PsGetCurrentThread();
            else if (IoIsSystemThread(PsGetCurrentThread()) && IoGetTopLevelIrp() == Irp)
                thread = PsGetCurrentThread();

            if (thread)
                fPsUpdateDiskCounters(PsGetThreadProcess(thread), *bytes_read, 0, 1, 0, 0);
        }

        return Status;
    }
}
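// Dispatch routine for IRP_MJ_READ: validates the request, flushes and locks as
// needed, then hands the actual work to do_read, posting the IRP to a worker
// thread when the read cannot be completed synchronously.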
_Dispatch_type_(IRP_MJ_READ)
_Function_class_(DRIVER_DISPATCH)
NTSTATUS __stdcall drv_read(PDEVICE_OBJECT DeviceObject, PIRP Irp) {
    device_extension* Vcb = DeviceObject->DeviceExtension;
    PIO_STACK_LOCATION IrpSp = IoGetCurrentIrpStackLocation(Irp);
    PFILE_OBJECT FileObject = IrpSp->FileObject;
    ULONG bytes_read = 0;
    NTSTATUS Status;
    bool top_level;
    fcb* fcb;
    ccb* ccb;
    bool acquired_fcb_lock = false, wait;

    FsRtlEnterFileSystem();

    top_level = is_top_level(Irp);

    TRACE("read\n");

    if (Vcb && Vcb->type == VCB_TYPE_VOLUME) {
        Status = vol_read(DeviceObject, Irp);
        goto exit2;
    } else if (!Vcb || Vcb->type != VCB_TYPE_FS) {
        Status = STATUS_INVALID_PARAMETER;
        goto end;
    }

    Irp->IoStatus.Information = 0;

    if (IrpSp->MinorFunction & IRP_MN_COMPLETE) {
        CcMdlReadComplete(IrpSp->FileObject, Irp->MdlAddress);

        Irp->MdlAddress = NULL;
        Status = STATUS_SUCCESS;

        goto exit;
    }

    fcb = FileObject->FsContext;

    if (!fcb) {
        ERR("fcb was NULL\n");
        Status = STATUS_INVALID_PARAMETER;
        goto exit;
    }

    ccb = FileObject->FsContext2;

    if (!ccb) {
        ERR("ccb was NULL\n");
        Status = STATUS_INVALID_PARAMETER;
        goto exit;
    }

    if (Irp->RequestorMode == UserMode && !(ccb->access & FILE_READ_DATA)) {
        WARN("insufficient privileges\n");
        Status = STATUS_ACCESS_DENIED;
        goto exit;
    }

    if (fcb == Vcb->volume_fcb) {
        TRACE("reading volume FCB\n");

        IoSkipCurrentIrpStackLocation(Irp);

        Status = IoCallDriver(Vcb->Vpb->RealDevice, Irp);

        goto exit2;
    }
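    // Let the oplock package see the read, then decide whether this request is
    // allowed to block.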
    if (!(Irp->Flags & IRP_PAGING_IO))
        FsRtlCheckOplock(fcb_oplock(fcb), Irp, NULL, NULL, NULL);

    wait = IoIsOperationSynchronous(Irp);

    // Don't offload jobs when doing paging IO - otherwise this can lead to
    // deadlocks in CcCopyRead.
    if (Irp->Flags & IRP_PAGING_IO)
        wait = true;
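    // If the file has a data section, flush the requested range out of the cache
    // first so that the read sees up-to-date data. Paging I/O skips this.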
    if (!(Irp->Flags & IRP_PAGING_IO) && FileObject->SectionObjectPointer && FileObject->SectionObjectPointer->DataSectionObject) {
        IO_STATUS_BLOCK iosb;

        CcFlushCache(FileObject->SectionObjectPointer, &IrpSp->Parameters.Read.ByteOffset, IrpSp->Parameters.Read.Length, &iosb);
        if (!NT_SUCCESS(iosb.Status)) {
            ERR("CcFlushCache returned %08lx\n", iosb.Status);
            return iosb.Status;
        }
    }
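    // Take the fcb's resource shared unless this thread already holds it; if we
    // cannot wait for it, mark the IRP pending so it gets retried from a worker
    // thread.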
    if (!ExIsResourceAcquiredSharedLite(fcb->Header.Resource)) {
        if (!ExAcquireResourceSharedLite(fcb->Header.Resource, wait)) {
            Status = STATUS_PENDING;
            IoMarkIrpPending(Irp);
            goto exit;
        }

        acquired_fcb_lock = true;
    }

    Status = do_read(Irp, wait, &bytes_read);

    if (acquired_fcb_lock)
        ExReleaseResourceLite(fcb->Header.Resource);

exit:
    if (FileObject->Flags & FO_SYNCHRONOUS_IO && !(Irp->Flags & IRP_PAGING_IO))
        FileObject->CurrentByteOffset.QuadPart = IrpSp->Parameters.Read.ByteOffset.QuadPart + (NT_SUCCESS(Status) ? bytes_read : 0);

end:
    Irp->IoStatus.Status = Status;

    TRACE("Irp->IoStatus.Status = %08lx\n", Irp->IoStatus.Status);
    TRACE("Irp->IoStatus.Information = %Iu\n", Irp->IoStatus.Information);
    TRACE("returning %08lx\n", Status);
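    // Complete the IRP now if we have a final status; otherwise queue it to a
    // worker thread, falling back to do_read_job if no thread can take it.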
    if (Status != STATUS_PENDING)
        IoCompleteRequest(Irp, IO_NO_INCREMENT);
    else {
        if (!add_thread_job(Vcb, Irp))
            Status = do_read_job(Irp);
    }

exit2:
    if (top_level)
        IoSetTopLevelIrp(NULL);

    FsRtlExitFileSystem();

    return Status;
}