# PROJECT: Python tools for traversing BTRFS structures
# LICENSE: GPL-2.0+ (https://spdx.org/licenses/GPL-2.0+)
# PURPOSE: Classes and structures for BTRFS on-disk layout
# COPYRIGHT: Copyright 2018 Victor Perevertkin (victor@perevertkin.ru)
# some code was taken from https://github.com/knorrie/python-btrfs

from btrfs_constants import *
import struct
from collections import namedtuple, OrderedDict
import collections.abc
import copy
import datetime
import os
import uuid
import crc32c

ULLONG_MAX = (1 << 64) - 1
ULONG_MAX = (1 << 32) - 1


def ULL(n):
    return n & ULLONG_MAX


ROOT_TREE_OBJECTID = 1
EXTENT_TREE_OBJECTID = 2
CHUNK_TREE_OBJECTID = 3
DEV_TREE_OBJECTID = 4
FS_TREE_OBJECTID = 5
ROOT_TREE_DIR_OBJECTID = 6
CSUM_TREE_OBJECTID = 7
QUOTA_TREE_OBJECTID = 8
UUID_TREE_OBJECTID = 9
FREE_SPACE_TREE_OBJECTID = 10

DEV_STATS_OBJECTID = 0
BALANCE_OBJECTID = ULL(-4)
ORPHAN_OBJECTID = ULL(-5)
TREE_LOG_OBJECTID = ULL(-6)
TREE_LOG_FIXUP_OBJECTID = ULL(-7)
TREE_RELOC_OBJECTID = ULL(-8)
DATA_RELOC_TREE_OBJECTID = ULL(-9)
EXTENT_CSUM_OBJECTID = ULL(-10)
FREE_SPACE_OBJECTID = ULL(-11)
FREE_INO_OBJECTID = ULL(-12)
MULTIPLE_OBJECTIDS = ULL(-255)

FIRST_FREE_OBJECTID = 256
LAST_FREE_OBJECTID = ULL(-256)

FIRST_CHUNK_TREE_OBJECTID = 256

DEV_ITEMS_OBJECTID = 1

BTRFS_SYSTEM_CHUNK_ARRAY_SIZE = 2048

INODE_ITEM_KEY = 1
INODE_REF_KEY = 12
INODE_EXTREF_KEY = 13
XATTR_ITEM_KEY = 24
ORPHAN_ITEM_KEY = 48
DIR_LOG_ITEM_KEY = 60
DIR_LOG_INDEX_KEY = 72
DIR_ITEM_KEY = 84
DIR_INDEX_KEY = 96
EXTENT_DATA_KEY = 108
EXTENT_CSUM_KEY = 128
ROOT_ITEM_KEY = 132
ROOT_BACKREF_KEY = 144
ROOT_REF_KEY = 156
EXTENT_ITEM_KEY = 168
METADATA_ITEM_KEY = 169
TREE_BLOCK_REF_KEY = 176
EXTENT_DATA_REF_KEY = 178
SHARED_BLOCK_REF_KEY = 182
SHARED_DATA_REF_KEY = 184
BLOCK_GROUP_ITEM_KEY = 192
FREE_SPACE_INFO_KEY = 198
FREE_SPACE_EXTENT_KEY = 199
FREE_SPACE_BITMAP_KEY = 200
DEV_EXTENT_KEY = 204
DEV_ITEM_KEY = 216
CHUNK_ITEM_KEY = 228
QGROUP_STATUS_KEY = 240
QGROUP_INFO_KEY = 242
QGROUP_LIMIT_KEY = 244
QGROUP_RELATION_KEY = 246
BALANCE_ITEM_KEY = 248
DEV_STATS_KEY = 249
DEV_REPLACE_KEY = 250
UUID_KEY_SUBVOL = 251
UUID_KEY_RECEIVED_SUBVOL = 252
STRING_ITEM_KEY = 253

BLOCK_GROUP_SINGLE = 0
BLOCK_GROUP_DATA = 1 << 0
BLOCK_GROUP_SYSTEM = 1 << 1
BLOCK_GROUP_METADATA = 1 << 2
BLOCK_GROUP_RAID0 = 1 << 3
BLOCK_GROUP_RAID1 = 1 << 4
BLOCK_GROUP_DUP = 1 << 5
BLOCK_GROUP_RAID10 = 1 << 6
BLOCK_GROUP_RAID5 = 1 << 7
BLOCK_GROUP_RAID6 = 1 << 8

BLOCK_GROUP_TYPE_MASK = (
    BLOCK_GROUP_DATA |
    BLOCK_GROUP_SYSTEM |
    BLOCK_GROUP_METADATA
)

BLOCK_GROUP_PROFILE_MASK = (
    BLOCK_GROUP_RAID0 |
    BLOCK_GROUP_RAID1 |
    BLOCK_GROUP_RAID5 |
    BLOCK_GROUP_RAID6 |
    BLOCK_GROUP_DUP |
    BLOCK_GROUP_RAID10
)

AVAIL_ALLOC_BIT_SINGLE = 1 << 48  # used in balance_args
SPACE_INFO_GLOBAL_RSV = 1 << 49

_block_group_flags_str_map = {
    BLOCK_GROUP_DATA: 'DATA',
    BLOCK_GROUP_METADATA: 'METADATA',
    BLOCK_GROUP_SYSTEM: 'SYSTEM',
    BLOCK_GROUP_RAID0: 'RAID0',
    BLOCK_GROUP_RAID1: 'RAID1',
    BLOCK_GROUP_DUP: 'DUP',
    BLOCK_GROUP_RAID10: 'RAID10',
    BLOCK_GROUP_RAID5: 'RAID5',
    BLOCK_GROUP_RAID6: 'RAID6',
}

_balance_args_profiles_str_map = {
    BLOCK_GROUP_RAID0: 'RAID0',
    BLOCK_GROUP_RAID1: 'RAID1',
    BLOCK_GROUP_DUP: 'DUP',
    BLOCK_GROUP_RAID10: 'RAID10',
    BLOCK_GROUP_RAID5: 'RAID5',
    BLOCK_GROUP_RAID6: 'RAID6',
    AVAIL_ALLOC_BIT_SINGLE: 'SINGLE',
}

QGROUP_LEVEL_SHIFT = 48

EXTENT_FLAG_DATA = 1 << 0
EXTENT_FLAG_TREE_BLOCK = 1 << 1
BLOCK_FLAG_FULL_BACKREF = 1 << 8

_extent_flags_str_map = {
    EXTENT_FLAG_DATA: 'DATA',
    EXTENT_FLAG_TREE_BLOCK: 'TREE_BLOCK',
    BLOCK_FLAG_FULL_BACKREF: 'FULL_BACKREF',
}

INODE_NODATASUM = 1 << 0
INODE_NODATACOW = 1 << 1
INODE_READONLY = 1 << 2
INODE_NOCOMPRESS = 1 << 3
INODE_PREALLOC = 1 << 4
INODE_SYNC = 1 << 5
INODE_IMMUTABLE = 1 << 6
INODE_APPEND = 1 << 7
INODE_NODUMP = 1 << 8
INODE_NOATIME = 1 << 9
INODE_DIRSYNC = 1 << 10
INODE_COMPRESS = 1 << 11

_inode_flags_str_map = {
    INODE_NODATASUM: 'NODATASUM',
    INODE_READONLY: 'READONLY',
    INODE_NOCOMPRESS: 'NOCOMPRESS',
    INODE_PREALLOC: 'PREALLOC',
    INODE_SYNC: 'SYNC',
    INODE_IMMUTABLE: 'IMMUTABLE',
    INODE_APPEND: 'APPEND',
    INODE_NODUMP: 'NODUMP',
    INODE_NOATIME: 'NOATIME',
    INODE_DIRSYNC: 'DIRSYNC',
    INODE_COMPRESS: 'COMPRESS',
}

ROOT_SUBVOL_RDONLY = 1 << 0

_root_flags_str_map = {
    ROOT_SUBVOL_RDONLY: 'RDONLY',
}

FT_UNKNOWN = 0
FT_REG_FILE = 1
FT_DIR = 2
FT_CHRDEV = 3
FT_BLKDEV = 4
FT_FIFO = 5
FT_SOCK = 6
FT_SYMLINK = 7
FT_XATTR = 8
FT_MAX = 9

_dir_item_type_str_map = {
    FT_UNKNOWN: 'UNKNOWN',
    FT_REG_FILE: 'FILE',
    FT_DIR: 'DIR',
    FT_CHRDEV: 'CHRDEV',
    FT_BLKDEV: 'BLKDEV',
    FT_FIFO: 'FIFO',
    FT_SOCK: 'SOCK',
    FT_SYMLINK: 'SYMLINK',
    FT_XATTR: 'XATTR',
}

COMPRESS_NONE = 0
COMPRESS_ZLIB = 1
COMPRESS_LZO = 2
COMPRESS_ZSTD = 3

_compress_type_str_map = {
    COMPRESS_NONE: 'none',
    COMPRESS_ZLIB: 'zlib',
    COMPRESS_LZO: 'lzo',
    COMPRESS_ZSTD: 'zstd',
}

FILE_EXTENT_INLINE = 0
FILE_EXTENT_REG = 1
FILE_EXTENT_PREALLOC = 2

_file_extent_type_str_map = {
    FILE_EXTENT_INLINE: 'inline',
    FILE_EXTENT_REG: 'regular',
    FILE_EXTENT_PREALLOC: 'prealloc',
}


def qgroup_level(objectid):
    return objectid >> QGROUP_LEVEL_SHIFT


def qgroup_subvid(objectid):
    return objectid & ((1 << QGROUP_LEVEL_SHIFT) - 1)


_key_objectid_str_map = {
    ROOT_TREE_OBJECTID: 'ROOT_TREE',
    EXTENT_TREE_OBJECTID: 'EXTENT_TREE',
    CHUNK_TREE_OBJECTID: 'CHUNK_TREE',
    DEV_TREE_OBJECTID: 'DEV_TREE',
    FS_TREE_OBJECTID: 'FS_TREE',
    ROOT_TREE_DIR_OBJECTID: 'ROOT_TREE_DIR',
    CSUM_TREE_OBJECTID: 'CSUM_TREE',
    QUOTA_TREE_OBJECTID: 'QUOTA_TREE',
    UUID_TREE_OBJECTID: 'UUID_TREE',
    FREE_SPACE_TREE_OBJECTID: 'FREE_SPACE_TREE',
    BALANCE_OBJECTID: 'BALANCE',
    ORPHAN_OBJECTID: 'ORPHAN',
    TREE_LOG_OBJECTID: 'TREE_LOG',
    TREE_LOG_FIXUP_OBJECTID: 'TREE_LOG_FIXUP',
    TREE_RELOC_OBJECTID: 'TREE_RELOC',
    DATA_RELOC_TREE_OBJECTID: 'DATA_RELOC_TREE',
    EXTENT_CSUM_OBJECTID: 'EXTENT_CSUM',
    FREE_SPACE_OBJECTID: 'FREE_SPACE',
    FREE_INO_OBJECTID: 'FREE_INO',
    MULTIPLE_OBJECTIDS: 'MULTIPLE',
}


def key_objectid_str(objectid, _type):
    if _type == DEV_EXTENT_KEY:
        return str(objectid)
    if _type == QGROUP_RELATION_KEY:
        return "{}/{}".format(qgroup_level(objectid), qgroup_subvid(objectid))
    if _type == UUID_KEY_SUBVOL or _type == UUID_KEY_RECEIVED_SUBVOL:
        return "0x{:0>16x}".format(objectid)
    if objectid == ROOT_TREE_OBJECTID and _type == DEV_ITEM_KEY:
        return 'DEV_ITEMS'
    if objectid == DEV_STATS_OBJECTID and _type == DEV_STATS_KEY:
        return 'DEV_STATS'
    if objectid == FIRST_CHUNK_TREE_OBJECTID and _type == CHUNK_ITEM_KEY:
        return 'FIRST_CHUNK_TREE'
    if objectid == ULLONG_MAX:
        return '-1'
    return _key_objectid_str_map.get(objectid, str(objectid))


_key_type_str_map = {
    INODE_ITEM_KEY: 'INODE_ITEM',
    INODE_REF_KEY: 'INODE_REF',
    INODE_EXTREF_KEY: 'INODE_EXTREF',
    XATTR_ITEM_KEY: 'XATTR_ITEM',
    ORPHAN_ITEM_KEY: 'ORPHAN_ITEM',
    DIR_LOG_ITEM_KEY: 'DIR_LOG_ITEM',
    DIR_LOG_INDEX_KEY: 'DIR_LOG_INDEX',
    DIR_ITEM_KEY: 'DIR_ITEM',
    DIR_INDEX_KEY: 'DIR_INDEX',
    EXTENT_DATA_KEY: 'EXTENT_DATA',
    EXTENT_CSUM_KEY: 'EXTENT_CSUM',
    ROOT_ITEM_KEY: 'ROOT_ITEM',
    ROOT_BACKREF_KEY: 'ROOT_BACKREF',
    ROOT_REF_KEY: 'ROOT_REF',
    EXTENT_ITEM_KEY: 'EXTENT_ITEM',
    METADATA_ITEM_KEY: 'METADATA_ITEM',
    TREE_BLOCK_REF_KEY: 'TREE_BLOCK_REF',
    EXTENT_DATA_REF_KEY: 'EXTENT_DATA_REF',
    SHARED_BLOCK_REF_KEY: 'SHARED_BLOCK_REF',
    SHARED_DATA_REF_KEY: 'SHARED_DATA_REF',
    BLOCK_GROUP_ITEM_KEY: 'BLOCK_GROUP_ITEM',
    FREE_SPACE_INFO_KEY: 'FREE_SPACE_INFO',
    FREE_SPACE_EXTENT_KEY: 'FREE_SPACE_EXTENT',
    FREE_SPACE_BITMAP_KEY: 'FREE_SPACE_BITMAP',
    DEV_EXTENT_KEY: 'DEV_EXTENT',
    DEV_ITEM_KEY: 'DEV_ITEM',
    CHUNK_ITEM_KEY: 'CHUNK_ITEM',
    QGROUP_STATUS_KEY: 'QGROUP_STATUS',
    QGROUP_INFO_KEY: 'QGROUP_INFO',
    QGROUP_LIMIT_KEY: 'QGROUP_LIMIT',
    QGROUP_RELATION_KEY: 'QGROUP_RELATION',
    BALANCE_ITEM_KEY: 'BALANCE_ITEM',
    DEV_STATS_KEY: 'DEV_STATS',
    DEV_REPLACE_KEY: 'DEV_REPLACE',
    UUID_KEY_SUBVOL: 'UUID_SUBVOL',
    UUID_KEY_RECEIVED_SUBVOL: 'RECEIVED_SUBVOL',
    STRING_ITEM_KEY: 'STRING_ITEM',
}


# === Helper functions

def key_type_str(_type):
    return _key_type_str_map.get(_type, str(_type))


def key_offset_str(offset, _type):
    if _type == QGROUP_RELATION_KEY or _type == QGROUP_INFO_KEY or _type == QGROUP_LIMIT_KEY:
        return "{}/{}".format(qgroup_level(offset), qgroup_subvid(offset))
    if _type == UUID_KEY_SUBVOL or _type == UUID_KEY_RECEIVED_SUBVOL:
        return "0x{:0>16x}".format(offset)
    if _type == ROOT_ITEM_KEY:
        return _key_objectid_str_map.get(offset, str(offset))
    if offset == ULLONG_MAX:
        return '-1'
    return str(offset)


def flags_str(flags, flags_str_map):
    ret = []
    for flag in sorted(flags_str_map.keys()):
        if flags & flag:
            ret.append(flags_str_map[flag])
    if len(ret) == 0:
        ret.append("none")
    return '|'.join(ret)


def embedded_text_for_str(text):
    try:
        return "utf-8 {}".format(text.decode('utf-8'))
    except UnicodeDecodeError:
        return "raw {}".format(repr(text))


# === Basic structures

class TimeSpec(object):
    sstruct = struct.Struct('<QL')

    @staticmethod
    def from_values(sec, nsec):
        t = TimeSpec.__new__(TimeSpec)
        t.sec = sec
        t.nsec = nsec
        return t

    def __init__(self, data):
        self.sec, self.nsec = TimeSpec.sstruct.unpack_from(data)

    @property
    def iso8601(self):
        return datetime.datetime.utcfromtimestamp(
            float("{self.sec}.{self.nsec}".format(self=self))
        ).isoformat()

    def __str__(self):
        return "{self.sec}.{self.nsec} ({self.iso8601})".format(self=self)


class Key(object):
    def __init__(self, objectid, _type, offset):
        self._objectid = objectid
        self._type = _type
        self._offset = offset
        self._pack()

    @property
    def objectid(self):
        return self._objectid

    @objectid.setter
    def objectid(self, _objectid):
        self._objectid = _objectid
        self._pack()

    @property
    def type(self):
        return self._type

    @type.setter
    def type(self, _type):
        self._type = _type
        self._pack()

    @property
    def offset(self):
        return self._offset

    @offset.setter
    def offset(self, _offset):
        self._offset = _offset
        self._pack()

    @property
    def key(self):
        return self._key

    @key.setter
    def key(self, _key):
        self._key = _key
        self._unpack()

    def _pack(self):
        self._key = (self.objectid << 72) + (self._type << 64) + self.offset

    def _unpack(self):
        self._objectid = self._key >> 72
        self._type = (self._key & ((1 << 72) - 1)) >> 64
        self._offset = (self._key & ((1 << 64) - 1))

    def __lt__(self, other):
        if isinstance(other, Key):
            return self._key < other._key
        return self._key < other

    def __le__(self, other):
        if isinstance(other, Key):
            return self._key <= other._key
        return self._key <= other

    def __eq__(self, other):
        if isinstance(other, Key):
            return self._key == other._key
        return self._key == other

    def __ge__(self, other):
        if isinstance(other, Key):
            return self._key >= other._key
        return self._key >= other

    def __gt__(self, other):
        if isinstance(other, Key):
            return self._key > other._key
        return self._key > other

    def __str__(self):
        return "({} {} {})".format(
            key_objectid_str(self._objectid, self._type),
            key_type_str(self._type),
            key_offset_str(self._offset, self._type),
        )

    def __add__(self, amount):
        new_key = copy.copy(self)
        new_key.key += amount
        return new_key

    def __sub__(self, amount):
        new_key = copy.copy(self)
        new_key.key -= amount
        return new_key


class DiskKey(Key):
    sstruct = struct.Struct('<QBQ')

    def __init__(self, data):
        super(DiskKey, self).__init__(*DiskKey.sstruct.unpack_from(data))


class InnerKey(Key):
    sstruct = struct.Struct('<QBQQQ')

    def __init__(self, data):
        unpacked_data = InnerKey.sstruct.unpack_from(data)
        super().__init__(*unpacked_data[:3])
        self.block_num = unpacked_data[3]
        self.generation = unpacked_data[4]

    def __str__(self):
        return "(inner_key {} {} {} block_num {}, generation {})".format(
            key_objectid_str(self._objectid, self._type),
            key_type_str(self._type),
            key_offset_str(self._offset, self._type),
            self.block_num,
            self.generation,
        )


class LeafKey(Key):
    sstruct = struct.Struct('<QBQLL')

    def __init__(self, data):
        unpacked_data = LeafKey.sstruct.unpack_from(data)
        super().__init__(*unpacked_data[:3])
        self.data_offset = unpacked_data[3]
        self.data_size = unpacked_data[4]

    def __str__(self):
        return "(leaf_key {} {} {} data_offset {:#x} data_size {})".format(
            key_objectid_str(self._objectid, self._type),
            key_type_str(self._type),
            key_offset_str(self._offset, self._type),
            self.data_offset,
            self.data_size,
        )


class ItemData(object):
    def __init__(self, key):
        self.key = key

    def setattr_from_key(self, objectid_attr=None, type_attr=None, offset_attr=None):
        if objectid_attr is not None:
            setattr(self, objectid_attr, self.key.objectid)
        if type_attr is not None:
            setattr(self, type_attr, self.key.type)
        if offset_attr is not None:
            setattr(self, offset_attr, self.key.offset)
        self._key_attrs = objectid_attr, type_attr, offset_attr

    @property
    def key_attrs(self):
        try:
            return self._key_attrs
        except AttributeError:
            return None, None, None

    def __lt__(self, other):
        return self.key < other.key


superblock = struct.Struct('<32x16s2Q8s9Q5L4QH2B611x2048s')
# NOTE: the structure is not complete
# FS UUID
# Physical block address
# Flags
# Signature (_BHRfS_M)
# generation
# Log. address of root of tree roots
# Log. address of chunk tree root
# Log. address of log tree root
# log_root_transid
# total_bytes
# bytes_used
# root_dir_objectid (usually 6)
# num_devices
# sectorsize
# nodesize
# __unused_leafsize
# stripesize
# sys_chunk_array_size
# chunk_root_generation
# compat_flags
# compat_ro_flags
# incompat_flags
# csum_type
# root_level 23
# chunk_root_level 24
# ---
# sys_chunk_array

_node_header_struct = struct.Struct('<32x16sQQ16sQQLB')
NodeHeader = namedtuple('NodeHeader',
                        'FS_UUID node_addr flags chunk_tree_uuid generation tree_id items_num level')


# === Items

class InodeItem(ItemData):
    _inode_item = [
        struct.Struct('<5Q4L3Q32x'),
        TimeSpec.sstruct,
        TimeSpec.sstruct,
        TimeSpec.sstruct,
        TimeSpec.sstruct,
    ]
    sstruct = struct.Struct('<' + ''.join([s.format[1:].decode() for s in _inode_item]))

    def __init__(self, key, data):
        super().__init__(key)
        self.generation, self.transid, self.size, self.nbytes, self.block_group, \
            self.nlink, self.uid, self.gid, self.mode, self.rdev, self.flags, self.sequence = \
            InodeItem._inode_item[0].unpack_from(data)
        pos = InodeItem._inode_item[0].size
        next_pos = pos + TimeSpec.sstruct.size
        self.atime = TimeSpec(data[pos:next_pos])
        pos, next_pos = next_pos, next_pos + TimeSpec.sstruct.size
        self.ctime = TimeSpec(data[pos:next_pos])
        pos, next_pos = next_pos, next_pos + TimeSpec.sstruct.size
        self.mtime = TimeSpec(data[pos:next_pos])
        pos, next_pos = next_pos, next_pos + TimeSpec.sstruct.size
        self.otime = TimeSpec(data[pos:next_pos])

    @property
    def flags_str(self):
        return flags_str(self.flags, _inode_flags_str_map)

    def __str__(self):
        return "inode generation {self.generation} transid {self.transid} size {self.size} " \
               "nbytes {self.nbytes} block_group {self.block_group} mode {self.mode:05o} " \
               "nlink {self.nlink} uid {self.uid} gid {self.gid} rdev {self.rdev} " \
               "flags {self.flags:#x}({self.flags_str})".format(self=self)


class RootItem(ItemData):
    _root_item = [
        InodeItem.sstruct,
        struct.Struct('<7QL'),
        DiskKey.sstruct,
        struct.Struct('<BBQ16s16s16s4Q'),
        TimeSpec.sstruct,
        TimeSpec.sstruct,
        TimeSpec.sstruct,
        TimeSpec.sstruct,
    ]
    sstruct = struct.Struct('<' + ''.join([s.format[1:].decode() for s in _root_item]))

    def __init__(self, key, data):
        super().__init__(key)
        self.inode = InodeItem(None, data[:InodeItem.sstruct.size])
        pos = InodeItem.sstruct.size
        self.generation, self.dirid, self.bytenr, self.byte_limit, self.bytes_used, \
            self.last_snapshot, self.flags, self.refs = \
            RootItem._root_item[1].unpack_from(data, pos)
        pos += RootItem._root_item[1].size
        self.drop_progress = DiskKey(data[pos:pos+DiskKey.sstruct.size])
        pos += DiskKey.sstruct.size
        self.drop_level, self.level, self.generation_v2, uuid_bytes, parent_uuid_bytes, \
            received_uuid_bytes, self.ctransid, self.otransid, self.stransid, self.rtransid = \
            RootItem._root_item[3].unpack_from(data, pos)
        self.uuid = uuid.UUID(bytes=uuid_bytes)
        self.parent_uuid = uuid.UUID(bytes=parent_uuid_bytes)
        self.received_uuid = uuid.UUID(bytes=received_uuid_bytes)
        pos += RootItem._root_item[3].size
        next_pos = pos + TimeSpec.sstruct.size
        self.ctime = TimeSpec(data[pos:next_pos])
        pos, next_pos = next_pos, next_pos + TimeSpec.sstruct.size
        self.otime = TimeSpec(data[pos:next_pos])
        pos, next_pos = next_pos, next_pos + TimeSpec.sstruct.size
        self.stime = TimeSpec(data[pos:next_pos])
        pos, next_pos = next_pos, next_pos + TimeSpec.sstruct.size
        self.rtime = TimeSpec(data[pos:next_pos])

    @property
    def flags_str(self):
        return flags_str(self.flags, _root_flags_str_map)

    def __str__(self):
        return "root {self.key.objectid} uuid {self.uuid} " \
               "generation {self.generation} last_snapshot {self.last_snapshot} " \
               "bytenr {self.bytenr:#x} level {self.level} " \
               "flags {self.flags:#x}({self.flags_str})".format(self=self)


class Chunk(ItemData):
    sstruct = struct.Struct('<4Q3L2H')

    def __init__(self, key, data):
        super().__init__(key)
        self.setattr_from_key(offset_attr='vaddr')
        self.length, self.owner, self.stripe_len, self.type, self.io_align, \
            self.io_width, self.sector_size, self.num_stripes, self.sub_stripes = \
            Chunk.sstruct.unpack_from(data)
        self.stripes = []
        pos = Chunk.sstruct.size
        for i in range(self.num_stripes):
            next_pos = pos + Stripe.sstruct.size
            self.stripes.append(Stripe(data[pos:next_pos]))
            pos = next_pos

    @property
    def size(self):
        return Chunk.sstruct.size + self.num_stripes * Stripe.sstruct.size

    @property
    def type_str(self):
        return flags_str(self.type, _block_group_flags_str_map)

    def __str__(self):
        return "chunk vaddr {self.vaddr:#x} type {self.type_str} length {self.length} " \
               "num_stripes {self.num_stripes}".format(self=self)


class Stripe(object):
    sstruct = struct.Struct('<2Q16s')

    def __init__(self, data):
        self.devid, self.offset, uuid_bytes = Stripe.sstruct.unpack(data)
        self.uuid = uuid.UUID(bytes=uuid_bytes)

    def __str__(self):
        return "stripe devid {self.devid} offset {self.offset:#x}".format(self=self)


class InodeRefList(ItemData, collections.abc.MutableSequence):
    def __init__(self, header, data):
        super().__init__(header)
        self._list = []
        pos = 0
        while pos < header.len:
            inode_ref = InodeRef(data, pos)
            self._list.append(inode_ref)
            pos += len(inode_ref)

    def __getitem__(self, index):
        return self._list[index]

    def __setitem__(self, index, value):
        self._list[index] = value

    def __delitem__(self, index):
        del self._list[index]

    def __len__(self):
        return len(self._list)

    def insert(self, index, value):
        self._list.insert(index, value)

    def __str__(self):
        return "inode ref list size {}".format(len(self))


class InodeRef(ItemData):
    sstruct = struct.Struct('<QH')

    def __init__(self, key, data):
        super().__init__(key)
        self.index, self.name_len = InodeRef.sstruct.unpack_from(data)
        self.name, = struct.Struct('<{}s'.format(self.name_len)).unpack_from(data, InodeRef.sstruct.size)
        self._len = InodeRef.sstruct.size + self.name_len

    @property
    def name_str(self):
        return embedded_text_for_str(self.name)

    def __len__(self):
        return self._len

    def __str__(self):
        return "inode ref index {self.index} name {self.name_str}".format(self=self)


class DirItemList(ItemData, collections.abc.MutableSequence):
    def __init__(self, key, data):
        super().__init__(key)
        self._list = []
        pos = 0
        while pos < key.data_size:
            cls = {DIR_ITEM_KEY: DirItem, XATTR_ITEM_KEY: XAttrItem}
            dir_item = cls[self.key.type](data, pos)
            self._list.append(dir_item)
            pos += len(dir_item)

    def __getitem__(self, index):
        return self._list[index]

    def __setitem__(self, index, value):
        self._list[index] = value

    def __delitem__(self, index):
        del self._list[index]

    def __len__(self):
        return len(self._list)

    def insert(self, index, value):
        self._list.insert(index, value)

    def __str__(self):
        return "dir item list hash {self.key.offset} size {}".format(len(self), self=self)


class XAttrItemList(DirItemList):
    def __str__(self):
        return "xattr item list hash {self.key.offset} size {}".format(len(self), self=self)


class DirItem(object):
    _dir_item = [
        DiskKey.sstruct,
        struct.Struct('<QHHB')
    ]
    sstruct = struct.Struct('<' + ''.join([s.format[1:].decode() for s in _dir_item]))

    def __init__(self, data, pos):
        next_pos = pos + DiskKey.sstruct.size
        self.location = DiskKey(data[pos:next_pos])
        pos = next_pos
        self.transid, self.data_len, self.name_len, self.type = \
            DirItem._dir_item[1].unpack_from(data, pos)
        pos += DirItem._dir_item[1].size
        self.name, = struct.Struct('<{}s'.format(self.name_len)).unpack_from(data, pos)
        pos += self.name_len
        self.data, = struct.Struct('<{}s'.format(self.data_len)).unpack_from(data, pos)
        pos += self.data_len
        self._len = DirItem.sstruct.size + self.name_len + self.data_len

    @property
    def type_str(self):
        return _dir_item_type_str_map[self.type]

    @property
    def name_str(self):
        return embedded_text_for_str(self.name)

    @property
    def data_str(self):
        return embedded_text_for_str(self.data)

    def __len__(self):
        return self._len

    def __str__(self):
        return "dir item location {self.location} type {self.type_str} " \
               "name {self.name_str}".format(self=self)


class XAttrItem(DirItem):
    def __str__(self):
        return "xattr item name {self.name_str} data {self.data_str}".format(self=self)


class DirIndex(ItemData):
    def __init__(self, header, data):
        super().__init__(header)
        self.location = DiskKey(data[:DiskKey.sstruct.size])
        pos = DiskKey.sstruct.size
        self.transid, self.data_len, self.name_len, self.type = \
            DirItem._dir_item[1].unpack_from(data, pos)
        pos += DirItem._dir_item[1].size
        self.name, = struct.Struct('<{}s'.format(self.name_len)).unpack_from(data, pos)

    @property
    def type_str(self):
        return _dir_item_type_str_map[self.type]

    @property
    def name_str(self):
        return embedded_text_for_str(self.name)

    def __str__(self):
        return "dir index {self.key.offset} location {self.location} type {self.type_str} " \
               "name {self.name_str}".format(self=self)


class FileExtentItem(ItemData):
    _file_extent_item = [
        struct.Struct('<QQBB2xB'),
        struct.Struct('<4Q'),
    ]
    sstruct = struct.Struct('<' + ''.join([s.format[1:].decode() for s in _file_extent_item]))

    def __init__(self, key, data):
        super().__init__(key)
        self.logical_offset = key.offset
        self.generation, self.ram_bytes, self.compression, self.encryption, self.type = \
            FileExtentItem._file_extent_item[0].unpack_from(data)
        if self.type != FILE_EXTENT_INLINE:
            # These are confusing, so they deserve a comment in the code:
            # (disk_bytenr EXTENT_ITEM disk_num_bytes) is the tree key of
            # the extent item storing the actual data.
            #
            # The third one, offset, is the offset inside that extent where the
            # data we need starts. num_bytes is the amount of bytes to be used
            # from that offset onwards.
            #
            # Remember that these numbers will always be multiples of disk block
            # sizes, because that's how it gets cowed. We don't just use 1 or 2
            # bytes from another extent.
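            #
            # Hypothetical illustration (made-up numbers, not taken from a real
            # filesystem): a file extent item referencing the on-disk extent
            # keyed (269484032 EXTENT_ITEM 131072), of which the file uses
            # 8192 bytes starting 4096 bytes into that extent, would carry
            # disk_bytenr = 269484032, disk_num_bytes = 131072, offset = 4096
            # and num_bytes = 8192.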
            pos = FileExtentItem._file_extent_item[0].size
            self.disk_bytenr, self.disk_num_bytes, self.offset, self.num_bytes = \
                FileExtentItem._file_extent_item[1].unpack_from(data, pos)
        else:
            self._inline_encoded_nbytes = key.data_size - FileExtentItem._file_extent_item[0].size

    @property
    def compression_str(self):
        return _compress_type_str_map.get(self.compression, 'unknown')

    @property
    def type_str(self):
        return _file_extent_type_str_map.get(self.type, 'unknown')

    def __str__(self):
        ret = ["extent data at {self.logical_offset} generation {self.generation} "
               "ram_bytes {self.ram_bytes} "
               "compression {self.compression_str} type {self.type_str}".format(self=self)]
        if self.type != FILE_EXTENT_INLINE:
            ret.append("disk_bytenr {self.disk_bytenr} disk_num_bytes {self.disk_num_bytes} "
                       "offset {self.offset} num_bytes {self.num_bytes}".format(self=self))
        else:
            ret.append("inline_encoded_nbytes {self._inline_encoded_nbytes}".format(self=self))
        return ' '.join(ret)


class ExtentItem(ItemData):
    sstruct = struct.Struct('<3Q')
    extent_inline_ref = struct.Struct('<BQ')

    def __init__(self, header, data, load_data_refs=True, load_metadata_refs=True):
        super().__init__(header)
        self.setattr_from_key(objectid_attr='vaddr', offset_attr='length')
        pos = 0
        self.refs, self.generation, self.flags = ExtentItem.sstruct.unpack_from(data, pos)
        pos += ExtentItem.sstruct.size
        if self.flags == EXTENT_FLAG_DATA and load_data_refs:
            self.extent_data_refs = []
            self.shared_data_refs = []
            while pos < len(data):
                inline_ref_type, inline_ref_offset = \
                    ExtentItem.extent_inline_ref.unpack_from(data, pos)
                if inline_ref_type == EXTENT_DATA_REF_KEY:
                    pos += 1
                    next_pos = pos + InlineExtentDataRef.sstruct.size
                    self.extent_data_refs.append(InlineExtentDataRef(data[pos:next_pos]))
                    pos = next_pos
                elif inline_ref_type == SHARED_DATA_REF_KEY:
                    pos += 1
                    next_pos = pos + InlineSharedDataRef.sstruct.size
                    self.shared_data_refs.append(InlineSharedDataRef(data[pos:next_pos]))
                    pos = next_pos
        elif self.flags & EXTENT_FLAG_TREE_BLOCK and load_metadata_refs:
            next_pos = pos + TreeBlockInfo.sstruct.size
            self.tree_block_info = TreeBlockInfo(data[pos:next_pos])
            pos = next_pos
            self.tree_block_refs = []
            self.shared_block_refs = []
            while pos < len(data):
                inline_ref_type, inline_ref_offset = \
                    ExtentItem.extent_inline_ref.unpack_from(data, pos)
                if inline_ref_type == TREE_BLOCK_REF_KEY:
                    self.tree_block_refs.append(InlineTreeBlockRef(inline_ref_offset))
                elif inline_ref_type == SHARED_BLOCK_REF_KEY:
                    self.shared_block_refs.append(InlineSharedBlockRef(inline_ref_offset))
                else:
                    raise Exception("BUG: expected inline TREE_BLOCK_REF or SHARED_BLOCK_REF_KEY "
                                    "but got inline_ref_type {}".format(inline_ref_type))
                pos += ExtentItem.extent_inline_ref.size

    def append_extent_data_ref(self, ref):
        self.extent_data_refs.append(ref)

    def append_shared_data_ref(self, ref):
        self.shared_data_refs.append(ref)

    def append_tree_block_ref(self, ref):
        self.tree_block_refs.append(ref)

    def append_shared_block_ref(self, ref):
        self.shared_block_refs.append(ref)

    @property
    def flags_str(self):
        return flags_str(self.flags, _extent_flags_str_map)

    def __str__(self):
        return "extent vaddr {self.vaddr} length {self.length} refs {self.refs} " \
               "gen {self.generation} flags {self.flags_str}".format(self=self)


class ExtentDataRef(ItemData):
    sstruct = struct.Struct('<3QL')

    def __init__(self, header, data):
        super().__init__(header)
        self.root, self.objectid, self.offset, self.count = \
            ExtentDataRef.sstruct.unpack(data)

    def __str__(self):
        return "extent data backref root {self.root} objectid {self.objectid} " \
               "offset {self.offset} count {self.count}".format(self=self)


class InlineExtentDataRef(ExtentDataRef):
    sstruct = ExtentDataRef.sstruct

    def __init__(self, data):
        self.root, self.objectid, self.offset, self.count = \
            InlineExtentDataRef.sstruct.unpack(data)

    def __str__(self):
        return "inline extent data backref root {self.root} objectid {self.objectid} " \
               "offset {self.offset} count {self.count}".format(self=self)


class SharedDataRef(ItemData):
    sstruct = struct.Struct('<L')

    def __init__(self, header, data):
        super().__init__(header)
        self.setattr_from_key(offset_attr='parent')
        self.count, = SharedDataRef.sstruct.unpack(data)

    def __str__(self):
        return "shared data backref parent {self.parent} count {self.count}".format(self=self)


class InlineSharedDataRef(SharedDataRef):
    sstruct = struct.Struct('<QL')

    def __init__(self, data):
        self.parent, self.count = InlineSharedDataRef.sstruct.unpack(data)

    def __str__(self):
        return "inline shared data backref parent {self.parent} " \
               "count {self.count}".format(self=self)


class TreeBlockInfo(object):
    sstruct = struct.Struct('<QBQB')

    def __init__(self, data):
        tb_objectid, tb_type, tb_offset, self.level = \
            TreeBlockInfo.sstruct.unpack(data)
        self.key = Key(tb_objectid, tb_type, tb_offset)

    def __str__(self):
        return "tree block key {self.key} level {self.level}".format(self=self)


class TreeBlockRef(ItemData):
    def __init__(self, header):
        super().__init__(header)
        self.setattr_from_key(offset_attr='root')

    def __str__(self):
        return "tree block backref root {}".format(key_objectid_str(self.root, None))


class InlineTreeBlockRef(TreeBlockRef):
    def __init__(self, root):
        self.root = root

    def __str__(self):
        return "inline tree block backref root {}".format(key_objectid_str(self.root, None))


class SharedBlockRef(ItemData):
    def __init__(self, header):
        super().__init__(header)
        self.setattr_from_key(offset_attr='parent')

    def __str__(self):
        return "shared block backref parent {}".format(self.parent)


class InlineSharedBlockRef(SharedBlockRef):
    def __init__(self, parent):
        self.parent = parent

    def __str__(self):
        return "inline shared block backref parent {}".format(self.parent)


# === Main FileSystem class

def key_bin_search(fd, base_offset, item_size, cmp_item, min, max):
    low = min
    high = max
    while low < high:
        mid = (low + high) // 2
        offset = base_offset + mid * item_size
        fd.seek(offset)
        key1 = DiskKey(fd.read(item_size))
        if key1 > cmp_item:
            high = mid
        elif key1 < cmp_item:
            low = mid + 1
        else:
            return True, mid
    return False, low


chunk_map_item = namedtuple('chunk_map_item', 'logical physical length devid')


class FileSystem(object):
    def __init__(self, path, part_offset):
        self._chunk_map = OrderedDict()
        self.path = path
        self.part_offset = part_offset
        self.fd = open(path, 'rb')
        self.fd.seek(part_offset + 0x10000)  # going to superblock
        sb_bytes = self.fd.read(superblock.size)
        sb_tuple = superblock.unpack(sb_bytes)
        if sb_tuple[3] != b'_BHRfS_M':
            raise Exception("No signature found")

        # setting base FS information
        self.fsid = sb_tuple[0]
        self.nodesize = sb_tuple[14]
        self.sectorsize = sb_tuple[13]
        self._chunk_root = sb_tuple[6]
        self._chunk_root_level = sb_tuple[24]
        self._tree_roots_root = sb_tuple[5]
        self._tree_roots_root_level = sb_tuple[23]

        # setting chunk map
        sys_chunk_array_size = sb_tuple[17]
        sys_chunk = sb_tuple[25][:sys_chunk_array_size]
        pos = 0
        while pos < sys_chunk_array_size:
            key = DiskKey(sys_chunk[pos:])
            pos += DiskKey.sstruct.size
            chunk = Chunk(key, sys_chunk[pos:])
            for st in chunk.stripes:
                self._insert_chunk(chunk_map_item(chunk.vaddr, st.offset, chunk.length, st.devid))
            pos += chunk.size

        # setting tree roots
        _, fs_tree_root_item = self.search_tree(self._tree_roots_root_level, self._tree_roots_root,
                                                Key(FS_TREE_OBJECTID, ROOT_ITEM_KEY, 0))
        _, extent_tree_root_item = self.search_tree(self._tree_roots_root_level, self._tree_roots_root,
                                                    Key(EXTENT_TREE_OBJECTID, ROOT_ITEM_KEY, 0))
        self._fs_root_level = fs_tree_root_item.level
        self._fs_root = fs_tree_root_item.bytenr
        self._extent_root_level = extent_tree_root_item.level
        self._extent_root = extent_tree_root_item.bytenr

    @property
    def chunk_root(self):
        return self._chunk_root_level, self._chunk_root

    @property
    def tree_roots_root(self):
        return self._tree_roots_root_level, self._tree_roots_root

    @property
    def fs_root(self):
        return self._fs_root_level, self._fs_root

    @property
    def extent_root(self):
        return self._extent_root_level, self._extent_root

    def logical_to_physical(self, log):
        cur_logical = next(iter(self._chunk_map))  # first item
        for logical, cmi in self._chunk_map.items():
            if logical > log:
                break
            cur_logical = logical

        # if there is no address in chunk_map, searching in chunk_tree
        if cur_logical + self._chunk_map[cur_logical].length < log:
            def process_func(header, offset):
                for i in range(header.items_num):
                    self.fd.seek(offset + i * LeafKey.sstruct.size)
                    k = LeafKey(self.fd.read(LeafKey.sstruct.size))
                    self.fd.seek(offset + k.data_offset)
                    if k.type == CHUNK_ITEM_KEY:
                        item = _key_type_class_map[k.type](k, self.fd.read(k.data_size))
                        for st in item.stripes:
                            self._insert_chunk(chunk_map_item(item.vaddr, st.offset, item.length, st.devid))

            self.search_tree(self._chunk_root_level, self._chunk_root,
                             Key(FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM_KEY, log), process_func)

            cur_logical = next(iter(self._chunk_map))  # first item
            if cur_logical > log:
                raise Exception(f'Cannot translate address {log:#x}')
            for logical, cmi in self._chunk_map.items():
                if logical > log:
                    break
                cur_logical = logical
            if cur_logical + self._chunk_map[cur_logical].length < log:
                raise Exception(f'Cannot translate address {log:#x}')

        print('address translation: {:#x} -> {:#x}'.format(
            log, self._chunk_map[cur_logical].physical + log - cur_logical))
        return self.part_offset + self._chunk_map[cur_logical].physical + log - cur_logical

    def search_tree(self, level, root_offset, key, process_node_func=None):
        for lvl in range(level, 0, -1):
            # inner node
            root_offset = self.logical_to_physical(root_offset)
            self.fd.seek(root_offset)
            header = NodeHeader._make(_node_header_struct.unpack(self.fd.read(_node_header_struct.size)))
            if header.level != lvl:
                raise Exception('Invalid inner node level')
            found, itemnr = key_bin_search(
                self.fd,
                root_offset + _node_header_struct.size,
                InnerKey.sstruct.size,
                key,
                0,
                header.items_num
            )
            # TODO: better understand this
            if not found and itemnr > 0:
                itemnr -= 1
            self.fd.seek(root_offset + _node_header_struct.size + itemnr * InnerKey.sstruct.size)
            k = InnerKey(self.fd.read(InnerKey.sstruct.size))
            root_offset = k.block_num
        else:
            # we are in leaf node
            root_offset = self.logical_to_physical(root_offset)
            self.fd.seek(root_offset)
            header = NodeHeader._make(_node_header_struct.unpack(self.fd.read(_node_header_struct.size)))
            if header.level != 0:
                raise Exception('Invalid leaf level')
            if process_node_func:
                process_node_func(header, root_offset + _node_header_struct.size)
                return root_offset
            found, itemnr = key_bin_search(
                self.fd,
                root_offset + _node_header_struct.size,
                LeafKey.sstruct.size,
                key,
                0,
                header.items_num
            )
            self.fd.seek(root_offset + _node_header_struct.size + itemnr * LeafKey.sstruct.size)
            k = LeafKey(self.fd.read(LeafKey.sstruct.size))
            self.fd.seek(root_offset + _node_header_struct.size + k.data_offset)
            if k.type in _key_type_class_map:
                return k, _key_type_class_map[k.type](k, self.fd.read(k.data_size))
            else:
                return k, False

    def print_node(self, header, offset):
        print(header)
        root_paddr = self.logical_to_physical(header.node_addr)
        key_size = InnerKey.sstruct.size if header.level > 0 else LeafKey.sstruct.size
        key_struct = InnerKey if header.level > 0 else LeafKey
        for i in range(header.items_num):
            self.fd.seek(root_paddr + _node_header_struct.size + i * key_size)
            k = key_struct(self.fd.read(key_size))
            print(k)
            self.fd.seek(root_paddr + _node_header_struct.size + k.data_offset)
            if k.type in _key_type_class_map and header.level == 0:
                item = _key_type_class_map[k.type](k, self.fd.read(k.data_size))
                print(item)
                if k.type == DIR_ITEM_KEY:
                    for it in item:
                        print(it)
            print('============================')

    def print_chunk_map(self):
        print('=== chunk map ===')
        for logical, cmi in self._chunk_map.items():
            print(f'{cmi.logical:#x}..{cmi.logical+cmi.length:#x} -> '
                  f'{cmi.physical:#x}..{cmi.physical+cmi.length:#x}')
        print('=================')

    def _insert_chunk(self, chunk):
        if chunk.logical not in self._chunk_map:
            cm = dict(self._chunk_map)
            cm[chunk.logical] = chunk
            self._chunk_map = OrderedDict(sorted(cm.items()))


_key_type_class_map = {
    INODE_ITEM_KEY: InodeItem,
    INODE_REF_KEY: InodeRef,
    DIR_ITEM_KEY: DirItemList,
    DIR_INDEX_KEY: DirIndex,
    EXTENT_DATA_KEY: FileExtentItem,
    ROOT_ITEM_KEY: RootItem,
    EXTENT_ITEM_KEY: ExtentItem,
    CHUNK_ITEM_KEY: Chunk,
}
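

# Minimal usage sketch, not part of the original module: the image path and the
# partition byte offset are illustrative assumptions; point them at a device or
# raw image that actually contains a btrfs filesystem.
if __name__ == '__main__':
    import sys

    image_path = sys.argv[1] if len(sys.argv) > 1 else '/dev/sdb1'
    fs = FileSystem(image_path, 0)
    fs.print_chunk_map()
    # Look up the root directory inode (objectid 256) in the FS tree.
    level, root = fs.fs_root
    key, item = fs.search_tree(level, root,
                               Key(FIRST_FREE_OBJECTID, INODE_ITEM_KEY, 0))
    print(key)
    print(item)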