# PROJECT: Python tools for traversing BTRFS structures # LICENSE: GPL-2.0+ (https://spdx.org/licenses/GPL-2.0+) # PURPOSE: Classes and structures for BTRFS on-disk layout # COPYRIGHT: Copyright 2018 Victor Perevertkin (victor@perevertkin.ru) # some code was taken from https://github.com/knorrie/python-btrfs from btrfs_constants import * import struct from collections import namedtuple, OrderedDict import collections.abc import copy import datetime import os import uuid import crc32c ULLONG_MAX = (1 << 64) - 1 ULONG_MAX = (1 << 32) - 1 def ULL(n): return n & ULLONG_MAX ROOT_TREE_OBJECTID = 1 EXTENT_TREE_OBJECTID = 2 CHUNK_TREE_OBJECTID = 3 DEV_TREE_OBJECTID = 4 FS_TREE_OBJECTID = 5 ROOT_TREE_DIR_OBJECTID = 6 CSUM_TREE_OBJECTID = 7 QUOTA_TREE_OBJECTID = 8 UUID_TREE_OBJECTID = 9 FREE_SPACE_TREE_OBJECTID = 10 DEV_STATS_OBJECTID = 0 BALANCE_OBJECTID = ULL(-4) ORPHAN_OBJECTID = ULL(-5) TREE_LOG_OBJECTID = ULL(-6) TREE_LOG_FIXUP_OBJECTID = ULL(-7) TREE_RELOC_OBJECTID = ULL(-8) DATA_RELOC_TREE_OBJECTID = ULL(-9) EXTENT_CSUM_OBJECTID = ULL(-10) FREE_SPACE_OBJECTID = ULL(-11) FREE_INO_OBJECTID = ULL(-12) MULTIPLE_OBJECTIDS = ULL(-255) FIRST_FREE_OBJECTID = 256 LAST_FREE_OBJECTID = ULL(-256) FIRST_CHUNK_TREE_OBJECTID = 256 DEV_ITEMS_OBJECTID = 1 BTRFS_SYSTEM_CHUNK_ARRAY_SIZE = 2048 INODE_ITEM_KEY = 1 INODE_REF_KEY = 12 INODE_EXTREF_KEY = 13 XATTR_ITEM_KEY = 24 ORPHAN_ITEM_KEY = 48 DIR_LOG_ITEM_KEY = 60 DIR_LOG_INDEX_KEY = 72 DIR_ITEM_KEY = 84 DIR_INDEX_KEY = 96 EXTENT_DATA_KEY = 108 EXTENT_CSUM_KEY = 128 ROOT_ITEM_KEY = 132 ROOT_BACKREF_KEY = 144 ROOT_REF_KEY = 156 EXTENT_ITEM_KEY = 168 METADATA_ITEM_KEY = 169 TREE_BLOCK_REF_KEY = 176 EXTENT_DATA_REF_KEY = 178 SHARED_BLOCK_REF_KEY = 182 SHARED_DATA_REF_KEY = 184 BLOCK_GROUP_ITEM_KEY = 192 FREE_SPACE_INFO_KEY = 198 FREE_SPACE_EXTENT_KEY = 199 FREE_SPACE_BITMAP_KEY = 200 DEV_EXTENT_KEY = 204 DEV_ITEM_KEY = 216 CHUNK_ITEM_KEY = 228 QGROUP_STATUS_KEY = 240 QGROUP_INFO_KEY = 242 QGROUP_LIMIT_KEY = 244 QGROUP_RELATION_KEY = 246 BALANCE_ITEM_KEY = 248 DEV_STATS_KEY = 249 DEV_REPLACE_KEY = 250 UUID_KEY_SUBVOL = 251 UUID_KEY_RECEIVED_SUBVOL = 252 STRING_ITEM_KEY = 253 BLOCK_GROUP_SINGLE = 0 BLOCK_GROUP_DATA = 1 << 0 BLOCK_GROUP_SYSTEM = 1 << 1 BLOCK_GROUP_METADATA = 1 << 2 BLOCK_GROUP_RAID0 = 1 << 3 BLOCK_GROUP_RAID1 = 1 << 4 BLOCK_GROUP_DUP = 1 << 5 BLOCK_GROUP_RAID10 = 1 << 6 BLOCK_GROUP_RAID5 = 1 << 7 BLOCK_GROUP_RAID6 = 1 << 8 BLOCK_GROUP_TYPE_MASK = ( BLOCK_GROUP_DATA | BLOCK_GROUP_SYSTEM | BLOCK_GROUP_METADATA ) BLOCK_GROUP_PROFILE_MASK = ( BLOCK_GROUP_RAID0 | BLOCK_GROUP_RAID1 | BLOCK_GROUP_RAID5 | BLOCK_GROUP_RAID6 | BLOCK_GROUP_DUP | BLOCK_GROUP_RAID10 ) AVAIL_ALLOC_BIT_SINGLE = 1 << 48 # used in balance_args SPACE_INFO_GLOBAL_RSV = 1 << 49 _block_group_flags_str_map = { BLOCK_GROUP_DATA: 'DATA', BLOCK_GROUP_METADATA: 'METADATA', BLOCK_GROUP_SYSTEM: 'SYSTEM', BLOCK_GROUP_RAID0: 'RAID0', BLOCK_GROUP_RAID1: 'RAID1', BLOCK_GROUP_DUP: 'DUP', BLOCK_GROUP_RAID10: 'RAID10', BLOCK_GROUP_RAID5: 'RAID5', BLOCK_GROUP_RAID6: 'RAID6', } _balance_args_profiles_str_map = { BLOCK_GROUP_RAID0: 'RAID0', BLOCK_GROUP_RAID1: 'RAID1', BLOCK_GROUP_DUP: 'DUP', BLOCK_GROUP_RAID10: 'RAID10', BLOCK_GROUP_RAID5: 'RAID5', BLOCK_GROUP_RAID6: 'RAID6', AVAIL_ALLOC_BIT_SINGLE: 'SINGLE', } QGROUP_LEVEL_SHIFT = 48 EXTENT_FLAG_DATA = 1 << 0 EXTENT_FLAG_TREE_BLOCK = 1 << 1 BLOCK_FLAG_FULL_BACKREF = 1 << 8 _extent_flags_str_map = { EXTENT_FLAG_DATA: 'DATA', EXTENT_FLAG_TREE_BLOCK: 'TREE_BLOCK', BLOCK_FLAG_FULL_BACKREF: 'FULL_BACKREF', } INODE_NODATASUM = 1 << 0 INODE_NODATACOW = 1 << 1 INODE_READONLY = 1 << 2 INODE_NOCOMPRESS = 1 << 3 INODE_PREALLOC = 1 << 4 INODE_SYNC = 1 << 5 INODE_IMMUTABLE = 1 << 6 INODE_APPEND = 1 << 7 INODE_NODUMP = 1 << 8 INODE_NOATIME = 1 << 9 INODE_DIRSYNC = 1 << 10 INODE_COMPRESS = 1 << 11 _inode_flags_str_map = { INODE_NODATASUM: 'NODATASUM', INODE_READONLY: 'READONLY', INODE_NOCOMPRESS: 'NOCOMPRESS', INODE_PREALLOC: 'PREALLOC', INODE_SYNC: 'SYNC', INODE_IMMUTABLE: 'IMMUTABLE', INODE_APPEND: 'APPEND', INODE_NODUMP: 'NODUMP', INODE_NOATIME: 'NOATIME', INODE_DIRSYNC: 'DIRSYNC', INODE_COMPRESS: 'COMPRESS', } ROOT_SUBVOL_RDONLY = 1 << 0 _root_flags_str_map = { ROOT_SUBVOL_RDONLY: 'RDONLY', } FT_UNKNOWN = 0 FT_REG_FILE = 1 FT_DIR = 2 FT_CHRDEV = 3 FT_BLKDEV = 4 FT_FIFO = 5 FT_SOCK = 6 FT_SYMLINK = 7 FT_XATTR = 8 FT_MAX = 9 _dir_item_type_str_map = { FT_UNKNOWN: 'UNKNOWN', FT_REG_FILE: 'FILE', FT_DIR: 'DIR', FT_CHRDEV: 'CHRDEV', FT_BLKDEV: 'BLKDEV', FT_FIFO: 'FIFO', FT_SOCK: 'SOCK', FT_SYMLINK: 'SYMLINK', FT_XATTR: 'XATTR', } COMPRESS_NONE = 0 COMPRESS_ZLIB = 1 COMPRESS_LZO = 2 COMPRESS_ZSTD = 3 _compress_type_str_map = { COMPRESS_NONE: 'none', COMPRESS_ZLIB: 'zlib', COMPRESS_LZO: 'lzo', COMPRESS_ZSTD: 'zstd', } FILE_EXTENT_INLINE = 0 FILE_EXTENT_REG = 1 FILE_EXTENT_PREALLOC = 2 _file_extent_type_str_map = { FILE_EXTENT_INLINE: 'inline', FILE_EXTENT_REG: 'regular', FILE_EXTENT_PREALLOC: 'prealloc', } def qgroup_level(objectid): return objectid >> QGROUP_LEVEL_SHIFT def qgroup_subvid(objectid): return objectid & ((1 << QGROUP_LEVEL_SHIFT) - 1) _key_objectid_str_map = { ROOT_TREE_OBJECTID: 'ROOT_TREE', EXTENT_TREE_OBJECTID: 'EXTENT_TREE', CHUNK_TREE_OBJECTID: 'CHUNK_TREE', DEV_TREE_OBJECTID: 'DEV_TREE', FS_TREE_OBJECTID: 'FS_TREE', ROOT_TREE_DIR_OBJECTID: 'ROOT_TREE_DIR', CSUM_TREE_OBJECTID: 'CSUM_TREE', QUOTA_TREE_OBJECTID: 'QUOTA_TREE', UUID_TREE_OBJECTID: 'UUID_TREE', FREE_SPACE_TREE_OBJECTID: 'FREE_SPACE_TREE', BALANCE_OBJECTID: 'BALANCE', ORPHAN_OBJECTID: 'ORPHAN', TREE_LOG_OBJECTID: 'TREE_LOG', TREE_LOG_FIXUP_OBJECTID: 'TREE_LOG_FIXUP', TREE_RELOC_OBJECTID: 'TREE_RELOC', DATA_RELOC_TREE_OBJECTID: 'DATA_RELOC_TREE', EXTENT_CSUM_OBJECTID: 'EXTENT_CSUM', FREE_SPACE_OBJECTID: 'FREE_SPACE', FREE_INO_OBJECTID: 'FREE_INO', MULTIPLE_OBJECTIDS: 'MULTIPLE', } def key_objectid_str(objectid, _type): if _type == DEV_EXTENT_KEY: return str(objectid) if _type == QGROUP_RELATION_KEY: return "{}/{}".format(qgroup_level(objectid), qgroup_subvid(objectid)) if _type == UUID_KEY_SUBVOL or _type == UUID_KEY_RECEIVED_SUBVOL: return "0x{:0>16x}".format(objectid) if objectid == ROOT_TREE_OBJECTID and _type == DEV_ITEM_KEY: return 'DEV_ITEMS' if objectid == DEV_STATS_OBJECTID and _type == DEV_STATS_KEY: return 'DEV_STATS' if objectid == FIRST_CHUNK_TREE_OBJECTID and _type == CHUNK_ITEM_KEY: return 'FIRST_CHUNK_TREE' if objectid == ULLONG_MAX: return '-1' return _key_objectid_str_map.get(objectid, str(objectid)) _key_type_str_map = { INODE_ITEM_KEY: 'INODE_ITEM', INODE_REF_KEY: 'INODE_REF', INODE_EXTREF_KEY: 'INODE_EXTREF', XATTR_ITEM_KEY: 'XATTR_ITEM', ORPHAN_ITEM_KEY: 'ORPHAN_ITEM', DIR_LOG_ITEM_KEY: 'DIR_LOG_ITEM', DIR_LOG_INDEX_KEY: 'DIR_LOG_INDEX', DIR_ITEM_KEY: 'DIR_ITEM', DIR_INDEX_KEY: 'DIR_INDEX', EXTENT_DATA_KEY: 'EXTENT_DATA', EXTENT_CSUM_KEY: 'EXTENT_CSUM', ROOT_ITEM_KEY: 'ROOT_ITEM', ROOT_BACKREF_KEY: 'ROOT_BACKREF', ROOT_REF_KEY: 'ROOT_REF', EXTENT_ITEM_KEY: 'EXTENT_ITEM', METADATA_ITEM_KEY: 'METADATA_ITEM', TREE_BLOCK_REF_KEY: 'TREE_BLOCK_REF', EXTENT_DATA_REF_KEY: 'EXTENT_DATA_REF', SHARED_BLOCK_REF_KEY: 'SHARED_BLOCK_REF', SHARED_DATA_REF_KEY: 'SHARED_DATA_REF', BLOCK_GROUP_ITEM_KEY: 'BLOCK_GROUP_ITEM', FREE_SPACE_INFO_KEY: 'FREE_SPACE_INFO', FREE_SPACE_EXTENT_KEY: 'FREE_SPACE_EXTENT', FREE_SPACE_BITMAP_KEY: 'FREE_SPACE_BITMAP', DEV_EXTENT_KEY: 'DEV_EXTENT', DEV_ITEM_KEY: 'DEV_ITEM', CHUNK_ITEM_KEY: 'CHUNK_ITEM', QGROUP_STATUS_KEY: 'QGROUP_STATUS', QGROUP_INFO_KEY: 'QGROUP_INFO', QGROUP_LIMIT_KEY: 'QGROUP_LIMIT', QGROUP_RELATION_KEY: 'QGROUP_RELATION', BALANCE_ITEM_KEY: 'BALANCE_ITEM', DEV_STATS_KEY: 'DEV_STATS', DEV_REPLACE_KEY: 'DEV_REPLACE', UUID_KEY_SUBVOL: 'UUID_SUBVOL', UUID_KEY_RECEIVED_SUBVOL: 'RECEIVED_SUBVOL', STRING_ITEM_KEY: 'STRING_ITEM', } # === Helper functions def key_type_str(_type): return _key_type_str_map.get(_type, str(_type)) def key_offset_str(offset, _type): if _type == QGROUP_RELATION_KEY or _type == QGROUP_INFO_KEY or _type == QGROUP_LIMIT_KEY: return "{}/{}".format(qgroup_level(offset), qgroup_subvid(offset)) if _type == UUID_KEY_SUBVOL or _type == UUID_KEY_RECEIVED_SUBVOL: return "0x{:0>16x}".format(offset) if _type == ROOT_ITEM_KEY: return _key_objectid_str_map.get(offset, str(offset)) if offset == ULLONG_MAX: return '-1' return str(offset) def flags_str(flags, flags_str_map): ret = [] for flag in sorted(flags_str_map.keys()): if flags & flag: ret.append(flags_str_map[flag]) if len(ret) == 0: ret.append("none") return '|'.join(ret) def embedded_text_for_str(text): try: return "utf-8 {}".format(text.decode('utf-8')) except UnicodeDecodeError: return "raw {}".format(repr(text)) # === Basic structures class TimeSpec(object): sstruct = struct.Struct('> 72 self._type = (self._key & ((1 << 72) - 1)) >> 64 self._offset = (self._key & ((1 << 64) - 1)) def __lt__(self, other): if isinstance(other, Key): return self._key < other._key return self._key < other def __le__(self, other): if isinstance(other, Key): return self._key <= other._key return self._key <= other def __eq__(self, other): if isinstance(other, Key): return self._key == other._key return self._key == other def __ge__(self, other): if isinstance(other, Key): return self._key >= other._key return self._key >= other def __gt__(self, other): if isinstance(other, Key): return self._key > other._key return self._key > other def __str__(self): return "({} {} {})".format( key_objectid_str(self._objectid, self._type), key_type_str(self._type), key_offset_str(self._offset, self._type), ) def __add__(self, amount): new_key = copy.copy(self) new_key.key += amount return new_key def __sub__(self, amount): new_key = copy.copy(self) new_key.key -= amount return new_key class DiskKey(Key): sstruct = struct.Struct(' cmp_item: high = mid elif key1 < cmp_item: low = mid + 1 else: return True, mid return False, low chunk_map_item = namedtuple('chunk_map_item', 'logical physical length devid') class FileSystem(object): def __init__(self, path, part_offset): self._chunk_map = OrderedDict() self.path = path self.part_offset = part_offset self.fd = open(path, 'rb') self.fd.seek(part_offset + 0x10000) # going to superblock sb_bytes = self.fd.read(superblock.size) sb_tuple = superblock.unpack(sb_bytes) if sb_tuple[3] != b'_BHRfS_M': raise "No signature found" # setting base FS information self.fsid = sb_tuple[0] self.nodesize = sb_tuple[14] self.sectorsize = sb_tuple[13] self._chunk_root = sb_tuple[6] self._chunk_root_level = sb_tuple[24] self._tree_roots_root = sb_tuple[5] self._tree_roots_root_level = sb_tuple[23] # setting chunk map sys_chunk_array_size = sb_tuple[17] sys_chunk = sb_tuple[25][:sys_chunk_array_size] pos = 0 while pos < sys_chunk_array_size: key = DiskKey(sys_chunk[pos:]) pos += DiskKey.sstruct.size chunk = Chunk(key, sys_chunk[pos:]) for st in chunk.stripes: self._insert_chunk(chunk_map_item(chunk.vaddr, st.offset, chunk.length, st.devid)) pos += chunk.size # setting tree roots _, fs_tree_root_item = self.search_tree(self._tree_roots_root_level, self._tree_roots_root, Key(FS_TREE_OBJECTID, ROOT_ITEM_KEY, 0)) _, extent_tree_root_item = self.search_tree(self._tree_roots_root_level, self._tree_roots_root, Key(EXTENT_TREE_OBJECTID, ROOT_ITEM_KEY, 0)) self._fs_root_level = fs_tree_root_item.level self._fs_root = fs_tree_root_item.bytenr self._extent_root_level = extent_tree_root_item.level self._extent_root = extent_tree_root_item.bytenr @property def chunk_root(self): return self._chunk_root_level, self._chunk_root @property def tree_roots_root(self): return self._tree_roots_root_level, self._tree_roots_root @property def fs_root(self): return self._fs_root_level, self._fs_root @property def extent_root(self): return self._extent_root_level, self._extent_root def logical_to_physical(self, log): cur_logical = next(iter(self._chunk_map)) # first item for logical, cmi in self._chunk_map.items(): if logical > log: break cur_logical = logical # if there is no address in chunk_map, searching in chunk_tree if cur_logical + self._chunk_map[cur_logical].length < log: def process_func(header, offset): for i in range(header.items_num): self.fd.seek(offset + i * LeafKey.sstruct.size) k = LeafKey(self.fd.read(LeafKey.sstruct.size)) self.fd.seek(offset + k.data_offset) if k.type == CHUNK_ITEM_KEY: item = _key_type_class_map[k.type](k, self.fd.read(k.data_size)) for st in item.stripes: self._insert_chunk(chunk_map_item(item.vaddr, st.offset, item.length, st.devid)) self.search_tree(self._chunk_root_level, self._chunk_root, Key(FIRST_CHUNK_TREE_OBJECTID, CHUNK_ITEM_KEY, log), process_func) cur_logical = next(iter(self._chunk_map)) # first item if cur_logical > log: raise Exception(f'Cannot translate address {log:#x}') for logical, cmi in self._chunk_map.items(): if logical > log: break cur_logical = logical if cur_logical + self._chunk_map[cur_logical].length < log: raise Exception(f'Cannot translate address {log:#x}') print('address translation: {:#x} -> {:#x}'.format(log, self._chunk_map[cur_logical].physical + log - cur_logical)) return self.part_offset + self._chunk_map[cur_logical].physical + log - cur_logical def search_tree(self, level, root_offset, key, process_node_func = None): for lvl in range(level, 0, -1): # inner node root_offset = self.logical_to_physical(root_offset) self.fd.seek(root_offset) header = NodeHeader._make(_node_header_struct.unpack(self.fd.read(_node_header_struct.size))) if header.level != lvl: raise Exception('Invalid inner node level') found, itemnr = key_bin_search( self.fd, root_offset + _node_header_struct.size, InnerKey.sstruct.size, key, 0, header.items_num ) # TODO: better understand this if not found and itemnr > 0: itemnr -= 1 self.fd.seek(root_offset + _node_header_struct.size + itemnr * InnerKey.sstruct.size) k = InnerKey(self.fd.read(InnerKey.sstruct.size)) root_offset = k.block_num else: # we are in leaf node root_offset = self.logical_to_physical(root_offset) self.fd.seek(root_offset) header = NodeHeader._make(_node_header_struct.unpack(self.fd.read(_node_header_struct.size))) if header.level != 0: raise Exception('Invalid leaf level') if process_node_func: process_node_func(header, root_offset + _node_header_struct.size) return root_offset found, itemnr = key_bin_search( self.fd, root_offset + _node_header_struct.size, LeafKey.sstruct.size, key, 0, header.items_num ) self.fd.seek(root_offset + _node_header_struct.size + itemnr * LeafKey.sstruct.size) k = LeafKey(self.fd.read(LeafKey.sstruct.size)) self.fd.seek(root_offset + _node_header_struct.size + k.data_offset) if k.type in _key_type_class_map: return k, _key_type_class_map[k.type](k, self.fd.read(k.data_size)) else: return k, False def print_node(self, header, offset): print(header) root_paddr = self.logical_to_physical(header.node_addr) key_size = InnerKey.sstruct.size if header.level > 0 else LeafKey.sstruct.size key_struct = InnerKey if header.level > 0 else LeafKey for i in range(header.items_num): self.fd.seek(root_paddr + _node_header_struct.size + i * key_size) k = key_struct(self.fd.read(key_size)) print(k) self.fd.seek(root_paddr + _node_header_struct.size + k.data_offset) if k.type in _key_type_class_map and header.level == 0: item = _key_type_class_map[k.type](k, self.fd.read(k.data_size)) print(item) if k.type == DIR_ITEM_KEY: for it in item: print(it) print('============================') def print_chunk_map(self): print('=== chunk map ===') for logical, cmi in self._chunk_map.items(): print(f'{cmi.logical:#x}..{cmi.logical+cmi.length:#x} -> {cmi.physical:#x}..{cmi.physical+cmi.length:#x}') print('=================') def _insert_chunk(self, chunk): if not chunk.logical in self._chunk_map: cm = dict(self._chunk_map) cm[chunk.logical] = chunk self._chunk_map = OrderedDict(sorted(cm.items())) _key_type_class_map = { INODE_ITEM_KEY: InodeItem, INODE_REF_KEY: InodeRef, DIR_ITEM_KEY: DirItemList, DIR_INDEX_KEY: DirIndex, EXTENT_DATA_KEY: FileExtentItem, ROOT_ITEM_KEY: RootItem, EXTENT_ITEM_KEY: ExtentItem, CHUNK_ITEM_KEY: Chunk, }