diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/ChangeLog b/reactos/drivers/fs/ntfs/linux-ntfs/ChangeLog new file mode 100644 index 00000000000..f6fcaf5cbdf --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/ChangeLog @@ -0,0 +1,802 @@ +ToDo: + - Find and fix bugs. + - Enable NFS exporting of NTFS. + - Implement aops->set_page_dirty() in order to take control of buffer + dirtying. Not having it means if page_has_buffers(), all buffers + will be dirtied with the page. And if not they won't be. That is + fine for the moment but will break once we enable metadata updates. + - Implement sops->dirty_inode() to implement {a,m,c} time updates and + such things. + - Implement sops->write_inode(). + - In between ntfs_prepare/commit_write, need exclusion between + simultaneous file extensions. Need perhaps an NInoResizeUnderway() + flag which we can set in ntfs_prepare_write() and clear again in + ntfs_commit_write(). Just have to be careful in readpage/writepage, + as well as in truncate, that we play nice... We might need to have + a data_size field in the ntfs_inode to store the real attribute + length. Also need to be careful with initialized_size extension in + ntfs_prepare_write. Basically, just be _very_ careful in this code... + OTOH, perhaps i_sem, which is held across generic_file_write is + sufficient for synchronisation here. We then just need to make sure + ntfs_readpage/writepage/truncate interoperate properly with us. + +2.1.5 - Fix minor bug in attribute list attribute handling. + + - Fix bug in attribute list handling. Actually it is not as much a bug + as too much protection in that we were not allowing attribute lists + which waste space on disk while Windows XP clearly allows it and in + fact creates such attribute lists so our driver was failing. + - Update NTFS documentation ready for 2.6 kernel release. + +2.1.4 - Reduce compiler requirements. 
+ + - Remove all uses of unnamed structs and unions in the driver to make + old and newer gcc versions happy. Makes it a bit uglier IMO but at + least people will stop hassling me about it. + +2.1.3 - Important bug fixes in corner cases. + + - super.c::parse_ntfs_boot_sector(): Correct the check for 64-bit + clusters. (Philipp Thomas) + - attrib.c::load_attribute_list(): Fix bug when initialized_size is a + multiple of the block_size but not the cluster size. (Szabolcs + Szakacsits) + +2.1.2 - Important bug fixes alleviating the hangs in statfs. + + - Fix buggy free cluster and free inode determination logic. + +2.1.1 - Minor updates. + + - Add handling for initialized_size != data_size in compressed files. + - Reduce function local stack usage from 0x3d4 bytes to just noise in + fs/ntfs/upcase.c. (Randy Dunlap) + - Remove compiler warnings for newer gcc. + +2.1.0 - First steps towards write support: implement file overwrite. + + - Add configuration option for developmental write support with an + appropriately scary configuration help text. + - Initial implementation of fs/ntfs/aops.c::ntfs_writepage() and its + helper fs/ntfs/aops.c::ntfs_write_block(). This enables mmap(2) based + overwriting of existing files on ntfs. Note: Resident files are + only written into memory, and not written out to disk at present, so + avoid writing to files smaller than about 1kiB. + - Initial implementation of fs/ntfs/aops.c::ntfs_prepare_write(), its + helper fs/ntfs/aops.c::ntfs_prepare_nonresident_write() and their + counterparts, fs/ntfs/aops.c::ntfs_commit_write(), and + fs/ntfs/aops.c::ntfs_commit_nonresident_write(), respectively. Also, + add generic_file_write() to the ntfs file operations (fs/ntfs/file.c). + This enables write(2) based overwriting of existing files on ntfs. + Note: As with mmap(2) based overwriting, resident files are only + written into memory, and not written out to disk at present, so avoid + writing to files smaller than about 1kiB. 
+ - Implement ->truncate (fs/ntfs/inode.c::ntfs_truncate()) and + ->setattr() (fs/ntfs/inode.c::ntfs_setattr()) inode operations for + files with the purpose of intercepting and aborting all i_size + changes which we do not support yet. ntfs_truncate() actually only + emits a warning message but AFAICS our interception of i_size changes + elsewhere means ntfs_truncate() never gets called for i_size changes. + It is only called from generic_file_write() when we fail in + ntfs_prepare_{,nonresident_}write() in order to discard any + instantiated buffers beyond i_size. Thus i_size is not actually + changed so our warning message is enough. Unfortunately it is not + possible to easily determine if i_size is being changed or not hence + we just emit an appropriately worded error message. + +2.0.25 - Small bug fixes and cleanups. + + - Unlock the page in an out of memory error code path in + fs/ntfs/aops.c::ntfs_read_block(). + - If fs/ntfs/aops.c::ntfs_read_page() is called on an uptodate page, + just unlock the page and return. (This can happen due to ->writepage + clearing PageUptodate() during write out of MstProtected() + attributes.) + - Remove leaked write code again. + +2.0.24 - Cleanups. + + - Treat BUG_ON() as ASSERT() not VERIFY(), i.e. do not use side effects + inside BUG_ON(). (Adam J. Richter) + - Split logical OR expressions inside BUG_ON() into individual BUG_ON() + calls for improved debugging. (Adam J. Richter) + - Add errors flag to the ntfs volume state, accessed via + NVol{,Set,Clear}Errors(vol). + - Do not allow read-write remounts of read-only volumes with errors. + - Clarify comment for ntfs file operation sendfile which was added by + Christoph Hellwig a while ago (just using generic_file_sendfile()) + to say that ntfs ->sendfile is only used for the case where the + source data is on the ntfs partition and the destination is + somewhere else, i.e. nothing we need to concern ourselves with. 
+ - Add generic_file_write() as our ntfs file write operation. + +2.0.23 - Major bug fixes (races, deadlocks, non-i386 architectures). + + - Massive internal locking changes to mft record locking. Fixes lock + recursion and replaces the mrec_lock read/write semaphore with a + mutex. Also removes the now superfluous mft_count. This fixes several + race conditions and deadlocks, especially in the future write code. + - Fix ntfs over loopback for compressed files by adding an + optimization barrier. (gcc was screwing up otherwise ?) + - Miscellaneous cleanups all over the code and a fix or two in error + handling code paths. + Thanks go to Christoph Hellwig for pointing out the following two: + - Remove now unused function fs/ntfs/malloc.h::vmalloc_nofs(). + - Fix ntfs_free() for ia64 and parisc by checking for VMALLOC_END, too. + +2.0.22 - Cleanups, mainly to ntfs_readdir(), and use C99 initializers. + + - Change fs/ntfs/dir.c::ntfs_readdir() to only read/write ->f_pos once + at entry/exit respectively. + - Use C99 initializers for structures. + - Remove unused variable blocks from fs/ntfs/aops.c::ntfs_read_block(). + +2.0.21 - Check for, and refuse to work with too large files/directories/volumes. + + - Limit volume size at mount time to 2TiB on architectures where + unsigned long is 32-bits (fs/ntfs/super.c::parse_ntfs_boot_sector()). + This is the most we can do without overflowing the 32-bit limit of + the block device size imposed on us by sb_bread() and sb_getblk() + for the time being. + - Limit file/directory size at open() time to 16TiB on architectures + where unsigned long is 32-bits (fs/ntfs/file.c::ntfs_file_open() and + fs/ntfs/dir.c::ntfs_dir_open()). This is the most we can do without + overflowing the page cache page index. + +2.0.20 - Support non-resident directory index bitmaps, fix page leak in readdir. + + - Move the directory index bitmap to use an attribute inode instead of + having special fields for it inside the ntfs inode structure. 
This + means that the index bitmaps now use the page cache for i/o, too, + and also as a side effect we get support for non-resident index + bitmaps for free. + - Simplify/cleanup error handling in fs/ntfs/dir.c::ntfs_readdir() and + fix a page leak that manifested itself in some cases. + - Add fs/ntfs/inode.c::ntfs_put_inode(), which we need to release the + index bitmap inode on the final iput(). + +2.0.19 - Fix race condition, improvements, and optimizations in i/o interface. + + - Apply block optimization added to fs/ntfs/aops.c::ntfs_read_block() + to fs/ntfs/compress.c::ntfs_file_read_compressed_block() as well. + - Drop the "file" from ntfs_file_read_compressed_block(). + - Rename fs/ntfs/aops.c::ntfs_enb_buffer_read_async() to + ntfs_end_buffer_async_read() (more like the fs/buffer.c counterpart). + - Update ntfs_end_buffer_async_read() with the improved logic from + its updated counterpart fs/buffer.c::end_buffer_async_read(). Apply + further logic improvements to better determine when we set PageError. + - Update submission of buffers in fs/ntfs/aops.c::ntfs_read_block() to + check for the buffers being uptodate first in line with the updated + fs/buffer.c::block_read_full_page(). This plugs a small race + condition. + +2.0.18 - Fix race condition in reading of compressed files. + + - There was a narrow window between checking a buffer head for being + uptodate and locking it in ntfs_file_read_compressed_block(). We now + lock the buffer and then check whether it is uptodate or not. + +2.0.17 - Cleanups and optimizations - shrinking the ToDo list. + + - Modify fs/ntfs/inode.c::ntfs_read_locked_inode() to return an error + code and update callers, i.e. ntfs_iget(), to pass that error code + up instead of just using -EIO. + - Modifications to super.c to ensure that both mount and remount + cannot set any write related options when the driver is compiled + read-only. 
+ - Optimize block resolution in fs/ntfs/aops.c::ntfs_read_block() to + cache the current run list element. This should improve performance + when reading very large and/or very fragmented data. + +2.0.16 - Convert access to $MFT/$BITMAP to attribute inode API. + + - Fix a stupid bug introduced in 2.0.15 where we were unmapping the + wrong inode in fs/ntfs/inode.c::ntfs_attr_iget(). + - Fix debugging check in fs/ntfs/aops.c::ntfs_read_block(). + - Convert $MFT/$BITMAP access to attribute inode API and remove all + remnants of the ugly mftbmp address space and operations hack. This + means we finally have only one readpage function as well as only one + async io completion handler. Yey! The mft bitmap is now just an + attribute inode and is accessed from vol->mftbmp_ino just as if it + were a normal file. Fake inodes rule. (-: + +2.0.15 - Fake inodes based attribute i/o via the pagecache, fixes and cleanups. + + - Fix silly bug in fs/ntfs/super.c::parse_options() which was causing + remounts to fail when the partition had an entry in /etc/fstab and + the entry specified the nls= option. + - Apply same macro magic used in fs/ntfs/inode.h to fs/ntfs/volume.h to + expand all the helper functions NVolFoo(), NVolSetFoo(), and + NVolClearFoo(). + - Move copyright statement from driver initialisation message to + module description (fs/super.c). This makes the initialisation + message fit on one line and fits in better with rest of kernel. + - Update fs/ntfs/attrib.c::map_run_list() to work on both real and + attribute inodes, and both for files and directories. + - Implement fake attribute inodes allowing all attribute i/o to go via + the page cache and to use all the normal vfs/mm functionality: + - Add ntfs_attr_iget() and its helper ntfs_read_locked_attr_inode() + to fs/ntfs/inode.c. + - Add needed cleanup code to ntfs_clear_big_inode(). 
+ - Merge address space operations for files and directories (aops.c), + now just have ntfs_aops: + - Rename: + end_buffer_read_attr_async() -> ntfs_end_buffer_read_async(), + ntfs_attr_read_block() -> ntfs_read_block(), + ntfs_file_read_page() -> ntfs_readpage(). + - Rewrite fs/ntfs/aops.c::ntfs_readpage() to work on both real and + attribute inodes, and both for files and directories. + - Remove obsolete fs/ntfs/aops.c::ntfs_mst_readpage(). + +2.0.14 - Run list merging code cleanup, minor locking changes, typo fixes. + + - Change fs/ntfs/super.c::ntfs_statfs() to not rely on BKL by moving + the locking out of super.c::get_nr_free_mft_records() and taking and + dropping the mftbmp_lock rw_semaphore in ntfs_statfs() itself. + - Bring attribute run list merging code (fs/ntfs/attrib.c) in sync with + current userspace ntfs library code. This means that if a merge + fails the original run lists are always left unmodified instead of + being silently corrupted. + - Misc typo fixes. + +2.0.13 - Use iget5_locked() in preparation for fake inodes and small cleanups. + + - Remove nr_mft_bits and the now superfluous union with nr_mft_records + from ntfs_volume structure. + - Remove nr_lcn_bits and the now superfluous union with nr_clusters + from ntfs_volume structure. + - Use iget5_locked() and friends instead of conventional iget(). Wrap + the call in fs/ntfs/inode.c::ntfs_iget() and update callers of iget() + to use ntfs_iget(). Leave only one iget() call at mount time so we + don't need an ntfs_iget_mount(). + - Change fs/ntfs/inode.c::ntfs_new_extent_inode() to take mft_no as an + additional argument. + +2.0.12 - Initial cleanup of address space operations following 2.0.11 changes. + + - Merge fs/ntfs/aops.c::end_buffer_read_mst_async() and + fs/ntfs/aops.c::end_buffer_read_file_async() into one function + fs/ntfs/aops.c::end_buffer_read_attr_async() using NInoMstProtected() + to determine whether to apply mst fixups or not. 
+ - Above change allows merging fs/ntfs/aops.c::ntfs_file_read_block() + and fs/ntfs/aops.c::ntfs_mst_readpage() into one function + fs/ntfs/aops.c::ntfs_attr_read_block(). Also, create a tiny wrapper + fs/ntfs/aops.c::ntfs_mst_readpage() to transform the parameters from + the VFS readpage function prototype to the ntfs_attr_read_block() + function prototype. + +2.0.11 - Initial preparations for fake inode based attribute i/o. + + - Move definition of ntfs_inode_state_bits to fs/ntfs/inode.h and + do some macro magic (adapted from include/linux/buffer_head.h) to + expand all the helper functions NInoFoo(), NInoSetFoo(), and + NInoClearFoo(). + - Add new flag to ntfs_inode_state_bits: NI_Sparse. + - Add new fields to ntfs_inode structure to allow use of fake inodes + for attribute i/o: type, name, name_len. Also add new state bits: + NI_Attr, which, if set, indicates the inode is a fake inode, and + NI_MstProtected, which, if set, indicates the attribute uses multi + sector transfer protection, i.e. fixups need to be applied after + reads and before/after writes. + - Rename fs/ntfs/inode.c::ntfs_{new,clear,destroy}_inode() to + ntfs_{new,clear,destroy}_extent_inode() and update callers. + - Use ntfs_clear_extent_inode() in fs/ntfs/inode.c::__ntfs_clear_inode() + instead of ntfs_destroy_extent_inode(). + - Cleanup memory deallocations in {__,}ntfs_clear_{,big_}inode(). + - Make all operations on ntfs inode state bits use the NIno* functions. + - Set up the new ntfs inode fields and state bits in + fs/ntfs/inode.c::ntfs_read_inode() and add appropriate cleanup of + allocated memory to __ntfs_clear_inode(). + - Cleanup ntfs_inode structure a bit for better ordering of elements + w.r.t. their size to allow better packing of the structure in memory. + +2.0.10 - There can only be 2^32 - 1 inodes on an NTFS volume. 
+ + - Add check at mount time to verify that the number of inodes on the + volume does not exceed 2^32 - 1, which is the maximum allowed for + NTFS according to Microsoft. + - Change mft_no member of ntfs_inode structure to be unsigned long. + Update all users. This makes ntfs_inode->mft_no just a copy of struct + inode->i_ino. But we can't just always use struct inode->i_ino and + remove mft_no because extent inodes do not have an attached struct + inode. + +2.0.9 - Decompression engine now uses a single buffer and other cleanups. + + - Change decompression engine to use a single buffer protected by a + spin lock instead of per-CPU buffers. (Rusty Russell) + - Do not update cb_pos when handling a partial final page during + decompression of a sparse compression block, as the value is later + reset without being read/used. (Rusty Russell) + - Switch to using the new KM_BIO_SRC_IRQ for atomic kmap()s. (Andrew + Morton) + - Change buffer size in ntfs_readdir()/ntfs_filldir() to use + NLS_MAX_CHARSET_SIZE which makes the buffers almost 1kiB each but + it also makes everything safer so it is a good thing. + - Miscellaneous minor cleanups to comments. + +2.0.8 - Major updates for handling of case sensitivity and dcache aliasing. + + Big thanks go to Al Viro and other inhabitants of #kernel for investing + their time to discuss the case sensitivity and dcache aliasing issues. + + - Remove unused source file fs/ntfs/attraops.c. + - Remove show_inodes mount option(s), thus dropping support for + displaying of short file names. + - Remove deprecated mount option posix. + - Restore show_sys_files mount option. + - Add new mount option case_sensitive, to determine if the driver + treats file names as case sensitive or not. If case sensitive, create + file names in the POSIX namespace. Otherwise create file names in the + LONG/WIN32 namespace. Note, files remain accessible via their short + file name, if it exists. + - Remove really dumb logic bug in boot sector recovery code. 
+ - Fix dcache aliasing issues wrt short/long file names via changes + to fs/ntfs/dir.c::ntfs_lookup_inode_by_name() and + fs/ntfs/namei.c::ntfs_lookup(): + - Add additional argument to ntfs_lookup_inode_by_name() in which we + return information about the matching file name if the case is not + matching or the match is a short file name. See comments above the + function definition for details. + - Change ntfs_lookup() to only create dcache entries for the correctly + cased file name and only for the WIN32 namespace counterpart of DOS + namespace file names. This ensures we have only one dentry per + directory and also removes all dcache aliasing issues between short + and long file names once we add write support. See comments above + function for details. + - Fix potential 1 byte overflow in fs/ntfs/unistr.c::ntfs_ucstonls(). + +2.0.7 - Minor cleanups and updates for changes in core kernel code. + + - Remove much of the NULL struct element initializers. + - Various updates to make compatible with recent kernels. + - Remove defines of MAX_BUF_PER_PAGE and include linux/buffer_head.h + in fs/ntfs/ntfs.h instead. + - Remove no longer needed KERNEL_VERSION checks. We are now in the + kernel proper so they are no longer needed. + +2.0.6 - Major bugfix to make compatible with other kernel changes. + + - Initialize the mftbmp address space properly now that there are more + fields in the struct address_space. This was leading to hangs and + oopses on umount since 2.5.12 because of changes to other parts of + the kernel. We probably want a kernel generic init_address_space() + function... + - Drop BKL from ntfs_readdir() after consultation with Al Viro. The + only caller of ->readdir() is vfs_readdir() which holds i_sem during + the call, and i_sem is sufficient protection against changes in the + directory inode (including ->i_size). 
+ - Use generic_file_llseek() for directories (as opposed to + default_llseek()) as this downs i_sem instead of the BKL which is + what we now need for exclusion against ->f_pos changes considering we + no longer take the BKL in ntfs_readdir(). + +2.0.5 - Major bugfix. Buffer overflow in extent inode handling. + + - No need to set old blocksize in super.c::ntfs_fill_super() as the + VFS does so via invocation of deactivate_super() calling + fs->fill_super() calling block_kill_super() which does it. + - BKL moved from VFS into dir.c::ntfs_readdir(). (Linus Torvalds) + -> Do we really need it? I don't think so as we have exclusion on + the directory ntfs_inode rw_semaphore mrec_lock. We might have to + move the ->f_pos accesses under the mrec_lock though. Check this... + - Fix really, really, really stupid buffer overflow in extent inode + handling in mft.c::map_extent_mft_record(). + +2.0.4 - Cleanups and updates for kernel 2.5.11. + + - Add documentation on how to use the MD driver to be able to use NTFS + stripe and volume sets in Linux and generally cleanup documentation + a bit. + Remove all uses of kdev_t in favour of struct block_device *: + - Change compress.c::ntfs_file_read_compressed_block() to use + sb_getblk() instead of getblk(). + - Change super.c::ntfs_fill_super() to use bdev_hardsect_size() instead + of get_hardsect_size(). + - No need to get old blocksize in super.c::ntfs_fill_super() as + fs/super.c::get_sb_bdev() already does this. + - Set bh->b_bdev instead of bh->b_dev throughout aops.c. + +2.0.3 - Small bug fixes, cleanups, and performance improvements. + + - Remove some dead code from mft.c. + - Optimize readpage and read_block functions throughout aops.c so that + only initialized blocks are read. Non-initialized ones have their + buffer head mapped, zeroed, and set up to date, without scheduling + any i/o. Thanks to Al Viro for advice on how to avoid the device i/o. 
+ Thanks go to Andrew Morton for spotting the below: + - Fix buglet in allocate_compression_buffers() error code path. + - Call flush_dcache_page() after modifying page cache page contents in + ntfs_file_readpage(). + - Check for existence of page buffers throughout aops.c before calling + create_empty_buffers(). This happens when an I/O error occurs and the + read is retried. (It also happens once writing is implemented so that + needed doing anyway but I had left it for later...) + - Don't BUG_ON() uptodate and/or mapped buffers throughout aops.c in + readpage and read_block functions. Reasoning same as above (i.e. I/O + error retries and future write code paths.) + +2.0.2 - Minor updates and cleanups. + + - Cleanup: rename mst.c::__post_read_mst_fixup to post_write_mst_fixup + and cleanup the code a bit, removing the unused size parameter. + - Change default fmask to 0177 and update documentation. + - Change attrib.c::get_attr_search_ctx() to return the search context + directly instead of taking the address of a pointer. A return value + of NULL means the allocation failed. Updated all callers + appropriately. + - Update to 2.5.9 kernel (preserving backwards compatibility) by + replacing all occurrences of page->buffers with page_buffers(page). + - Fix minor bugs in run list merging, also minor cleanup. + - Updates to bootsector layout and mft mirror contents descriptions. + - Small bug fix in error detection in unistr.c and some cleanups. + - Grow name buffer allocations in unistr.c in aligned multiples of 64 + bytes. + +2.0.1 - Minor updates. + + - Make default umask correspond to documentation. + - Improve documentation. + - Set default mode to include execute bit. The {u,f,d}mask can be used + to take it away if desired. This allows binaries to be executed from + a mounted ntfs partition. + +2.0.0 - New version number. Remove TNG from the name. Now in the kernel. + + - Add kill_super, just keeping up with the vfs changes in the kernel. 
+ - Repeat some changes from tng-0.0.8 that somehow got lost on the way + from the CVS import into BitKeeper. + - Begin to implement proper handling of allocated_size vs + initialized_size vs data_size (i.e. i_size). Done are + mft.c::ntfs_mft_readpage(), aops.c::end_buffer_read_index_async(), + and attrib.c::load_attribute_list(). + - Lock the run list in attrib.c::load_attribute_list() while using it. + - Fix memory leak in ntfs_file_read_compressed_block() and generally + clean up compress.c a little, removing some uncommented/unused debug + code. + - Tidy up dir.c a little bit. + - Don't bother getting the run list in inode.c::ntfs_read_inode(). + - Merge mft.c::ntfs_mft_readpage() and aops.c::ntfs_index_readpage() + creating aops.c::ntfs_mst_readpage(), improving the handling of + holes and overflow in the process and implementing the correct + equivalent of ntfs_file_get_block() in ntfs_mst_readpage() itself. + I am aiming for correctness at the moment. Modularisation can come + later. + - Rename aops.c::end_buffer_read_index_async() to + end_buffer_read_mst_async() and optimize the overflow checking and + handling. + - Use the host of the mftbmp address space mapping to hold the ntfs + volume. This is needed so the async i/o completion handler can + retrieve a pointer to the volume. Hopefully this will not cause + problems elsewhere in the kernel... Otherwise will need to use a + fake inode. + - Complete implementation of proper handling of allocated_size vs + initialized_size vs data_size (i.e. i_size) in whole driver. + Basically aops.c is now completely rewritten. + - Change NTFS driver name to just NTFS and set version number to 2.0.0 + to make a clear distinction from the old driver which is still on + version 1.1.22. + +tng-0.0.8 - 08/03/2002 - Now using BitKeeper, http://linux-ntfs.bkbits.net/ + + - Replace bdevname(sb->s_dev) with sb->s_id. + - Remove now superfluous new-line characters in all callers of + ntfs_debug(). 
+ - Apply kludge in ntfs_read_inode(), setting i_nlink to 1 for + directories. Without this the "find" utility gets very upset which is + fair enough as Linux/Unix do not support directory hard links. + - Further run list merging work. (Richard Russon) + - Backwards compatibility for gcc-2.95. (Richard Russon) + - Update to kernel 2.5.5-pre1 and rediff the now tiny patch. + - Convert to new file system declaration using ->ntfs_get_sb() and + replacing ntfs_read_super() with ntfs_fill_super(). + - Set s_maxbytes to MAX_LFS_FILESIZE to avoid page cache page index + overflow on 32-bit architectures. + - Cleanup upcase loading code to use ntfs_(un)map_page(). + - Disable/reenable preemption in critical sections of compression engine. + - Replace device size determination in ntfs_fill_super() with + sb->s_bdev->bd_inode->i_size (in bytes) and remove now superfluous + function super.c::get_nr_blocks(). + - Implement a mount time option (show_inodes) allowing choice of which + types of inode names readdir() returns and modify ntfs_filldir() + accordingly. There are several parameters to show_inodes: + system: system files + win32: long file names (including POSIX file names) [DEFAULT] + long: same as win32 + dos: short file names only (excluding POSIX file names) + short: same as dos + posix: same as both win32 and dos + all: all file names + Note that the options are additive, i.e. specifying: + -o show_inodes=system,show_inodes=win32,show_inodes=dos + is the same as specifying: + -o show_inodes=all + Note that the "posix" and "all" options will show all directory + names, BUT the link count on each directory inode entry is set to 1, + due to Linux not supporting directory hard links. This may well + confuse some userspace applications, since the directory names will + have the same inode numbers. Thus it is NOT advisable to use the + "posix" or "all" options. We provide them only for completeness sake. 
+ - Add copies of allocated_size, initialized_size, and compressed_size to + the ntfs inode structure and set them up in + inode.c::ntfs_read_inode(). These reflect the unnamed data attribute + for files and the index allocation attribute for directories. + - Add copies of allocated_size and initialized_size to ntfs inode for + $BITMAP attribute of large directories and set them up in + inode.c::ntfs_read_inode(). + - Add copies of allocated_size and initialized_size to ntfs volume for + $BITMAP attribute of $MFT and set them up in + super.c::load_system_files(). + - Parse deprecated ntfs driver options (iocharset, show_sys_files, + posix, and utf8) and tell user what the new options to use are. Note + we still do support them but they will be removed with kernel 2.7.x. + - Change all occurrences of integer long long printf formatting to hex + as printk() will not support long long integer format if/when the + div64 patch goes into the kernel. + - Make slab caches have stable names and change the names to what they + were intended to be. These changes are required/made possible by the + new slab cache name handling which removes the length limitation by + requiring the caller of kmem_cache_create() to supply a stable name + which is then referenced but not copied. + - Rename run_list structure to run_list_element and create a new + run_list structure containing a pointer to a run_list_element + structure and a read/write semaphore. Adapt all users of run lists + to new scheme and take and release the lock as needed. This fixes a + nasty race as the run_list changes even when inodes are locked for + reading and even when the inode isn't locked at all, so we really + needed the serialization. We use a semaphore rather than a spinlock + as memory allocations can sleep and doing everything GFP_ATOMIC + would be silly. + - Cleanup read_inode() removing all code checking for lowest_vcn != 0. 
+ This can never happen due to the nature of lookup_attr() and how we + support attribute lists. If it did happen it would imply the inode + being corrupt. + - Check for lowest_vcn != 0 in ntfs_read_inode() and mark the inode as + bad if found. + - Update to 2.5.6-pre2 changes in struct address_space. + - Use parent_ino() when accessing d_parent inode number in dir.c. + - Import Sourceforge CVS repository into BitKeeper repository: + http://linux-ntfs.bkbits.net/ntfs-tng-2.5 + - Update fs/Makefile, fs/Config.help, fs/Config.in, and + Documentation/filesystems/ntfs.txt for NTFS TNG. + - Create kernel configuration option controlling whether debugging + is enabled or not. + - Add the required export of end_buffer_io_sync() from the patches + directory to the kernel code. + - Update inode.c::ntfs_show_options() with show_inodes mount option. + - Update errors mount option. + +tng-0.0.7 - 13/02/2002 - The driver is now feature complete for read-only! + + - Cleanup mft.c and its debug/error output in particular. Fix a minor + bug in mapping of extent inodes. Update all the comments to fit all + the recent code changes. + - Modify vcn_to_lcn() to cope with entirely unmapped run lists. + - Cleanups in compress.c, mostly comments and folding help. + - Implement attrib.c::map_run_list() as a generic helper. + - Make compress.c::ntfs_file_read_compressed_block() use map_run_list() + thus making code shorter and enabling attribute list support. + - Cleanup incorrect use of [su]64 with %L printf format specifier in + all source files. Type casts to [unsigned] long long added to correct + the mismatches (important for architectures which have long long not + being 64 bits). + - Merge async io completion handlers for directory indexes and $MFT + data into one by setting the index_block_size{_bits} of the ntfs + inode for $MFT to the mft_record_size{_bits} of the ntfs_volume. + - Cleanup aops.c, update comments. 
+ - Make ntfs_file_get_block() use map_run_list() so all files now + support attribute lists. + - Make ntfs_dir_readpage() almost verbatim copy of + block_read_full_page() by using ntfs_file_get_block() with only real + difference being the use of our own async io completion handler + rather than the default one, thus reducing the amount of code and + automatically enabling attribute list support for directory indices. + - Fix bug in load_attribute_list() - forgot to call brelse in error + code path. + - Change parameters to find_attr() and lookup_attr(). We no longer + pass in the upcase table and its length. These can be gotten from + ctx->ntfs_ino->vol->upcase{_len}. Update all callers. + - Cleanups in attrib.c. + - Implement merging of run lists, attrib.c::merge_run_lists() and its + helpers. (Richard Russon) + - Attribute lists part 2, attribute extents and multi part run lists: + enable proper support for LCN_RL_NOT_MAPPED and automatic mapping of + further run list parts via attrib.c::map_run_list(). + - Tiny endianness bug fix in decompress_mapping_pairs(). + +tng-0.0.6 - Encrypted directories, bug fixes, cleanups, debugging enhancements. + + - Enable encrypted directories. (Their index root is marked encrypted + to indicate that new files in that directory should be created + encrypted.) + - Fix bug in NInoBmpNonResident() macro. (Cut and paste error.) + - Enable $Extend system directory. Most (if not all) extended system + files do not have unnamed data attributes so ntfs_read_inode() had to + special case them but that is ok, as the special casing recovery + happens inside an error code path so there is zero slow down in the + normal fast path. The special casing is done by introducing a new + function inode.c::ntfs_is_extended_system_file() which checks if any + of the hard links in the inode point to $Extend as being their parent + directory and if they do we assume this is an extended system file. 
+ - Create a sysctl/proc interface to allow {dis,en}abling of debug output + when compiled with -DDEBUG. Default is debug messages to be disabled. + To enable them, one writes a non-zero value to /proc/sys/fs/ntfs-debug + (if /proc is enabled) or uses sysctl(2) to effect the same (if sysctl + interface is enabled). Inspired by old ntfs driver. + - Add debug_msgs insmod/kernel boot parameter to set whether debug + messages are {dis,en}abled. This is useful to enable debug messages + during ntfs initialization and is the only way to activate debugging + when the sysctl interface is not enabled. + - Cleanup debug output in various places. + - Remove all dollar signs ($) from the source (except comments) to + enable compilation on architectures whose gcc compiler does not + support dollar signs in the names of variables/constants. Attribute + types now start with AT_ instead of $ and $I30 is now just I30. + - Cleanup ntfs_lookup() and add consistency check of sequence numbers. + - Load complete run list for $MFT/$BITMAP during mount and cleanup + access functions. This means we now cope with $MFT/$BITMAP being + spread across several mft records. + - Disable modification of mft_zone_multiplier on remount. We can always + reenable this later on if we really want to, but we will need to make + sure we readjust the mft_zone size / layout accordingly. + +tng-0.0.5 - Modernize for 2.5.x and further in line-ing with Al Viro's comments. + + - Use sb_set_blocksize() instead of set_blocksize() and verify the + return value. + - Use sb_bread() instead of bread() throughout. + - Add index_vcn_size{_bits} to ntfs_inode structure to store the size + of a directory index block vcn. Apply resulting simplifications in + dir.c everywhere. + - Fix a small bug somewhere (but forgot what it was). + - Change ntfs_{debug,error,warning} to enable gcc to do type checking + on the printf-format parameter list and fix bugs reported by gcc + as a result.
(Richard Russon) + - Move inode allocation strategy to Al's new stuff but maintain the + divorce of ntfs_inode from struct inode. To achieve this we have two + separate slab caches, one for big ntfs inodes containing a struct + inode and pure ntfs inodes and at the same time fix some faulty + error code paths in ntfs_read_inode(). + - Show mount options in proc (inode.c::ntfs_show_options()). + +tng-0.0.4 - Big changes, getting in line with Al Viro's comments. + + - Modified (un)map_mft_record functions to be common for read and write + case. To specify which is which, added extra parameter at front of + parameter list. Pass either READ or WRITE to this, each has the + obvious meaning. + - General cleanups to allow for easier folding in vi. + - attrib.c::decompress_mapping_pairs() now accepts the old run list + argument, and invokes attrib.c::merge_run_lists() to merge the old + and the new run lists. + - Removed attrib.c::find_first_attr(). + - Implemented loading of attribute list and complete run list for $MFT. + This means we now cope with $MFT being spread across several mft + records. + - Adapt to 2.5.2-pre9 and the changed create_empty_buffers() syntax. + - Adapt major/minor/kdev_t/[bk]devname stuff to new 2.5.x kernels. + - Make ntfs_volume be allocated via kmalloc() instead of using a slab + cache. There are too few ntfs_volume structures at any one time + to justify a private slab cache. + - Fix bogus kmap() use in async io completion. Now use kmap_atomic(). + Use KM_BIO_IRQ on advice from IRC/kernel... + - Use ntfs_map_page() in map_mft_record() and create ->readpage method + for reading $MFT (ntfs_mft_readpage). In the process create dedicated + address space operations (ntfs_mft_aops) for $MFT inode mapping. Also + removed the now superfluous exports from the kernel core patch. + - Fix a bug where kfree() was used instead of ntfs_free(). + - Change map_mft_record() to take ntfs_inode as argument instead of + vfs inode. Ditto for unmap_mft_record().
Adapt all callers. + - Add pointer to ntfs_volume to ntfs_inode. + - Add mft record number and sequence number to ntfs_inode. Stop using + i_ino and i_generation for in-driver purposes. + - Implement attrib.c::merge_run_lists(). (Richard Russon) + - Remove use of proper inodes by extent inodes. Move i_ino and + i_generation to ntfs_inode to do this. Apply simplifications that + result and remove iget_no_wait(), etc. + - Pass ntfs_inode everywhere in the driver (used to be struct inode). + - Add reference counting in ntfs_inode for the ntfs inode itself and + for the mapped mft record. + - Extend mft record mapping so we can (un)map extent mft records (new + functions (un)map_extent_mft_record), and so mappings are reference + counted and don't have to happen twice if already mapped - just ref + count increases. + - Add -o iocharset as alias to -o nls for backwards compatibility. + - The latest core patch is now tiny. In fact just a single additional + export is necessary over the base kernel. + +tng-0.0.3 - Cleanups, enhancements, bug fixes. + + - Work on attrib.c::decompress_mapping_pairs() to detect base extents + and setup the run list appropriately using knowledge provided by the + sizes in the base attribute record. + - Balance the get_/put_attr_search_ctx() calls so we don't leak memory + any more. + - Introduce ntfs_malloc_nofs() and ntfs_free() to allocate/free a single + page or use vmalloc depending on the amount of memory requested. + - Cleanup error output. The __FUNCTION__ "(): " is now added + automatically. Introduced a new header file debug.h to support this + and also moved ntfs_debug() function into it. + - Make reading of compressed files more intelligent and especially get + rid of the vmalloc_nofs() from readpage(). This now uses per CPU + buffers (allocated at first mount with cluster size <= 4kiB and + deallocated on last umount with cluster size <= 4kiB), and + asynchronous io for the compressed data using a list of buffer heads. 
+ Er, we use synchronous io as async io only works on whole pages + covered by buffers and not on individual buffer heads... + - Bug fix for reading compressed files with sparse compression blocks. + +tng-0.0.2 - Now handles larger/fragmented/compressed volumes/files/dirs. + + - Fixed handling of directories when cluster size exceeds index block + size. + - Hide DOS only name space directory entries from readdir() but allow + them in lookup(). This should fix the problem that Linux doesn't + support directory hard links, while still allowing access to entries + via their short file name. This also has the benefit of mimicking + what Windows users are used to, so it is the ideal solution. + - Implemented sync_page everywhere so no more hangs in D state when + waiting for a page. + - Stop using bforget() in favour of brelse(). + - Stop locking buffers unnecessarily. + - Implemented compressed files (inode->mapping contains uncompressed + data, raw compressed data is currently bread() into a vmalloc()ed + memory buffer). + - Enable compressed directories. (Their index root is marked compressed + to indicate that new files in that directory should be created + compressed.) + - Use vsnprintf rather than vsprintf in the ntfs_error and ntfs_warning + functions. (Thanks to Will Dyson for pointing this out.) + - Moved the ntfs_inode and ntfs_volume (the former ntfs_inode_info and + ntfs_sb_info) out of the common inode and super_block structures and + started using the generic_ip and generic_sbp pointers instead. This + makes ntfs entirely private with respect to the kernel tree. + - Detect compiler version and abort with error message if gcc less than + 2.96 is used. + - Fix bug in name comparison function in unistr.c. + - Implement attribute lists part 1, the infrastructure: search contexts + and operations, find_external_attr(), lookup_attr()) and make the + code use the infrastructure. 
+ - Fix stupid buffer overflow bug that became apparent on larger run + list containing attributes. + - Fix bugs in readdir() that became apparent on larger directories. + + The driver is now really useful and survives the test + find . -type f -exec md5sum "{}" \; + without any error messages on an over 1GiB sized partition with >16k + files on it, including compressed files and directories and many files + and directories with attribute lists. + +tng-0.0.1 - The first useful version. + + - Added ntfs_lookup(). + - Added default upcase generation and handling. + - Added compile options to be shown on module init. + - Many bug fixes that were "hidden" before. + - Update to latest kernel. + - Added ntfs_readdir(). + - Added file operations for mmap(), read(), open() and llseek(). We just + use the generic ones. The whole point of going through implementing + readpage() methods and where possible get_block() call backs is that + this allows us to make use of the generic high level methods provided + by the kernel. + + The driver is now actually useful! Yey. (-: It undoubtedly has got bugs + though and it doesn't implement accessing compressed files yet. Also, + accessing files with attribute list attributes is not implemented yet + either. But for small or simple file systems it should work and allow + you to list directories, use stat on directory entries and the file + system, open, read, mmap and llseek around in files. A big milestone + has been reached! + +tng-0.0.0 - Initial version tag. + + Initial driver implementation. The driver can mount and umount simple + NTFS file systems (i.e. ones without attribute lists in the system + files). If the mount fails there might be problems in the error handling + code paths, so be warned. Otherwise it seems to be loading the system + files nicely and the mft record read mapping/unmapping seems to be + working nicely, too.
Proof of inode metadata in the page cache and non- + resident file unnamed stream data in the page cache concepts is thus + complete. + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/Makefile b/reactos/drivers/fs/ntfs/linux-ntfs/Makefile new file mode 100644 index 00000000000..b6fa3030ac3 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/Makefile @@ -0,0 +1,16 @@ +# Rules for making the NTFS driver. + +obj-$(CONFIG_NTFS_FS) += ntfs.o + +ntfs-objs := aops.o attrib.o compress.o debug.o dir.o file.o inode.o mft.o \ + mst.o namei.o super.o sysctl.o time.o unistr.o upcase.o + +EXTRA_CFLAGS = -DNTFS_VERSION=\"2.1.5\" + +ifeq ($(CONFIG_NTFS_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif + +ifeq ($(CONFIG_NTFS_RW),y) +EXTRA_CFLAGS += -DNTFS_RW +endif diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/aops.c b/reactos/drivers/fs/ntfs/linux-ntfs/aops.c new file mode 100644 index 00000000000..e3b1c227cb7 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/aops.c @@ -0,0 +1,1794 @@ +/** + * aops.c - NTFS kernel address space operations and page cache handling. + * Part of the Linux-NTFS project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include + +#include "ntfs.h" + +/** + * ntfs_end_buffer_async_read - async io completion for reading attributes + * @bh: buffer head on which io is completed + * @uptodate: whether @bh is now uptodate or not + * + * Asynchronous I/O completion handler for reading pages belonging to the + * attribute address space of an inode. The inodes can either be files or + * directories or they can be fake inodes describing some attribute. + * + * If NInoMstProtected(), perform the post read mst fixups when all IO on the + * page has been completed and mark the page uptodate or set the error bit on + * the page. To determine the size of the records that need fixing up, we cheat + * a little bit by setting the index_block_size in ntfs_inode to the ntfs + * record size, and index_block_size_bits, to the log(base 2) of the ntfs + * record size. + */ +static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) +{ + static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED; + unsigned long flags; + struct buffer_head *tmp; + struct page *page; + ntfs_inode *ni; + int page_uptodate = 1; + + page = bh->b_page; + ni = NTFS_I(page->mapping->host); + + if (likely(uptodate)) { + s64 file_ofs; + + set_buffer_uptodate(bh); + + file_ofs = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); + /* Check for the current buffer head overflowing. 
*/ + if (file_ofs + bh->b_size > ni->initialized_size) { + char *addr; + int ofs = 0; + + if (file_ofs < ni->initialized_size) + ofs = ni->initialized_size - file_ofs; + addr = kmap_atomic(page, KM_BIO_SRC_IRQ); + memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); + flush_dcache_page(page); + kunmap_atomic(addr, KM_BIO_SRC_IRQ); + } + } else { + clear_buffer_uptodate(bh); + ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %Lu.", + (unsigned long long)bh->b_blocknr); + SetPageError(page); + } + + spin_lock_irqsave(&page_uptodate_lock, flags); + clear_buffer_async_read(bh); + unlock_buffer(bh); + tmp = bh; + do { + if (!buffer_uptodate(tmp)) + page_uptodate = 0; + if (buffer_async_read(tmp)) { + if (likely(buffer_locked(tmp))) + goto still_busy; + /* Async buffers must be locked. */ + BUG(); + } + tmp = tmp->b_this_page; + } while (tmp != bh); + spin_unlock_irqrestore(&page_uptodate_lock, flags); + /* + * If none of the buffers had errors then we can set the page uptodate, + * but we first have to perform the post read mst fixups, if the + * attribute is mst protected, i.e. if NInoMstProteced(ni) is true. + */ + if (!NInoMstProtected(ni)) { + if (likely(page_uptodate && !PageError(page))) + SetPageUptodate(page); + } else { + char *addr; + unsigned int i, recs, nr_err; + u32 rec_size; + + rec_size = ni->itype.index.block_size; + recs = PAGE_CACHE_SIZE / rec_size; + addr = kmap_atomic(page, KM_BIO_SRC_IRQ); + for (i = nr_err = 0; i < recs; i++) { + if (likely(!post_read_mst_fixup((NTFS_RECORD*)(addr + + i * rec_size), rec_size))) + continue; + nr_err++; + ntfs_error(ni->vol->sb, "post_read_mst_fixup() failed, " + "corrupt %s record 0x%Lx. Run chkdsk.", + ni->mft_no ? 
"index" : "mft", + (long long)(((s64)page->index << + PAGE_CACHE_SHIFT >> + ni->itype.index.block_size_bits) + i)); + } + flush_dcache_page(page); + kunmap_atomic(addr, KM_BIO_SRC_IRQ); + if (likely(!PageError(page))) { + if (likely(!nr_err && recs)) { + if (likely(page_uptodate)) + SetPageUptodate(page); + } else { + ntfs_error(ni->vol->sb, "Setting page error, " + "index 0x%lx.", page->index); + SetPageError(page); + } + } + } + unlock_page(page); + return; +still_busy: + spin_unlock_irqrestore(&page_uptodate_lock, flags); + return; +} + +/** + * ntfs_read_block - fill a @page of an address space with data + * @page: page cache page to fill with data + * + * Fill the page @page of the address space belonging to the @page->host inode. + * We read each buffer asynchronously and when all buffers are read in, our io + * completion handler ntfs_end_buffer_read_async(), if required, automatically + * applies the mst fixups to the page before finally marking it uptodate and + * unlocking it. + * + * We only enforce allocated_size limit because i_size is checked for in + * generic_file_read(). + * + * Return 0 on success and -errno on error. + * + * Contains an adapted version of fs/buffer.c::block_read_full_page(). 
+ */ +static int ntfs_read_block(struct page *page) +{ + VCN vcn; + LCN lcn; + ntfs_inode *ni; + ntfs_volume *vol; + run_list_element *rl; + struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; + sector_t iblock, lblock, zblock; + unsigned int blocksize, vcn_ofs; + int i, nr; + unsigned char blocksize_bits; + + ni = NTFS_I(page->mapping->host); + vol = ni->vol; + + blocksize_bits = VFS_I(ni)->i_blkbits; + blocksize = 1 << blocksize_bits; + + if (!page_has_buffers(page)) + create_empty_buffers(page, blocksize, 0); + bh = head = page_buffers(page); + if (unlikely(!bh)) { + unlock_page(page); + return -ENOMEM; + } + + iblock = page->index << (PAGE_CACHE_SHIFT - blocksize_bits); + lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; + zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits; + +#ifdef DEBUG + if (unlikely(!ni->run_list.rl && !ni->mft_no && !NInoAttr(ni))) + panic("NTFS: $MFT/$DATA run list has been unmapped! This is a " + "very serious bug! Cannot continue..."); +#endif + + /* Loop through all the buffers in the page. */ + rl = NULL; + nr = i = 0; + do { + if (unlikely(buffer_uptodate(bh))) + continue; + if (unlikely(buffer_mapped(bh))) { + arr[nr++] = bh; + continue; + } + bh->b_bdev = vol->sb->s_bdev; + /* Is the block within the allowed limits? */ + if (iblock < lblock) { + BOOL is_retry = FALSE; + + /* Convert iblock into corresponding vcn and offset. */ + vcn = (VCN)iblock << blocksize_bits >> + vol->cluster_size_bits; + vcn_ofs = ((VCN)iblock << blocksize_bits) & + vol->cluster_size_mask; + if (!rl) { +lock_retry_remap: + down_read(&ni->run_list.lock); + rl = ni->run_list.rl; + } + if (likely(rl != NULL)) { + /* Seek to element containing target vcn. */ + while (rl->length && rl[1].vcn <= vcn) + rl++; + lcn = vcn_to_lcn(rl, vcn); + } else + lcn = (LCN)LCN_RL_NOT_MAPPED; + /* Successful remap. */ + if (lcn >= 0) { + /* Setup buffer head to correct block. 
*/ + bh->b_blocknr = ((lcn << vol->cluster_size_bits) + + vcn_ofs) >> blocksize_bits; + set_buffer_mapped(bh); + /* Only read initialized data blocks. */ + if (iblock < zblock) { + arr[nr++] = bh; + continue; + } + /* Fully non-initialized data block, zero it. */ + goto handle_zblock; + } + /* It is a hole, need to zero it. */ + if (lcn == LCN_HOLE) + goto handle_hole; + /* If first try and run list unmapped, map and retry. */ + if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { + is_retry = TRUE; + /* + * Attempt to map run list, dropping lock for + * the duration. + */ + up_read(&ni->run_list.lock); + if (!map_run_list(ni, vcn)) + goto lock_retry_remap; + rl = NULL; + } + /* Hard error, zero out region. */ + SetPageError(page); + ntfs_error(vol->sb, "vcn_to_lcn(vcn = 0x%Lx) failed " + "with error code 0x%Lx%s.", + (long long)vcn, (long long)-lcn, + is_retry ? " even after retrying" : ""); + // FIXME: Depending on vol->on_errors, do something. + } + /* + * Either iblock was outside lblock limits or vcn_to_lcn() + * returned error. Just zero that portion of the page and set + * the buffer uptodate. + */ +handle_hole: + bh->b_blocknr = -1UL; + clear_buffer_mapped(bh); +handle_zblock: + memset(kmap(page) + i * blocksize, 0, blocksize); + flush_dcache_page(page); + kunmap(page); + set_buffer_uptodate(bh); + } while (i++, iblock++, (bh = bh->b_this_page) != head); + + /* Release the lock if we took it. */ + if (rl) + up_read(&ni->run_list.lock); + + /* Check we have at least one buffer ready for i/o. */ + if (nr) { + struct buffer_head *tbh; + + /* Lock the buffers. */ + for (i = 0; i < nr; i++) { + tbh = arr[i]; + lock_buffer(tbh); + tbh->b_end_io = ntfs_end_buffer_async_read; + set_buffer_async_read(tbh); + } + /* Finally, start i/o on the buffers. */ + for (i = 0; i < nr; i++) { + tbh = arr[i]; + if (likely(!buffer_uptodate(tbh))) + submit_bh(READ, tbh); + else + ntfs_end_buffer_async_read(tbh, 1); + } + return 0; + } + /* No i/o was scheduled on any of the buffers. 
*/ + if (likely(!PageError(page))) + SetPageUptodate(page); + else /* Signal synchronous i/o error. */ + nr = -EIO; + unlock_page(page); + return nr; +} + +/** + * ntfs_readpage - fill a @page of a @file with data from the device + * @file: open file to which the page @page belongs or NULL + * @page: page cache page to fill with data + * + * For non-resident attributes, ntfs_readpage() fills the @page of the open + * file @file by calling the ntfs version of the generic block_read_full_page() + * function, ntfs_read_block(), which in turn creates and reads in the buffers + * associated with the page asynchronously. + * + * For resident attributes, OTOH, ntfs_readpage() fills @page by copying the + * data from the mft record (which at this stage is most likely in memory) and + * fills the remainder with zeroes. Thus, in this case, I/O is synchronous, as + * even if the mft record is not cached at this point in time, we need to wait + * for it to be read in before we can do the copy. + * + * Return 0 on success and -errno on error. + * + * WARNING: Do not make this function static! It is used by mft.c! + */ +int ntfs_readpage(struct file *file, struct page *page) +{ + s64 attr_pos; + ntfs_inode *ni, *base_ni; + char *addr; + attr_search_context *ctx; + MFT_RECORD *mrec; + u32 attr_len; + int err = 0; + + BUG_ON(!PageLocked(page)); + + /* + * This can potentially happen because we clear PageUptodate() during + * ntfs_writepage() of MstProtected() attributes. + */ + if (PageUptodate(page)) { + unlock_page(page); + return 0; + } + + ni = NTFS_I(page->mapping->host); + + if (NInoNonResident(ni)) { + /* + * Only unnamed $DATA attributes can be compressed or + * encrypted. + */ + if (ni->type == AT_DATA && !ni->name_len) { + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + err = -EACCES; + goto err_out; + } + /* Compressed data streams are handled in compress.c. 
*/ + if (NInoCompressed(ni)) + return ntfs_read_compressed_block(page); + } + /* Normal data stream. */ + return ntfs_read_block(page); + } + /* Attribute is resident, implying it is not compressed or encrypted. */ + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + + /* Map, pin, and lock the mft record. */ + mrec = map_mft_record(base_ni); + if (unlikely(IS_ERR(mrec))) { + err = PTR_ERR(mrec); + goto err_out; + } + ctx = get_attr_search_ctx(base_ni, mrec); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto unm_err_out; + } + if (unlikely(!lookup_attr(ni->type, ni->name, ni->name_len, + IGNORE_CASE, 0, NULL, 0, ctx))) { + err = -ENOENT; + goto put_unm_err_out; + } + + /* Starting position of the page within the attribute value. */ + attr_pos = page->index << PAGE_CACHE_SHIFT; + + /* The total length of the attribute value. */ + attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); + + addr = kmap(page); + /* Copy over in bounds data, zeroing the remainder of the page. */ + if (attr_pos < attr_len) { + u32 bytes = attr_len - attr_pos; + if (bytes > PAGE_CACHE_SIZE) + bytes = PAGE_CACHE_SIZE; + else if (bytes < PAGE_CACHE_SIZE) + memset(addr + bytes, 0, PAGE_CACHE_SIZE - bytes); + /* Copy the data to the page. */ + memcpy(addr, attr_pos + (char*)ctx->attr + + le16_to_cpu( + ctx->attr->data.resident.value_offset), bytes); + } else + memset(addr, 0, PAGE_CACHE_SIZE); + flush_dcache_page(page); + kunmap(page); + + SetPageUptodate(page); +put_unm_err_out: + put_attr_search_ctx(ctx); +unm_err_out: + unmap_mft_record(base_ni); +err_out: + unlock_page(page); + return err; +} + +#ifdef NTFS_RW + +/** + * ntfs_write_block - write a @page to the backing store + * @page: page cache page to write out + * + * This function is for writing pages belonging to non-resident, non-mst + * protected attributes to their backing store. + * + * For a page with buffers, map and write the dirty buffers asynchronously + * under page writeback. 
For a page without buffers, create buffers for the + * page, then proceed as above. + * + * If a page doesn't have buffers the page dirty state is definitive. If a page + * does have buffers, the page dirty state is just a hint, and the buffer dirty + * state is definitive. (A hint which has rules: dirty buffers against a clean + * page is illegal. Other combinations are legal and need to be handled. In + * particular a dirty page containing clean buffers for example.) + * + * Return 0 on success and -errno on error. + * + * Based on ntfs_read_block() and __block_write_full_page(). + */ +static int ntfs_write_block(struct page *page) +{ + VCN vcn; + LCN lcn; + sector_t block, dblock, iblock; + struct inode *vi; + ntfs_inode *ni; + ntfs_volume *vol; + run_list_element *rl; + struct buffer_head *bh, *head; + unsigned int blocksize, vcn_ofs; + int err; + BOOL need_end_writeback; + unsigned char blocksize_bits; + + vi = page->mapping->host; + ni = NTFS_I(vi); + vol = ni->vol; + + ntfs_debug("Entering for inode %li, attribute type 0x%x, page index " + "0x%lx.\n", vi->i_ino, ni->type, page->index); + + BUG_ON(!NInoNonResident(ni)); + BUG_ON(NInoMstProtected(ni)); + + blocksize_bits = vi->i_blkbits; + blocksize = 1 << blocksize_bits; + + if (!page_has_buffers(page)) { + BUG_ON(!PageUptodate(page)); + create_empty_buffers(page, blocksize, + (1 << BH_Uptodate) | (1 << BH_Dirty)); + } + bh = head = page_buffers(page); + if (unlikely(!bh)) { + ntfs_warning(vol->sb, "Error allocating page buffers. " + "Redirtying page so we try again later."); + /* + * Put the page back on mapping->dirty_pages, but leave its + * buffer's dirty state as-is. + */ + // FIXME: Once Andrew's -EAGAIN patch goes in, remove the + // __set_page_dirty_nobuffers(page) and return -EAGAIN instead + // of zero. + __set_page_dirty_nobuffers(page); + unlock_page(page); + return 0; + } + + /* NOTE: Different naming scheme to ntfs_read_block()! */ + + /* The first block in the page. 
*/ + block = page->index << (PAGE_CACHE_SHIFT - blocksize_bits); + + /* The first out of bounds block for the data size. */ + dblock = (vi->i_size + blocksize - 1) >> blocksize_bits; + + /* The last (fully or partially) initialized block. */ + iblock = ni->initialized_size >> blocksize_bits; + + /* + * Be very careful. We have no exclusion from __set_page_dirty_buffers + * here, and the (potentially unmapped) buffers may become dirty at + * any time. If a buffer becomes dirty here after we've inspected it + * then we just miss that fact, and the page stays dirty. + * + * Buffers outside i_size may be dirtied by __set_page_dirty_buffers; + * handle that here by just cleaning them. + */ + + /* + * Loop through all the buffers in the page, mapping all the dirty + * buffers to disk addresses and handling any aliases from the + * underlying block device's mapping. + */ + rl = NULL; + err = 0; + do { + BOOL is_retry = FALSE; + + if (unlikely(block >= dblock)) { + /* + * Mapped buffers outside i_size will occur, because + * this page can be outside i_size when there is a + * truncate in progress. The contents of such buffers + * were zeroed by ntfs_writepage(). + * + * FIXME: What about the small race window where + * ntfs_writepage() has not done any clearing because + * the page was within i_size but before we get here, + * vmtruncate() modifies i_size? + */ + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + continue; + } + + /* Clean buffers are not written out, so no need to map them. */ + if (!buffer_dirty(bh)) + continue; + + /* Make sure we have enough initialized size. */ + if (unlikely((block >= iblock) && + (ni->initialized_size < vi->i_size))) { + /* + * If this page is fully outside initialized size, zero + * out all pages between the current initialized size + * and the current page. Just use ntfs_readpage() to do + * the zeroing transparently. 
+ */ + if (block > iblock) { + // TODO: + // For each page do: + // - read_cache_page() + // Again for each page do: + // - wait_on_page_locked() + // - Check (PageUptodate(page) && + // !PageError(page)) + // Update initialized size in the attribute and + // in the inode. + // Again, for each page do: + // __set_page_dirty_buffers(); + // page_cache_release() + // We don't need to wait on the writes. + // Update iblock. + } + /* + * The current page straddles initialized size. Zero + * all non-uptodate buffers and set them uptodate (and + * dirty?). Note, there aren't any non-uptodate buffers + * if the page is uptodate. + * FIXME: For an uptodate page, the buffers may need to + * be written out because they were not initialized on + * disk before. + */ + if (!PageUptodate(page)) { + // TODO: + // Zero any non-uptodate buffers up to i_size. + // Set them uptodate and dirty. + } + // TODO: + // Update initialized size in the attribute and in the + // inode (up to i_size). + // Update iblock. + // FIXME: This is inefficient. Try to batch the two + // size changes to happen in one go. + ntfs_error(vol->sb, "Writing beyond initialized size " + "is not supported yet. Sorry."); + err = -EOPNOTSUPP; + break; + // Do NOT set_buffer_new() BUT DO clear buffer range + // outside write request range. + // set_buffer_uptodate() on complete buffers as well as + // set_buffer_dirty(). + } + + /* No need to map buffers that are already mapped. */ + if (buffer_mapped(bh)) + continue; + + /* Unmapped, dirty buffer. Need to map it. */ + bh->b_bdev = vol->sb->s_bdev; + + /* Convert block into corresponding vcn and offset. */ + vcn = (VCN)block << blocksize_bits >> vol->cluster_size_bits; + vcn_ofs = ((VCN)block << blocksize_bits) & + vol->cluster_size_mask; + if (!rl) { +lock_retry_remap: + down_read(&ni->run_list.lock); + rl = ni->run_list.rl; + } + if (likely(rl != NULL)) { + /* Seek to element containing target vcn. 
*/ + while (rl->length && rl[1].vcn <= vcn) + rl++; + lcn = vcn_to_lcn(rl, vcn); + } else + lcn = (LCN)LCN_RL_NOT_MAPPED; + /* Successful remap. */ + if (lcn >= 0) { + /* Setup buffer head to point to correct block. */ + bh->b_blocknr = ((lcn << vol->cluster_size_bits) + + vcn_ofs) >> blocksize_bits; + set_buffer_mapped(bh); + continue; + } + /* It is a hole, need to instantiate it. */ + if (lcn == LCN_HOLE) { + // TODO: Instantiate the hole. + // clear_buffer_new(bh); + // unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); + ntfs_error(vol->sb, "Writing into sparse regions is " + "not supported yet. Sorry."); + err = -EOPNOTSUPP; + break; + } + /* If first try and run list unmapped, map and retry. */ + if (!is_retry && lcn == LCN_RL_NOT_MAPPED) { + is_retry = TRUE; + /* + * Attempt to map run list, dropping lock for + * the duration. + */ + up_read(&ni->run_list.lock); + err = map_run_list(ni, vcn); + if (likely(!err)) + goto lock_retry_remap; + rl = NULL; + } + /* Failed to map the buffer, even after retrying. */ + bh->b_blocknr = -1UL; + ntfs_error(vol->sb, "vcn_to_lcn(vcn = 0x%Lx) failed " + "with error code 0x%Lx%s.", + (long long)vcn, (long long)-lcn, + is_retry ? " even after retrying" : ""); + // FIXME: Depending on vol->on_errors, do something. + if (!err) + err = -EIO; + break; + } while (block++, (bh = bh->b_this_page) != head); + + /* Release the lock if we took it. */ + if (rl) + up_read(&ni->run_list.lock); + + /* For the error case, need to reset bh to the beginning. */ + bh = head; + + /* Just an optimization, so ->readpage() isn't called later. */ + if (unlikely(!PageUptodate(page))) { + int uptodate = 1; + do { + if (!buffer_uptodate(bh)) { + uptodate = 0; + bh = head; + break; + } + } while ((bh = bh->b_this_page) != head); + if (uptodate) + SetPageUptodate(page); + } + + /* Setup all mapped, dirty buffers for async write i/o. 
*/ + do { + get_bh(bh); + if (buffer_mapped(bh) && buffer_dirty(bh)) { + lock_buffer(bh); + if (test_clear_buffer_dirty(bh)) { + BUG_ON(!buffer_uptodate(bh)); + mark_buffer_async_write(bh); + } else + unlock_buffer(bh); + } else if (unlikely(err)) { + /* + * For the error case. The buffer may have been set + * dirty during attachment to a dirty page. + */ + if (err != -ENOMEM) + clear_buffer_dirty(bh); + } + } while ((bh = bh->b_this_page) != head); + + if (unlikely(err)) { + // TODO: Remove the -EOPNOTSUPP check later on... + if (unlikely(err == -EOPNOTSUPP)) + err = 0; + else if (err == -ENOMEM) { + ntfs_warning(vol->sb, "Error allocating memory. " + "Redirtying page so we try again " + "later."); + /* + * Put the page back on mapping->dirty_pages, but + * leave its buffer's dirty state as-is. + */ + // FIXME: Once Andrew's -EAGAIN patch goes in, remove + // the __set_page_dirty_nobuffers(page) and set err to + // -EAGAIN instead of zero. + __set_page_dirty_nobuffers(page); + err = 0; + } else + SetPageError(page); + } + + BUG_ON(PageWriteback(page)); + SetPageWriteback(page); /* Keeps try_to_free_buffers() away. */ + unlock_page(page); + + /* + * Submit the prepared buffers for i/o. Note the page is unlocked, + * and the async write i/o completion handler can end_page_writeback() + * at any time after the *first* submit_bh(). So the buffers can then + * disappear... + */ + need_end_writeback = TRUE; + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(WRITE, bh); + need_end_writeback = FALSE; + } + put_bh(bh); + bh = next; + } while (bh != head); + + /* If no i/o was started, need to end_page_writeback(). 
*/ + if (unlikely(need_end_writeback)) + end_page_writeback(page); + + ntfs_debug("Done."); + return err; +} + +/** + * ntfs_writepage - write a @page to the backing store + * @page: page cache page to write out + * + * For non-resident attributes, ntfs_writepage() writes the @page by calling + * the ntfs version of the generic block_write_full_page() function, + * ntfs_write_block(), which in turn if necessary creates and writes the + * buffers associated with the page asynchronously. + * + * For resident attributes, OTOH, ntfs_writepage() writes the @page by copying + * the data to the mft record (which at this stage is most likely in memory). + * Thus, in this case, I/O is synchronous, as even if the mft record is not + * cached at this point in time, we need to wait for it to be read in before we + * can do the copy. + * + * Note the caller clears the page dirty flag before calling ntfs_writepage(). + * + * Based on ntfs_readpage() and fs/buffer.c::block_write_full_page(). + * + * Return 0 on success and -errno on error. + */ +static int ntfs_writepage(struct page *page, struct writeback_control *wbc) +{ + s64 attr_pos; + struct inode *vi; + ntfs_inode *ni, *base_ni; + char *kaddr; + attr_search_context *ctx; + MFT_RECORD *m; + u32 attr_len, bytes; + int err; + + BUG_ON(!PageLocked(page)); + + vi = page->mapping->host; + + /* Is the page fully outside i_size? (truncate in progress) */ + if (unlikely(page->index >= (vi->i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT)) { + unlock_page(page); + ntfs_debug("Write outside i_size - truncated?"); + return 0; + } + + ni = NTFS_I(vi); + + if (NInoNonResident(ni)) { + /* + * Only unnamed $DATA attributes can be compressed, encrypted, + * and/or sparse. + */ + if (ni->type == AT_DATA && !ni->name_len) { + /* If file is encrypted, deny access, just like NT4. 
*/ + if (NInoEncrypted(ni)) { + unlock_page(page); + ntfs_debug("Denying write access to encrypted " + "file."); + return -EACCES; + } + /* Compressed data streams are handled in compress.c. */ + if (NInoCompressed(ni)) { + // TODO: Implement and replace this check with + // return ntfs_write_compressed_block(page); + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to compressed " + "files is not supported yet. " + "Sorry."); + return -EOPNOTSUPP; + } + // TODO: Implement and remove this check. + if (NInoSparse(ni)) { + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to sparse files " + "is not supported yet. Sorry."); + return -EOPNOTSUPP; + } + } + + /* We have to zero every time due to mmap-at-end-of-file. */ + if (page->index >= (vi->i_size >> PAGE_CACHE_SHIFT)) { + /* The page straddles i_size. */ + unsigned int ofs = vi->i_size & ~PAGE_CACHE_MASK; + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + } + + // TODO: Implement and remove this check. + if (NInoMstProtected(ni)) { + unlock_page(page); + ntfs_error(vi->i_sb, "Writing to MST protected " + "attributes is not supported yet. " + "Sorry."); + return -EOPNOTSUPP; + } + + /* Normal data stream. */ + return ntfs_write_block(page); + } + + /* + * Attribute is resident, implying it is not compressed, encrypted, or + * mst protected. + */ + BUG_ON(page_has_buffers(page)); + BUG_ON(!PageUptodate(page)); + + // TODO: Consider using PageWriteback() + unlock_page() in 2.5 once the + // "VM fiddling has ended". Note, don't forget to replace all the + // unlock_page() calls further below with end_page_writeback() ones. + // FIXME: Make sure it is ok to SetPageError() on unlocked page under + // writeback before doing the change! +#if 0 + SetPageWriteback(page); + unlock_page(page); +#endif + + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + + /* Map, pin, and lock the mft record. 
*/ + m = map_mft_record(base_ni); + if (unlikely(IS_ERR(m))) { + err = PTR_ERR(m); + m = NULL; + ctx = NULL; + goto err_out; + } + ctx = get_attr_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + if (unlikely(!lookup_attr(ni->type, ni->name, ni->name_len, + IGNORE_CASE, 0, NULL, 0, ctx))) { + err = -ENOENT; + goto err_out; + } + + /* Starting position of the page within the attribute value. */ + attr_pos = page->index << PAGE_CACHE_SHIFT; + + /* The total length of the attribute value. */ + attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); + + if (unlikely(vi->i_size != attr_len)) { + ntfs_error(vi->i_sb, "BUG()! i_size (0x%Lx) doesn't match " + "attr_len (0x%x). Aborting write.", vi->i_size, + attr_len); + err = -EIO; + goto err_out; + } + if (unlikely(attr_pos >= attr_len)) { + ntfs_error(vi->i_sb, "BUG()! attr_pos (0x%Lx) > attr_len (0x%x)" + ". Aborting write.", attr_pos, attr_len); + err = -EIO; + goto err_out; + } + + bytes = attr_len - attr_pos; + if (unlikely(bytes > PAGE_CACHE_SIZE)) + bytes = PAGE_CACHE_SIZE; + + /* + * Here, we don't need to zero the out of bounds area everytime because + * the below memcpy() already takes care of the mmap-at-end-of-file + * requirements. If the file is converted to a non-resident one, then + * the code path use is switched to the non-resident one where the + * zeroing happens on each ntfs_writepage() invocation. + * + * The above also applies nicely when i_size is decreased. + * + * When i_size is increased, the memory between the old and new i_size + * _must_ be zeroed (or overwritten with new data). Otherwise we will + * expose data to userspace/disk which should never have been exposed. + * + * FIXME: Ensure that i_size increases do the zeroing/overwriting and + * if we cannot guarantee that, then enable the zeroing below. + */ + + kaddr = kmap_atomic(page, KM_USER0); + /* Copy the data from the page to the mft record. 
*/ + memcpy((u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset) + attr_pos, + kaddr, bytes); + flush_dcache_mft_record_page(ctx->ntfs_ino); +#if 0 + /* Zero out of bounds area. */ + if (likely(bytes < PAGE_CACHE_SIZE)) { + memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); + flush_dcache_page(page); + } +#endif + kunmap_atomic(kaddr, KM_USER0); + + unlock_page(page); + + // TODO: Mark mft record dirty so it gets written back. + ntfs_error(vi->i_sb, "Writing to resident files is not supported yet. " + "Wrote to memory only..."); + + put_attr_search_ctx(ctx); + unmap_mft_record(base_ni); + return 0; +err_out: + if (err == -ENOMEM) { + ntfs_warning(vi->i_sb, "Error allocating memory. Redirtying " + "page so we try again later."); + /* + * Put the page back on mapping->dirty_pages, but leave its + * buffer's dirty state as-is. + */ + // FIXME: Once Andrew's -EAGAIN patch goes in, remove the + // __set_page_dirty_nobuffers(page) and set err to -EAGAIN + // instead of zero. + __set_page_dirty_nobuffers(page); + err = 0; + } else { + ntfs_error(vi->i_sb, "Resident attribute write failed with " + "error %i. 
Setting page error flag.", -err); + SetPageError(page); + } + unlock_page(page); + if (ctx) + put_attr_search_ctx(ctx); + if (m) + unmap_mft_record(base_ni); + return err; +} + +/** + * ntfs_prepare_nonresident_write - + * + */ +static int ntfs_prepare_nonresident_write(struct page *page, + unsigned from, unsigned to) +{ + VCN vcn; + LCN lcn; + sector_t block, ablock, iblock; + struct inode *vi; + ntfs_inode *ni; + ntfs_volume *vol; + run_list_element *rl; + struct buffer_head *bh, *head, *wait[2], **wait_bh = wait; + unsigned int vcn_ofs, block_start, block_end, blocksize; + int err; + BOOL is_retry; + unsigned char blocksize_bits; + + vi = page->mapping->host; + ni = NTFS_I(vi); + vol = ni->vol; + + ntfs_debug("Entering for inode %li, attribute type 0x%x, page index " + "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type, + page->index, from, to); + + BUG_ON(!NInoNonResident(ni)); + BUG_ON(NInoMstProtected(ni)); + + blocksize_bits = vi->i_blkbits; + blocksize = 1 << blocksize_bits; + + /* + * create_empty_buffers() will create uptodate/dirty buffers if the + * page is uptodate/dirty. + */ + if (!page_has_buffers(page)) + create_empty_buffers(page, blocksize, 0); + bh = head = page_buffers(page); + if (unlikely(!bh)) + return -ENOMEM; + + /* The first block in the page. */ + block = page->index << (PAGE_CACHE_SHIFT - blocksize_bits); + + /* + * The first out of bounds block for the allocated size. No need to + * round up as allocated_size is in multiples of cluster size and the + * minimum cluster size is 512 bytes, which is equal to the smallest + * blocksize. + */ + ablock = ni->allocated_size >> blocksize_bits; + + /* The last (fully or partially) initialized block. */ + iblock = ni->initialized_size >> blocksize_bits; + + /* Loop through all the buffers in the page. 
*/
+	block_start = 0;
+	rl = NULL;
+	err = 0;
+	do {
+		block_end = block_start + blocksize;
+		/*
+		 * If buffer @bh is outside the write, just mark it uptodate
+		 * if the page is uptodate and continue with the next buffer.
+		 */
+		if (block_end <= from || block_start >= to) {
+			if (PageUptodate(page)) {
+				if (!buffer_uptodate(bh))
+					set_buffer_uptodate(bh);
+			}
+			continue;
+		}
+		/*
+		 * @bh is at least partially being written to.
+		 * Make sure it is not marked as new.
+		 */
+		//if (buffer_new(bh))
+		//	clear_buffer_new(bh);
+
+		if (block >= ablock) {
+			// TODO: block is above allocated_size, need to
+			// allocate it. Best done in one go to accommodate not
+			// only block but all above blocks up to and including:
+			// ((page->index << PAGE_CACHE_SHIFT) + to + blocksize
+			// - 1) >> blocksize_bits. Obviously will need to round
+			// up to next cluster boundary, too. This should be
+			// done with a helper function, so it can be reused.
+			ntfs_error(vol->sb, "Writing beyond allocated size "
+					"is not supported yet. Sorry.");
+			err = -EOPNOTSUPP;
+			goto err_out;
+			// Need to update ablock.
+			// Need to set_buffer_new() on all block bhs that are
+			// newly allocated.
+		}
+		/*
+		 * Now we have enough allocated size to fulfill the whole
+		 * request, i.e. block < ablock is true.
+		 */
+		if (unlikely((block >= iblock) &&
+				(ni->initialized_size < vi->i_size))) {
+			/*
+			 * If this page is fully outside initialized size, zero
+			 * out all pages between the current initialized size
+			 * and the current page. Just use ntfs_readpage() to do
+			 * the zeroing transparently.
+			 */
+			if (block > iblock) {
+				// TODO:
+				// For each page do:
+				// - read_cache_page()
+				// Again for each page do:
+				// - wait_on_page_locked()
+				// - Check (PageUptodate(page) &&
+				//			!PageError(page))
+				// Update initialized size in the attribute and
+				// in the inode.
+				// Again, for each page do:
+				//	__set_page_dirty_buffers();
+				// page_cache_release()
+				// We don't need to wait on the writes.
+				// Update iblock.
+			}
+			/*
+			 * The current page straddles initialized size. Zero
+			 * all non-uptodate buffers and set them uptodate (and
+			 * dirty?). Note, there aren't any non-uptodate buffers
+			 * if the page is uptodate.
+			 * FIXME: For an uptodate page, the buffers may need to
+			 * be written out because they were not initialized on
+			 * disk before.
+			 */
+			if (!PageUptodate(page)) {
+				// TODO:
+				// Zero any non-uptodate buffers up to i_size.
+				// Set them uptodate and dirty.
+			}
+			// TODO:
+			// Update initialized size in the attribute and in the
+			// inode (up to i_size).
+			// Update iblock.
+			// FIXME: This is inefficient. Try to batch the two
+			// size changes to happen in one go.
+			ntfs_error(vol->sb, "Writing beyond initialized size "
+					"is not supported yet. Sorry.");
+			err = -EOPNOTSUPP;
+			goto err_out;
+			// Do NOT set_buffer_new() BUT DO clear buffer range
+			// outside write request range.
+			// set_buffer_uptodate() on complete buffers as well as
+			// set_buffer_dirty().
+		}
+
+		/* Need to map unmapped buffers. */
+		if (!buffer_mapped(bh)) {
+			/* Unmapped buffer. Need to map it. */
+			bh->b_bdev = vol->sb->s_bdev;
+
+			/* Convert block into corresponding vcn and offset. */
+			vcn = (VCN)block << blocksize_bits >>
+					vol->cluster_size_bits;
+			vcn_ofs = ((VCN)block << blocksize_bits) &
+					vol->cluster_size_mask;
+
+			is_retry = FALSE;
+			/*
+			 * Take the run list lock once and keep it across
+			 * buffers; @rl doubles as the "lock is held" marker.
+			 */
+			if (!rl) {
+lock_retry_remap:
+				down_read(&ni->run_list.lock);
+				rl = ni->run_list.rl;
+			}
+			if (likely(rl != NULL)) {
+				/* Seek to element containing target vcn. */
+				while (rl->length && rl[1].vcn <= vcn)
+					rl++;
+				lcn = vcn_to_lcn(rl, vcn);
+			} else
+				lcn = (LCN)LCN_RL_NOT_MAPPED;
+			if (unlikely(lcn < 0)) {
+				/*
+				 * We extended the attribute allocation above.
+				 * If we hit an ENOENT here it means that the
+				 * allocation was insufficient which is a bug.
+				 */
+				BUG_ON(lcn == LCN_ENOENT);
+
+				/* It is a hole, need to instantiate it. */
+				if (lcn == LCN_HOLE) {
+					// TODO: Instantiate the hole.
+					// clear_buffer_new(bh);
+					// unmap_underlying_metadata(bh->b_bdev,
+					//		bh->b_blocknr);
+					// For non-uptodate buffers, need to
+					// zero out the region outside the
+					// request in this bh or all bhs,
+					// depending on what we implemented
+					// above.
+					// Need to flush_dcache_page().
+					// Or could use set_buffer_new()
+					// instead?
+					ntfs_error(vol->sb, "Writing into "
+							"sparse regions is "
+							"not supported yet. "
+							"Sorry.");
+					err = -EOPNOTSUPP;
+					goto err_out;
+				} else if (!is_retry &&
+						lcn == LCN_RL_NOT_MAPPED) {
+					is_retry = TRUE;
+					/*
+					 * Attempt to map run list, dropping
+					 * lock for the duration.
+					 */
+					up_read(&ni->run_list.lock);
+					err = map_run_list(ni, vcn);
+					if (likely(!err))
+						goto lock_retry_remap;
+					/* Lock already dropped; do not drop it again. */
+					rl = NULL;
+				}
+				/*
+				 * Failed to map the buffer, even after
+				 * retrying.
+				 */
+				bh->b_blocknr = -1UL;
+				ntfs_error(vol->sb, "vcn_to_lcn(vcn = 0x%Lx) "
+						"failed with error code "
+						"0x%Lx%s.", (long long)vcn,
+						(long long)-lcn, is_retry ?
+						" even after retrying" : "");
+				// FIXME: Depending on vol->on_errors, do
+				// something.
+				if (!err)
+					err = -EIO;
+				goto err_out;
+			}
+			/* We now have a successful remap, i.e. lcn >= 0. */
+
+			/* Setup buffer head to correct block. */
+			bh->b_blocknr = ((lcn << vol->cluster_size_bits)
+					+ vcn_ofs) >> blocksize_bits;
+			set_buffer_mapped(bh);
+
+			// FIXME: Something analogous to this is needed for
+			// each newly allocated block, i.e. BH_New.
+			// FIXME: Might need to take this out of the
+			// if (!buffer_mapped(bh)) {}, depending on how we
+			// implement things during the allocated_size and
+			// initialized_size extension code above.
+			if (buffer_new(bh)) {
+				clear_buffer_new(bh);
+				unmap_underlying_metadata(bh->b_bdev,
+						bh->b_blocknr);
+				if (PageUptodate(page)) {
+					set_buffer_uptodate(bh);
+					continue;
+				}
+				/*
+				 * Page is _not_ uptodate, zero surrounding
+				 * region. NOTE: This is how we decide if to
+				 * zero or not!
+				 */
+				if (block_end > to || block_start < from) {
+					void *kaddr;
+
+					kaddr = kmap_atomic(page, KM_USER0);
+					if (block_end > to)
+						memset(kaddr + to, 0,
+								block_end - to);
+					if (block_start < from)
+						memset(kaddr + block_start, 0,
+								from -
+								block_start);
+					flush_dcache_page(page);
+					kunmap_atomic(kaddr, KM_USER0);
+				}
+				continue;
+			}
+		}
+		/* @bh is mapped, set it uptodate if the page is uptodate. */
+		if (PageUptodate(page)) {
+			if (!buffer_uptodate(bh))
+				set_buffer_uptodate(bh);
+			continue;
+		}
+		/*
+		 * The page is not uptodate. The buffer is mapped. If it is not
+		 * uptodate, and it is only partially being written to, we need
+		 * to read the buffer in before the write, i.e. right now.
+		 */
+		if (!buffer_uptodate(bh) &&
+				(block_start < from || block_end > to)) {
+			ll_rw_block(READ, 1, &bh);
+			/*
+			 * A contiguous [from, to) range can only partially
+			 * cover its first and its last buffer, so at most two
+			 * entries of wait[] are ever used.
+			 */
+			*wait_bh++ = bh;
+		}
+	} while (block++, block_start = block_end,
+			(bh = bh->b_this_page) != head);
+
+	/* Release the lock if we took it. */
+	if (rl) {
+		up_read(&ni->run_list.lock);
+		rl = NULL;
+	}
+
+	/* If we issued read requests, let them complete. */
+	while (wait_bh > wait) {
+		wait_on_buffer(*--wait_bh);
+		/* Returns on the first failed read without waiting on the rest. */
+		if (!buffer_uptodate(*wait_bh))
+			return -EIO;
+	}
+
+	ntfs_debug("Done.");
+	return 0;
+err_out:
+	/*
+	 * Zero out any newly allocated blocks to avoid exposing stale data.
+	 * If BH_New is set, we know that the block was newly allocated in the
+	 * above loop.
+	 * FIXME: What about initialized_size increments? Have we done all the
+	 * required zeroing above? If not this error handling is broken, and
+	 * in particular the if (block_end <= from) check is completely bogus.
+	 */
+	bh = head;
+	block_start = 0;
+	/* From here on is_retry is reused as a "zeroed at least one buffer" flag. */
+	is_retry = FALSE;
+	do {
+		block_end = block_start + blocksize;
+		if (block_end <= from)
+			continue;
+		if (block_start >= to)
+			break;
+		if (buffer_new(bh)) {
+			void *kaddr;
+
+			clear_buffer_new(bh);
+			if (buffer_uptodate(bh))
+				buffer_error();
+			kaddr = kmap_atomic(page, KM_USER0);
+			memset(kaddr + block_start, 0, bh->b_size);
+			kunmap_atomic(kaddr, KM_USER0);
+			set_buffer_uptodate(bh);
+			mark_buffer_dirty(bh);
+			is_retry = TRUE;
+		}
+	} while (block_start = block_end, (bh = bh->b_this_page) != head);
+	if (is_retry)
+		flush_dcache_page(page);
+	/* A non-NULL rl means the run list lock is still held. */
+	if (rl)
+		up_read(&ni->run_list.lock);
+	return err;
+}
+
+/**
+ * ntfs_prepare_write - prepare a page for receiving data
+ * @file:	file being written to (not used by this function)
+ * @page:	locked page cache page that will receive the data
+ * @from:	first byte in @page that will be written (inclusive)
+ * @to:		first byte in @page after the written range (exclusive)
+ *
+ * This is called from generic_file_write() with i_sem held on the inode
+ * (@page->mapping->host). The @page is locked and kmap()ped so page_address()
+ * can simply be used. The source data has not yet been copied into the @page.
+ *
+ * Need to extend the attribute/fill in holes if necessary, create blocks and
+ * make partially overwritten blocks uptodate.
+ *
+ * i_size is not to be modified yet.
+ *
+ * Return 0 on success or -errno on error. In particular, -EACCES is returned
+ * for encrypted files and -EOPNOTSUPP for compressed, sparse and mst protected
+ * attributes, as well as for writes beyond the end of a resident attribute.
+ *
+ * Should be using block_prepare_write() [support for sparse files] or
+ * cont_prepare_write() [no support for sparse files]. Can't do that due to
+ * ntfs specifics but can look at them for implementation guidance.
+ *
+ * Note: In the range, @from is inclusive and @to is exclusive, i.e. @from is
+ * the first byte in the page that will be written to and @to is the first byte
+ * after the last byte that will be written to.
+ */
+static int ntfs_prepare_write(struct file *file, struct page *page,
+		unsigned from, unsigned to)
+{
+	struct inode *vi = page->mapping->host;
+	ntfs_inode *ni = NTFS_I(vi);
+
+	ntfs_debug("Entering for inode %li, attribute type 0x%x, page index "
+			"0x%lx, from = %u, to = %u.", vi->i_ino, ni->type,
+			page->index, from, to);
+
+	BUG_ON(!PageLocked(page));
+	BUG_ON(from > PAGE_CACHE_SIZE);
+	BUG_ON(to > PAGE_CACHE_SIZE);
+	BUG_ON(from > to);
+
+	if (NInoNonResident(ni)) {
+		/*
+		 * Only unnamed $DATA attributes can be compressed, encrypted,
+		 * and/or sparse.
+		 */
+		if (ni->type == AT_DATA && !ni->name_len) {
+			/* If file is encrypted, deny access, just like NT4. */
+			if (NInoEncrypted(ni)) {
+				ntfs_debug("Denying write access to encrypted "
+						"file.");
+				return -EACCES;
+			}
+			/* Compressed data streams are handled in compress.c. */
+			if (NInoCompressed(ni)) {
+				// TODO: Implement and replace this check with
+				// return ntfs_write_compressed_block(page);
+				ntfs_error(vi->i_sb, "Writing to compressed "
+						"files is not supported yet. "
+						"Sorry.");
+				return -EOPNOTSUPP;
+			}
+			// TODO: Implement and remove this check.
+			if (NInoSparse(ni)) {
+				ntfs_error(vi->i_sb, "Writing to sparse files "
+						"is not supported yet. Sorry.");
+				return -EOPNOTSUPP;
+			}
+		}
+
+		// TODO: Implement and remove this check.
+		if (NInoMstProtected(ni)) {
+			ntfs_error(vi->i_sb, "Writing to MST protected "
+					"attributes is not supported yet. "
+					"Sorry.");
+			return -EOPNOTSUPP;
+		}
+
+		/* Normal data stream. */
+		return ntfs_prepare_nonresident_write(page, from, to);
+	}
+
+	/*
+	 * Attribute is resident, implying it is not compressed, encrypted, or
+	 * mst protected.
+	 */
+	BUG_ON(page_has_buffers(page));
+
+	/* Do we need to resize the attribute? */
+	if (((s64)page->index << PAGE_CACHE_SHIFT) + to > vi->i_size) {
+		// TODO: Implement resize...
+		ntfs_error(vi->i_sb, "Writing beyond the existing file size is "
+				"not supported yet. Sorry.");
+		return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Because resident attributes are handled by memcpy() to/from the
+	 * corresponding MFT record, and because this form of i/o is byte
+	 * aligned rather than block aligned, there is no need to bring the
+	 * page uptodate here as in the non-resident case where we need to
+	 * bring the buffers straddled by the write uptodate before
+	 * generic_file_write() does the copying from userspace.
+	 *
+	 * We thus defer the uptodate bringing of the page region outside the
+	 * region written to to ntfs_commit_write(). The reason for doing this
+	 * is that we save one round of:
+	 *	map_mft_record(), get_attr_search_ctx(), lookup_attr(),
+	 *	kmap_atomic(), kunmap_atomic(), put_attr_search_ctx(),
+	 *	unmap_mft_record().
+	 * Which is obviously a very worthwhile save.
+	 *
+	 * Thus we just return success now...
+	 */
+	ntfs_debug("Done.");
+	return 0;
+}
+
+/*
+ * NOTES: There is a disparity between the apparent need to extend the
+ * attribute in prepare write but to update i_size only in commit write.
+ * Need to make sure i_sem protection is sufficient. And if not will need to
+ * handle this in some way or another.
+ */
+
+/**
+ * ntfs_commit_nonresident_write - commit a write to a non-resident attribute
+ * @page:	page cache page the data has been copied into
+ * @from:	first byte in @page that was written (inclusive)
+ * @to:		first byte in @page after the written range (exclusive)
+ *
+ * Mark every buffer of @page that overlaps the written range uptodate and
+ * dirty, and set the page uptodate if no buffer outside the range is left
+ * non-uptodate. i_size is not updated; a write ending beyond it is only
+ * reported via ntfs_error().
+ *
+ * Always returns 0.
+ */
+static int ntfs_commit_nonresident_write(struct page *page,
+		unsigned from, unsigned to)
+{
+	s64 pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to;
+	struct inode *vi;
+	struct buffer_head *bh, *head;
+	unsigned int block_start, block_end, blocksize;
+	BOOL partial;
+
+	vi = page->mapping->host;
+
+	ntfs_debug("Entering for inode %li, attribute type 0x%x, page index "
+			"0x%lx, from = %u, to = %u.", vi->i_ino,
+			NTFS_I(vi)->type, page->index, from, to);
+
+	blocksize = 1 << vi->i_blkbits;
+
+	// FIXME: We need a whole slew of special cases in here for MST
+	// protected attributes for example. For compressed files, too...
+ // For now, we know ntfs_prepare_write() would have failed so we can't + // get here in any of the cases which we have to special case, so we + // are just a ripped off unrolled generic_commit_write() at present. + + bh = head = page_buffers(page); + block_start = 0; + partial = FALSE; + do { + block_end = block_start + blocksize; + if (block_end <= from || block_start >= to) { + if (!buffer_uptodate(bh)) + partial = TRUE; + } else { + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + } + } while (block_start = block_end, (bh = bh->b_this_page) != head); + + /* + * If this is a partial write which happened to make all buffers + * uptodate then we can optimize away a bogus ->readpage() for the next + * read(). Here we 'discover' whether the page went uptodate as a + * result of this (potentially partial) write. + */ + if (!partial) + SetPageUptodate(page); + + /* + * Not convinced about this at all. See disparity comment above. For + * now we know ntfs_prepare_write() would have failed in the write + * exceeds i_size case, so this will never trigger which is fine. + */ + if (pos > vi->i_size) { + ntfs_error(vi->i_sb, "Writing beyond the existing file size is " + "not supported yet. Sorry."); + // vi->i_size = pos; + // mark_inode_dirty(vi); + } + ntfs_debug("Done."); + return 0; +} + +/** + * ntfs_commit_write - commit the received data + * + * This is called from generic_file_write() with i_sem held on the inode + * (@page->mapping->host). The @page is locked and kmap()ped so page_address() + * can simply be used. The source data has already been copied into the @page. + * + * Need to mark modified blocks dirty so they get written out later when + * ntfs_writepage() is invoked by the VM. + * + * Return 0 on success or -errno on error. + * + * Should be using generic_commit_write(). This marks buffers uptodate and + * dirty, sets the page uptodate if all buffers in the page are uptodate, and + * updates i_size if the end of io is beyond i_size. 
In that case, it also + * marks the inode dirty. - We could still use this (obviously except for + * NInoMstProtected() attributes, where we will need to duplicate the core code + * because we need our own async_io completion handler) but we could just do + * the i_size update in prepare write, when we resize the attribute. Then + * we would avoid the i_size update and mark_inode_dirty() happening here. + * + * Can't use generic_commit_write() due to ntfs specialities but can look at + * it for implementation guidance. + * + * If things have gone as outlined in ntfs_prepare_write(), then we do not + * need to do any page content modifications here at all, except in the write + * to resident attribute case, where we need to do the uptodate bringing here + * which we combine with the copying into the mft record which means we only + * need to map the mft record and find the attribute record in it only once. + */ +static int ntfs_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + s64 attr_pos; + struct inode *vi; + ntfs_inode *ni, *base_ni; + char *kaddr, *kattr; + attr_search_context *ctx; + MFT_RECORD *m; + u32 attr_len, bytes; + int err; + + vi = page->mapping->host; + ni = NTFS_I(vi); + + ntfs_debug("Entering for inode %li, attribute type 0x%x, page index " + "0x%lx, from = %u, to = %u.", vi->i_ino, ni->type, + page->index, from, to); + + if (NInoNonResident(ni)) { + /* + * Only unnamed $DATA attributes can be compressed, encrypted, + * and/or sparse. + */ + if (ni->type == AT_DATA && !ni->name_len) { + /* If file is encrypted, deny access, just like NT4. */ + if (NInoEncrypted(ni)) { + // Should never get here! + ntfs_debug("Denying write access to encrypted " + "file."); + return -EACCES; + } + /* Compressed data streams are handled in compress.c. */ + if (NInoCompressed(ni)) { + // TODO: Implement and replace this check with + // return ntfs_write_compressed_block(page); + // Should never get here! 
+ ntfs_error(vi->i_sb, "Writing to compressed " + "files is not supported yet. " + "Sorry."); + return -EOPNOTSUPP; + } + // TODO: Implement and remove this check. + if (NInoSparse(ni)) { + // Should never get here! + ntfs_error(vi->i_sb, "Writing to sparse files " + "is not supported yet. Sorry."); + return -EOPNOTSUPP; + } + } + + // TODO: Implement and remove this check. + if (NInoMstProtected(ni)) { + // Should never get here! + ntfs_error(vi->i_sb, "Writing to MST protected " + "attributes is not supported yet. " + "Sorry."); + return -EOPNOTSUPP; + } + + /* Normal data stream. */ + return ntfs_commit_nonresident_write(page, from, to); + } + + /* + * Attribute is resident, implying it is not compressed, encrypted, or + * mst protected. + */ + + /* Do we need to resize the attribute? */ + if (((s64)page->index << PAGE_CACHE_SHIFT) + to > vi->i_size) { + // TODO: Implement resize... + // pos = ((s64)page->index << PAGE_CACHE_SHIFT) + to; + // vi->i_size = pos; + // mark_inode_dirty(vi); + // Should never get here! + ntfs_error(vi->i_sb, "Writing beyond the existing file size is " + "not supported yet. Sorry."); + return -EOPNOTSUPP; + } + + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + + /* Map, pin, and lock the mft record. */ + m = map_mft_record(base_ni); + if (unlikely(IS_ERR(m))) { + err = PTR_ERR(m); + m = NULL; + ctx = NULL; + goto err_out; + } + ctx = get_attr_search_ctx(base_ni, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + if (unlikely(!lookup_attr(ni->type, ni->name, ni->name_len, + IGNORE_CASE, 0, NULL, 0, ctx))) { + err = -ENOENT; + goto err_out; + } + + /* Starting position of the page within the attribute value. */ + attr_pos = page->index << PAGE_CACHE_SHIFT; + + /* The total length of the attribute value. */ + attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); + + if (unlikely(vi->i_size != attr_len)) { + ntfs_error(vi->i_sb, "BUG()! 
i_size (0x%Lx) doesn't match " + "attr_len (0x%x). Aborting write.", vi->i_size, + attr_len); + err = -EIO; + goto err_out; + } + if (unlikely(attr_pos >= attr_len)) { + ntfs_error(vi->i_sb, "BUG()! attr_pos (0x%Lx) > attr_len (0x%x)" + ". Aborting write.", attr_pos, attr_len); + err = -EIO; + goto err_out; + } + + bytes = attr_len - attr_pos; + if (unlikely(bytes > PAGE_CACHE_SIZE)) + bytes = PAGE_CACHE_SIZE; + + /* + * Calculate the address of the attribute value corresponding to the + * beginning of the current data @page. + */ + kattr = (u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset) + attr_pos; + + kaddr = kmap_atomic(page, KM_USER0); + + /* Copy the received data from the page to the mft record. */ + memcpy(kattr + from, kaddr + from, to - from); + flush_dcache_mft_record_page(ctx->ntfs_ino); + + if (!PageUptodate(page)) { + /* + * Bring the out of bounds area(s) uptodate by copying data + * from the mft record to the page. + */ + if (from > 0) + memcpy(kaddr, kattr, from); + if (to < bytes) + memcpy(kaddr + to, kattr + to, bytes - to); + + /* Zero the region outside the end of the attribute value. */ + if (likely(bytes < PAGE_CACHE_SIZE)) + memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); + + /* + * The probability of not having done any of the above is + * extremely small, so we just flush unconditionally. + */ + flush_dcache_page(page); + SetPageUptodate(page); + } + kunmap_atomic(kaddr, KM_USER0); + + // TODO: Mark mft record dirty so it gets written back. + ntfs_error(vi->i_sb, "Writing to resident files is not supported yet. 
" + "Wrote to memory only..."); + + put_attr_search_ctx(ctx); + unmap_mft_record(base_ni); + ntfs_debug("Done."); + return 0; +err_out: + if (err == -ENOMEM) { + ntfs_warning(vi->i_sb, "Error allocating memory required to " + "commit the write."); + if (PageUptodate(page)) { + ntfs_warning(vi->i_sb, "Page is uptodate, setting " + "dirty so the write will be retried " + "later on by the VM."); + /* + * Put the page on mapping->dirty_pages, but leave its + * buffer's dirty state as-is. + */ + __set_page_dirty_nobuffers(page); + err = 0; + } else + ntfs_error(vi->i_sb, "Page is not uptodate. Written " + "data has been lost. )-:"); + } else { + ntfs_error(vi->i_sb, "Resident attribute write failed with " + "error %i. Setting page error flag.", -err); + SetPageError(page); + } + if (ctx) + put_attr_search_ctx(ctx); + if (m) + unmap_mft_record(base_ni); + return err; +} + +#endif /* NTFS_RW */ + +/** + * ntfs_aops - general address space operations for inodes and attributes + */ +struct address_space_operations ntfs_aops = { + .readpage = ntfs_readpage, /* Fill page with data. */ + .sync_page = block_sync_page, /* Currently, just unplugs the + disk request queue. */ +#ifdef NTFS_RW + .writepage = ntfs_writepage, /* Write dirty page to disk. */ + .prepare_write = ntfs_prepare_write, /* Prepare page and buffers + ready to receive data. */ + .commit_write = ntfs_commit_write, /* Commit received data. */ +#endif +}; + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/attrib.c b/reactos/drivers/fs/ntfs/linux-ntfs/attrib.c new file mode 100644 index 00000000000..476bcf512bd --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/attrib.c @@ -0,0 +1,1721 @@ +/** + * attrib.c - NTFS attribute operations. Part of the Linux-NTFS project. 
+ *
+ * Copyright (c) 2001-2003 Anton Altaparmakov
+ * Copyright (c) 2002 Richard Russon
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* NOTE(review): the header name of the next include is missing - confirm. */
+#include
+#include "ntfs.h"
+#include "dir.h"
+
+/* Temporary helper functions -- might become macros */
+
+/**
+ * ntfs_rl_mm - run_list memmove
+ * @base:	run list array the elements are moved within
+ * @dst:	index of the destination element in @base
+ * @src:	index of the source element in @base
+ * @size:	number of run list elements to move
+ *
+ * Nothing is done if @dst equals @src or if @size is not positive.
+ *
+ * It is up to the caller to serialize access to the run list @base.
+ */
+static inline void ntfs_rl_mm(run_list_element *base, int dst, int src,
+		int size)
+{
+	if (likely((dst != src) && (size > 0)))
+		memmove(base + dst, base + src, size * sizeof (*base));
+}
+
+/**
+ * ntfs_rl_mc - run_list memory copy
+ * @dstbase:	run list array to copy into
+ * @dst:	index of the first destination element in @dstbase
+ * @srcbase:	run list array to copy from
+ * @src:	index of the first source element in @srcbase
+ * @size:	number of run list elements to copy
+ *
+ * Nothing is done if @size is not positive. memcpy() is used, so the source
+ * and destination ranges must not overlap.
+ *
+ * It is up to the caller to serialize access to the run lists @dstbase and
+ * @srcbase.
+ */
+static inline void ntfs_rl_mc(run_list_element *dstbase, int dst,
+		run_list_element *srcbase, int src, int size)
+{
+	if (likely(size > 0))
+		memcpy(dstbase + dst, srcbase + src, size * sizeof(*dstbase));
+}
+
+/**
+ * ntfs_rl_realloc - Reallocate memory for run_lists
+ * @rl:		original run list
+ * @old_size:	number of run list elements in the original run list @rl
+ * @new_size:	number of run list elements we need space for
+ *
+ * As the run_lists grow, more memory will be required. To prevent the
+ * kernel having to allocate and reallocate large numbers of small bits of
+ * memory, this function returns an entire page of memory.
+ *
+ * It is up to the caller to serialize access to the run list @rl.
+ *
+ * N.B. If the new allocation doesn't require a different number of pages in
+ * memory, the function will return the original pointer.
+ *
+ * On success, return a pointer to the newly allocated, or recycled, memory.
+ * When a new array is allocated, the old contents are copied over and @rl is
+ * freed.
+ *
+ * On error, return -errno encoded with ERR_PTR(), so callers must test the
+ * result with IS_ERR(). The only error produced by this function is:
+ *	-ENOMEM	- Not enough memory to allocate run list array.
+ */
+static inline run_list_element *ntfs_rl_realloc(run_list_element *rl,
+		int old_size, int new_size)
+{
+	run_list_element *new_rl;
+
+	/* From here on both sizes are byte counts rounded up to whole pages. */
+	old_size = PAGE_ALIGN(old_size * sizeof(*rl));
+	new_size = PAGE_ALIGN(new_size * sizeof(*rl));
+	if (old_size == new_size)
+		return rl;
+
+	new_rl = ntfs_malloc_nofs(new_size);
+	if (unlikely(!new_rl))
+		return ERR_PTR(-ENOMEM);
+
+	if (likely(rl != NULL)) {
+		/* When shrinking, copy only what fits into the new array. */
+		if (unlikely(old_size > new_size))
+			old_size = new_size;
+		memcpy(new_rl, rl, old_size);
+		ntfs_free(rl);
+	}
+	return new_rl;
+}
+
+/**
+ * ntfs_are_rl_mergeable - test if two run lists can be joined together
+ * @dst:	original run list
+ * @src:	new run list to test for mergeability with @dst
+ *
+ * Test if two run lists can be joined together. For this, their VCNs and LCNs
+ * must be adjacent. Elements with a negative LCN are never reported as
+ * mergeable.
+ *
+ * It is up to the caller to serialize access to the run lists @dst and @src.
+ *
+ * Return: TRUE   Success, the run lists can be merged.
+ *	   FALSE  Failure, the run lists cannot be merged.
+ */
+static inline BOOL ntfs_are_rl_mergeable(run_list_element *dst,
+		run_list_element *src)
+{
+	BUG_ON(!dst);
+	BUG_ON(!src);
+
+	if ((dst->lcn < 0) || (src->lcn < 0))     /* Are we merging holes? */
+		return FALSE;
+	if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */
+		return FALSE;
+	if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */
+		return FALSE;
+
+	return TRUE;
+}
+
+/**
+ * __ntfs_rl_merge - merge two run lists without testing if they can be merged
+ * @dst:	original, destination run list
+ * @src:	new run list to merge with @dst
+ *
+ * Merge the two run lists, writing into the destination run list @dst. The
+ * caller must make sure the run lists can be merged or this will corrupt the
+ * destination run list.
+ *
+ * Only the length of @dst is changed; @src is left untouched.
+ *
+ * It is up to the caller to serialize access to the run lists @dst and @src.
+ */
+static inline void __ntfs_rl_merge(run_list_element *dst, run_list_element *src)
+{
+	dst->length += src->length;
+}
+
+/**
+ * ntfs_rl_merge - test if two run lists can be joined together and merge them
+ * @dst:	original, destination run list
+ * @src:	new run list to merge with @dst
+ *
+ * Test if two run lists can be joined together. For this, their VCNs and LCNs
+ * must be adjacent. If they can be merged, perform the merge, writing into
+ * the destination run list @dst.
+ *
+ * It is up to the caller to serialize access to the run lists @dst and @src.
+ *
+ * Return: TRUE   Success, the run lists have been merged.
+ *	   FALSE  Failure, the run lists cannot be merged and have not been
+ *		  modified.
+ */
+static inline BOOL ntfs_rl_merge(run_list_element *dst, run_list_element *src)
+{
+	BOOL merge = ntfs_are_rl_mergeable(dst, src);
+
+	if (merge)
+		__ntfs_rl_merge(dst, src);
+	return merge;
+}
+
+/**
+ * ntfs_rl_append - append a run list after a given element
+ * @dst:	original run list to be worked on
+ * @dsize:	number of elements in @dst (including end marker)
+ * @src:	run list to be inserted into @dst
+ * @ssize:	number of elements in @src (excluding end marker)
+ * @loc:	append the new run list @src after this element in @dst
+ *
+ * Append the run list @src after element @loc in @dst. Merge the right end of
+ * the new run list, if necessary.
Adjust the size of the hole before the + * appended run list. + * + * It is up to the caller to serialize access to the run lists @dst and @src. + * + * On success, return a pointer to the new, combined, run list. Note, both + * run lists @dst and @src are deallocated before returning so you cannot use + * the pointers for anything any more. (Strictly speaking the returned run list + * may be the same as @dst but this is irrelevant.) + * + * On error, return -errno. Both run lists are left unmodified. The following + * error codes are defined: + * -ENOMEM - Not enough memory to allocate run list array. + * -EINVAL - Invalid parameters were passed in. + */ +static inline run_list_element *ntfs_rl_append(run_list_element *dst, + int dsize, run_list_element *src, int ssize, int loc) +{ + BOOL right; + int magic; + + BUG_ON(!dst); + BUG_ON(!src); + + /* First, check if the right hand end needs merging. */ + right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); + + /* Space required: @dst size + @src size, less one if we merged. */ + dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right); + if (IS_ERR(dst)) + return dst; + /* + * We are guaranteed to succeed from here so can start modifying the + * original run lists. + */ + + /* First, merge the right hand end, if necessary. */ + if (right) + __ntfs_rl_merge(src + ssize - 1, dst + loc + 1); + + magic = loc + ssize; + + /* Move the tail of @dst out of the way, then copy in @src. */ + ntfs_rl_mm(dst, magic + 1, loc + 1 + right, dsize - loc - 1 - right); + ntfs_rl_mc(dst, loc + 1, src, 0, ssize); + + /* Adjust the size of the preceding hole. 
*/ + dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn; + + /* We may have changed the length of the file, so fix the end marker */ + if (dst[magic + 1].lcn == LCN_ENOENT) + dst[magic + 1].vcn = dst[magic].vcn + dst[magic].length; + + return dst; +} + +/** + * ntfs_rl_insert - insert a run list into another + * @dst: original run list to be worked on + * @dsize: number of elements in @dst (including end marker) + * @src: new run list to be inserted + * @ssize: number of elements in @src (excluding end marker) + * @loc: insert the new run list @src before this element in @dst + * + * Insert the run list @src before element @loc in the run list @dst. Merge the + * left end of the new run list, if necessary. Adjust the size of the hole + * after the inserted run list. + * + * It is up to the caller to serialize access to the run lists @dst and @src. + * + * On success, return a pointer to the new, combined, run list. Note, both + * run lists @dst and @src are deallocated before returning so you cannot use + * the pointers for anything any more. (Strictly speaking the returned run list + * may be the same as @dst but this is irrelevant.) + * + * On error, return -errno. Both run lists are left unmodified. The following + * error codes are defined: + * -ENOMEM - Not enough memory to allocate run list array. + * -EINVAL - Invalid parameters were passed in. + */ +static inline run_list_element *ntfs_rl_insert(run_list_element *dst, + int dsize, run_list_element *src, int ssize, int loc) +{ + BOOL left = FALSE; + BOOL disc = FALSE; /* Discontinuity */ + BOOL hole = FALSE; /* Following a hole */ + int magic; + + BUG_ON(!dst); + BUG_ON(!src); + + /* disc => Discontinuity between the end of @dst and the start of @src. + * This means we might need to insert a hole. + * hole => @dst ends with a hole or an unmapped region which we can + * extend to match the discontinuity. 
*/ + if (loc == 0) + disc = (src[0].vcn > 0); + else { + s64 merged_length; + + left = ntfs_are_rl_mergeable(dst + loc - 1, src); + + merged_length = dst[loc - 1].length; + if (left) + merged_length += src->length; + + disc = (src[0].vcn > dst[loc - 1].vcn + merged_length); + if (disc) + hole = (dst[loc - 1].lcn == LCN_HOLE); + } + + /* Space required: @dst size + @src size, less one if we merged, plus + * one if there was a discontinuity, less one for a trailing hole. */ + dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc - hole); + if (IS_ERR(dst)) + return dst; + /* + * We are guaranteed to succeed from here so can start modifying the + * original run list. + */ + + if (left) + __ntfs_rl_merge(dst + loc - 1, src); + + magic = loc + ssize - left + disc - hole; + + /* Move the tail of @dst out of the way, then copy in @src. */ + ntfs_rl_mm(dst, magic, loc, dsize - loc); + ntfs_rl_mc(dst, loc + disc - hole, src, left, ssize - left); + + /* Adjust the VCN of the last run ... */ + if (dst[magic].lcn <= LCN_HOLE) + dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length; + /* ... and the length. */ + if (dst[magic].lcn == LCN_HOLE || dst[magic].lcn == LCN_RL_NOT_MAPPED) + dst[magic].length = dst[magic + 1].vcn - dst[magic].vcn; + + /* Writing beyond the end of the file and there's a discontinuity. 
*/ + if (disc) { + if (hole) + dst[loc - 1].length = dst[loc].vcn - dst[loc - 1].vcn; + else { + if (loc > 0) { + dst[loc].vcn = dst[loc - 1].vcn + + dst[loc - 1].length; + dst[loc].length = dst[loc + 1].vcn - + dst[loc].vcn; + } else { + dst[loc].vcn = 0; + dst[loc].length = dst[loc + 1].vcn; + } + dst[loc].lcn = LCN_RL_NOT_MAPPED; + } + + magic += hole; + + if (dst[magic].lcn == LCN_ENOENT) + dst[magic].vcn = dst[magic - 1].vcn + + dst[magic - 1].length; + } + return dst; +} + +/** + * ntfs_rl_replace - overwrite a run_list element with another run list + * @dst: original run list to be worked on + * @dsize: number of elements in @dst (including end marker) + * @src: new run list to be inserted + * @ssize: number of elements in @src (excluding end marker) + * @loc: index in run list @dst to overwrite with @src + * + * Replace the run list element @dst at @loc with @src. Merge the left and + * right ends of the inserted run list, if necessary. + * + * It is up to the caller to serialize access to the run lists @dst and @src. + * + * On success, return a pointer to the new, combined, run list. Note, both + * run lists @dst and @src are deallocated before returning so you cannot use + * the pointers for anything any more. (Strictly speaking the returned run list + * may be the same as @dst but this is irrelevant.) + * + * On error, return -errno. Both run lists are left unmodified. The following + * error codes are defined: + * -ENOMEM - Not enough memory to allocate run list array. + * -EINVAL - Invalid parameters were passed in. + */ +static inline run_list_element *ntfs_rl_replace(run_list_element *dst, + int dsize, run_list_element *src, int ssize, int loc) +{ + BOOL left = FALSE; + BOOL right; + int magic; + + BUG_ON(!dst); + BUG_ON(!src); + + /* First, merge the left and right ends, if necessary. */ + right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); + if (loc > 0) + left = ntfs_are_rl_mergeable(dst + loc - 1, src); + + /* Allocate some space. 
We'll need less if the left, right, or both + * ends were merged. */ + dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right); + if (IS_ERR(dst)) + return dst; + /* + * We are guaranteed to succeed from here so can start modifying the + * original run lists. + */ + if (right) + __ntfs_rl_merge(src + ssize - 1, dst + loc + 1); + if (left) + __ntfs_rl_merge(dst + loc - 1, src); + + /* + * @magic is the index in @dst just past the part of @src that is + * copied in below, i.e. the position the tail of @dst is moved to. + */ + magic = loc + ssize - left; + + /* Move the tail of @dst out of the way, then copy in @src. */ + ntfs_rl_mm(dst, magic, loc + right + 1, dsize - loc - right - 1); + ntfs_rl_mc(dst, loc, src, left, ssize - left); + + /* We may have changed the length of the file, so fix the end marker */ + if (dst[magic].lcn == LCN_ENOENT) + dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length; + return dst; +} + +/** + * ntfs_rl_split - insert a run list into the centre of a hole + * @dst: original run list to be worked on + * @dsize: number of elements in @dst (including end marker) + * @src: new run list to be inserted + * @ssize: number of elements in @src (excluding end marker) + * @loc: index in run list @dst at which to split and insert @src + * + * Split the run list @dst at @loc into two and insert @src in between the two + * fragments. No merging of run lists is necessary. Adjust the size of the + * holes either side. + * + * It is up to the caller to serialize access to the run lists @dst and @src. + * + * On success, return a pointer to the new, combined, run list. Note, both + * run lists @dst and @src are deallocated before returning so you cannot use + * the pointers for anything any more. (Strictly speaking the returned run list + * may be the same as @dst but this is irrelevant.) + * + * On error, return -errno. Both run lists are left unmodified. The following + * error codes are defined: + * -ENOMEM - Not enough memory to allocate run list array. + * -EINVAL - Invalid parameters were passed in.
+ */ +static inline run_list_element *ntfs_rl_split(run_list_element *dst, int dsize, + run_list_element *src, int ssize, int loc) +{ + BUG_ON(!dst); + BUG_ON(!src); + + /* Space required: @dst size + @src size + one new hole. */ + dst = ntfs_rl_realloc(dst, dsize, dsize + ssize + 1); + if (IS_ERR(dst)) + return dst; + /* + * We are guaranteed to succeed from here so can start modifying the + * original run lists. + */ + + /* + * Move the tail of @dst out of the way, then copy in @src. The tail + * is moved starting at @loc itself, so the element being split now + * exists twice: the original at @loc becomes the hole before @src and + * the copy at @loc + @ssize + 1 becomes the hole after it. + */ + ntfs_rl_mm(dst, loc + 1 + ssize, loc, dsize - loc); + ntfs_rl_mc(dst, loc + 1, src, 0, ssize); + + /* Adjust the size of the holes either side of @src. */ + dst[loc].length = dst[loc+1].vcn - dst[loc].vcn; + dst[loc+ssize+1].vcn = dst[loc+ssize].vcn + dst[loc+ssize].length; + dst[loc+ssize+1].length = dst[loc+ssize+2].vcn - dst[loc+ssize+1].vcn; + + return dst; +} + +/** + * ntfs_merge_run_lists - merge two run_lists into one + * @drl: original run list to be worked on + * @srl: new run list to be merged into @drl + * + * First we sanity check the two run lists @srl and @drl to make sure that they + * are sensible and can be merged. The run list @srl must be either after the + * run list @drl or completely within a hole (or unmapped region) in @drl. + * + * It is up to the caller to serialize access to the run lists @drl and @srl. + * + * Merging of run lists is necessary in two cases: + * 1. When attribute lists are used and a further extent is being mapped. + * 2. When new clusters are allocated to fill a hole or extend a file. + * + * There are four possible ways @srl can be merged. It can: + * - be inserted at the beginning of a hole, + * - split the hole in two and be inserted between the two fragments, + * - be appended at the end of a hole, or it can + * - replace the whole hole. + * It can also be appended to the end of the run list, which is just a variant + * of the insert case. + * + * On success, return a pointer to the new, combined, run list.
Note, both + * run lists @drl and @srl are deallocated before returning so you cannot use + * the pointers for anything any more. (Strictly speaking the returned run list + * may be the same as @dst but this is irrelevant.) + * + * On error, return -errno. Both run lists are left unmodified. The following + * error codes are defined: + * -ENOMEM - Not enough memory to allocate run list array. + * -EINVAL - Invalid parameters were passed in. + * -ERANGE - The run lists overlap and cannot be merged. + */ +run_list_element *ntfs_merge_run_lists(run_list_element *drl, + run_list_element *srl) +{ + int di, si; /* Current index into @[ds]rl. */ + int sstart; /* First index with lcn > LCN_RL_NOT_MAPPED. */ + int dins; /* Index into @drl at which to insert @srl. */ + int dend, send; /* Last index into @[ds]rl. */ + int dfinal, sfinal; /* The last index into @[ds]rl with + lcn >= LCN_HOLE. */ + int marker = 0; + VCN marker_vcn = 0; + +#ifdef DEBUG + ntfs_debug("dst:"); + ntfs_debug_dump_runlist(drl); + ntfs_debug("src:"); + ntfs_debug_dump_runlist(srl); +#endif + + /* Check for silly calling... */ + if (unlikely(!srl)) + return drl; + if (unlikely(IS_ERR(srl) || IS_ERR(drl))) + return ERR_PTR(-EINVAL); + + /* Check for the case where the first mapping is being done now. */ + if (unlikely(!drl)) { + drl = srl; + /* Complete the source run list if necessary. */ + if (unlikely(drl[0].vcn)) { + /* Scan to the end of the source run list. */ + for (dend = 0; likely(drl[dend].length); dend++) + ; + drl = ntfs_rl_realloc(drl, dend, dend + 1); + if (IS_ERR(drl)) + return drl; + /* Insert start element at the front of the run list. */ + ntfs_rl_mm(drl, 1, 0, dend); + drl[0].vcn = 0; + drl[0].lcn = LCN_RL_NOT_MAPPED; + drl[0].length = drl[1].vcn; + } + goto finished; + } + + si = di = 0; + + /* Skip any unmapped start element(s) in the source run_list. */ + while (srl[si].length && srl[si].lcn < (LCN)LCN_HOLE) + si++; + + /* Can't have an entirely unmapped source run list. 
*/ + BUG_ON(!srl[si].length); + + /* Record the starting points. */ + sstart = si; + + /* + * Skip forward in @drl until we reach the position where @srl needs to + * be inserted. If we reach the end of @drl, @srl just needs to be + * appended to @drl. + */ + for (; drl[di].length; di++) { + if (drl[di].vcn + drl[di].length > srl[sstart].vcn) + break; + } + dins = di; + + /* Sanity check for illegal overlaps. */ + if ((drl[di].vcn == srl[si].vcn) && (drl[di].lcn >= 0) && + (srl[si].lcn >= 0)) { + ntfs_error(NULL, "Run lists overlap. Cannot merge!"); + return ERR_PTR(-ERANGE); + } + + /* Scan to the end of both run lists in order to know their sizes. */ + for (send = si; srl[send].length; send++) + ; + for (dend = di; drl[dend].length; dend++) + ; + + if (srl[send].lcn == (LCN)LCN_ENOENT) + marker_vcn = srl[marker = send].vcn; + + /* Scan to the last element with lcn >= LCN_HOLE. */ + for (sfinal = send; sfinal >= 0 && srl[sfinal].lcn < LCN_HOLE; sfinal--) + ; + for (dfinal = dend; dfinal >= 0 && drl[dfinal].lcn < LCN_HOLE; dfinal--) + ; + + { + BOOL start; + BOOL finish; + int ds = dend + 1; /* Number of elements in drl & srl */ + int ss = sfinal - sstart + 1; + + start = ((drl[dins].lcn < LCN_RL_NOT_MAPPED) || /* End of file */ + (drl[dins].vcn == srl[sstart].vcn)); /* Start of hole */ + finish = ((drl[dins].lcn >= LCN_RL_NOT_MAPPED) && /* End of file */ + ((drl[dins].vcn + drl[dins].length) <= /* End of hole */ + (srl[send - 1].vcn + srl[send - 1].length))); + + /* Or we'll lose an end marker */ + if (start && finish && (drl[dins].length == 0)) + ss++; + if (marker && (drl[dins].vcn + drl[dins].length > srl[send - 1].vcn)) + finish = FALSE; +#if 0 + ntfs_debug("dfinal = %i, dend = %i", dfinal, dend); + ntfs_debug("sstart = %i, sfinal = %i, send = %i", sstart, sfinal, send); + ntfs_debug("start = %i, finish = %i", start, finish); + ntfs_debug("ds = %i, ss = %i, dins = %i", ds, ss, dins); +#endif + if (start) { + if (finish) + drl = ntfs_rl_replace(drl, ds, srl + 
sstart, ss, dins); + else + drl = ntfs_rl_insert(drl, ds, srl + sstart, ss, dins); + } else { + if (finish) + drl = ntfs_rl_append(drl, ds, srl + sstart, ss, dins); + else + drl = ntfs_rl_split(drl, ds, srl + sstart, ss, dins); + } + if (IS_ERR(drl)) { + ntfs_error(NULL, "Merge failed."); + return drl; + } + ntfs_free(srl); + if (marker) { + ntfs_debug("Triggering marker code."); + for (ds = dend; drl[ds].length; ds++) + ; + /* We only need to care if @srl ended after @drl. */ + if (drl[ds].vcn <= marker_vcn) { + int slots = 0; + + if (drl[ds].vcn == marker_vcn) { + ntfs_debug("Old marker = 0x%Lx, replacing with " + "LCN_ENOENT.\n", + (unsigned long long) + drl[ds].lcn); + drl[ds].lcn = (LCN)LCN_ENOENT; + goto finished; + } + /* + * We need to create an unmapped run list element in + * @drl or extend an existing one before adding the + * ENOENT terminator. + */ + if (drl[ds].lcn == (LCN)LCN_ENOENT) { + ds--; + slots = 1; + } + if (drl[ds].lcn != (LCN)LCN_RL_NOT_MAPPED) { + /* Add an unmapped run list element. */ + if (!slots) { + /* FIXME/TODO: We need to have the + * extra memory already! (AIA) */ + drl = ntfs_rl_realloc(drl, ds, ds + 2); + if (!drl) + goto critical_error; + slots = 2; + } + ds++; + /* Need to set vcn if it isn't set already. */ + if (slots != 1) + drl[ds].vcn = drl[ds - 1].vcn + + drl[ds - 1].length; + drl[ds].lcn = (LCN)LCN_RL_NOT_MAPPED; + /* We now used up a slot. */ + slots--; + } + drl[ds].length = marker_vcn - drl[ds].vcn; + /* Finally add the ENOENT terminator. */ + ds++; + if (!slots) { + /* FIXME/TODO: We need to have the extra + * memory already! (AIA) */ + drl = ntfs_rl_realloc(drl, ds, ds + 1); + if (!drl) + goto critical_error; + } + drl[ds].vcn = marker_vcn; + drl[ds].lcn = (LCN)LCN_ENOENT; + drl[ds].length = (s64)0; + } + } + } + +finished: + /* The merge was completed successfully. */ + ntfs_debug("Merged run list:"); + ntfs_debug_dump_runlist(drl); + return drl; + +critical_error: + /* Critical error! 
We cannot afford to fail here. */ + ntfs_error(NULL, "Critical error! Not enough memory."); + panic("NTFS: Cannot continue."); +} + +/** + * decompress_mapping_pairs - convert mapping pairs array to run list + * @vol: ntfs volume on which the attribute resides + * @attr: attribute record whose mapping pairs array to decompress + * @old_rl: optional run list in which to insert @attr's run list + * + * It is up to the caller to serialize access to the run list @old_rl. + * + * Decompress the attribute @attr's mapping pairs array into a run list. On + * success, return the decompressed run list. + * + * If @old_rl is not NULL, decompressed run list is inserted into the + * appropriate place in @old_rl and the resultant, combined run list is + * returned. The original @old_rl is deallocated. + * + * On error, return -errno. @old_rl is left unmodified in that case. + * + * The following error codes are defined: + * -ENOMEM - Not enough memory to allocate run list array. + * -EIO - Corrupt run list. + * -EINVAL - Invalid parameters were passed in. + * -ERANGE - The two run lists overlap. + * + * FIXME: For now we take the conceptionally simplest approach of creating the + * new run list disregarding the already existing one and then splicing the + * two into one, if that is possible (we check for overlap and discard the new + * run list if overlap present before returning ERR_PTR(-ERANGE)). + */ +run_list_element *decompress_mapping_pairs(const ntfs_volume *vol, + const ATTR_RECORD *attr, run_list_element *old_rl) +{ + VCN vcn; /* Current vcn. */ + LCN lcn; /* Current lcn. */ + s64 deltaxcn; /* Change in [vl]cn. */ + run_list_element *rl; /* The output run list. */ + u8 *buf; /* Current position in mapping pairs array. */ + u8 *attr_end; /* End of attribute. */ + int rlsize; /* Size of run list buffer. */ + u16 rlpos; /* Current run list position in units of + run_list_elements. */ + u8 b; /* Current byte offset in buf. 
*/ + +#ifdef DEBUG + /* Make sure attr exists and is non-resident. */ + if (!attr || !attr->non_resident || sle64_to_cpu( + attr->data.non_resident.lowest_vcn) < (VCN)0) { + ntfs_error(vol->sb, "Invalid arguments."); + return ERR_PTR(-EINVAL); + } +#endif + /* Start at vcn = lowest_vcn and lcn 0. */ + vcn = sle64_to_cpu(attr->data.non_resident.lowest_vcn); + lcn = 0; + /* Get start of the mapping pairs array. */ + buf = (u8*)attr + le16_to_cpu( + attr->data.non_resident.mapping_pairs_offset); + attr_end = (u8*)attr + le32_to_cpu(attr->length); + if (unlikely(buf < (u8*)attr || buf > attr_end)) { + ntfs_error(vol->sb, "Corrupt attribute."); + return ERR_PTR(-EIO); + } + /* Current position in run list array. */ + rlpos = 0; + /* Allocate first page and set current run list size to one page. */ + rl = ntfs_malloc_nofs(rlsize = PAGE_SIZE); + if (unlikely(!rl)) + return ERR_PTR(-ENOMEM); + /* Insert unmapped starting element if necessary. */ + if (vcn) { + rl->vcn = (VCN)0; + rl->lcn = (LCN)LCN_RL_NOT_MAPPED; + rl->length = vcn; + rlpos++; + } + while (buf < attr_end && *buf) { + /* + * Allocate more memory if needed, including space for the + * not-mapped and terminator elements. ntfs_malloc_nofs() + * operates on whole pages only. + */ + if (((rlpos + 3) * sizeof(*old_rl)) > rlsize) { + run_list_element *rl2; + + rl2 = ntfs_malloc_nofs(rlsize + (int)PAGE_SIZE); + if (unlikely(!rl2)) { + ntfs_free(rl); + return ERR_PTR(-ENOMEM); + } + memcpy(rl2, rl, rlsize); + ntfs_free(rl); + rl = rl2; + rlsize += PAGE_SIZE; + } + /* Enter the current vcn into the current run_list element. */ + rl[rlpos].vcn = vcn; + /* + * Get the change in vcn, i.e. the run length in clusters. + * Doing it this way ensures that we signextend negative values. + * A negative run length doesn't make any sense, but hey, I + * didn't make up the NTFS specs and Windows NT4 treats the run + * length as a signed value so that's how it is... 
+ */ + b = *buf & 0xf; + if (b) { + if (unlikely(buf + b > attr_end)) + goto io_error; + /* + * Start at the last (most significant) byte, sign extending + * it, then shift in the remaining bytes down to buf[1]. + */ + for (deltaxcn = (s8)buf[b--]; b; b--) + deltaxcn = (deltaxcn << 8) + buf[b]; + } else { /* The length entry is compulsory. */ + ntfs_error(vol->sb, "Missing length entry in mapping " + "pairs array."); + deltaxcn = (s64)-1; + } + /* + * Assume a negative length to indicate data corruption and + * hence clean up and fail with -EIO (see err_out). + */ + if (unlikely(deltaxcn < 0)) { + ntfs_error(vol->sb, "Invalid length in mapping pairs " + "array."); + goto err_out; + } + /* + * Enter the current run length into the current run list + * element. + */ + rl[rlpos].length = deltaxcn; + /* Increment the current vcn by the current run length. */ + vcn += deltaxcn; + /* + * There might be no lcn change at all, as is the case for + * sparse clusters on NTFS 3.0+, in which case we set the lcn + * to LCN_HOLE. + */ + if (!(*buf & 0xf0)) + rl[rlpos].lcn = (LCN)LCN_HOLE; + else { + /* Get the lcn change which really can be negative. */ + u8 b2 = *buf & 0xf; + b = b2 + ((*buf >> 4) & 0xf); + if (buf + b > attr_end) + goto io_error; + /* The lcn bytes follow the b2 length bytes; stop above them. */ + for (deltaxcn = (s8)buf[b--]; b > b2; b--) + deltaxcn = (deltaxcn << 8) + buf[b]; + /* Change the current lcn to its new value. */ + lcn += deltaxcn; +#ifdef DEBUG + /* + * On NTFS 1.2-, apparently can have lcn == -1 to + * indicate a hole. But we haven't verified ourselves + * whether it is really the lcn or the deltaxcn that is + * -1. So if either is found give us a message so we + * can investigate it further! + */ + if (vol->major_ver < 3) { + if (unlikely(deltaxcn == (LCN)-1)) + ntfs_error(vol->sb, "lcn delta == -1"); + if (unlikely(lcn == (LCN)-1)) + ntfs_error(vol->sb, "lcn == -1"); + } +#endif + /* Check lcn is not below -1. */ + if (unlikely(lcn < (LCN)-1)) { + ntfs_error(vol->sb, "Invalid LCN < -1 in " + "mapping pairs array."); + goto err_out; + } + /* Enter the current lcn into the run_list element.
*/ + rl[rlpos].lcn = lcn; + } + /* Get to the next run_list element. */ + rlpos++; + /* Increment the buffer position to the next mapping pair. */ + buf += (*buf & 0xf) + ((*buf >> 4) & 0xf) + 1; + } + if (unlikely(buf >= attr_end)) + goto io_error; + /* + * If there is a highest_vcn specified, it must be equal to the final + * vcn in the run list - 1, or something has gone badly wrong. + */ + deltaxcn = sle64_to_cpu(attr->data.non_resident.highest_vcn); + if (unlikely(deltaxcn && vcn - 1 != deltaxcn)) { +mpa_err: + ntfs_error(vol->sb, "Corrupt mapping pairs array in " + "non-resident attribute."); + goto err_out; + } + /* Setup not mapped run list element if this is the base extent. */ + if (!attr->data.non_resident.lowest_vcn) { + VCN max_cluster; + + max_cluster = (sle64_to_cpu( + attr->data.non_resident.allocated_size) + + vol->cluster_size - 1) >> + vol->cluster_size_bits; + /* + * If there is a difference between the highest_vcn and the + * highest cluster, the run list is either corrupt or, more + * likely, there are more extents following this one. + */ + if (deltaxcn < --max_cluster) { + ntfs_debug("More extents to follow; deltaxcn = 0x%Lx, " + "max_cluster = 0x%Lx", + (long long)deltaxcn, + (long long)max_cluster); + rl[rlpos].vcn = vcn; + vcn += rl[rlpos].length = max_cluster - deltaxcn; + rl[rlpos].lcn = (LCN)LCN_RL_NOT_MAPPED; + rlpos++; + } else if (unlikely(deltaxcn > max_cluster)) { + ntfs_error(vol->sb, "Corrupt attribute. deltaxcn = " + "0x%Lx, max_cluster = 0x%Lx", + (long long)deltaxcn, + (long long)max_cluster); + goto mpa_err; + } + rl[rlpos].lcn = (LCN)LCN_ENOENT; + } else /* Not the base extent. There may be more extents to follow. */ + rl[rlpos].lcn = (LCN)LCN_RL_NOT_MAPPED; + + /* Setup terminating run_list element. */ + rl[rlpos].vcn = vcn; + rl[rlpos].length = (s64)0; + /* If no existing run list was specified, we are done. 
*/ + if (!old_rl) { + ntfs_debug("Mapping pairs array successfully decompressed:"); + ntfs_debug_dump_runlist(rl); + return rl; + } + /* Now combine the new and old run lists checking for overlaps. */ + old_rl = ntfs_merge_run_lists(old_rl, rl); + if (likely(!IS_ERR(old_rl))) + return old_rl; + /* The merge failed, so discard the freshly decompressed run list. */ + ntfs_free(rl); + ntfs_error(vol->sb, "Failed to merge run lists."); + return old_rl; +io_error: + ntfs_error(vol->sb, "Corrupt attribute."); +err_out: + ntfs_free(rl); + return ERR_PTR(-EIO); +} + +/** + * map_run_list - map (a part of) a run list of an ntfs inode + * @ni: ntfs inode for which to map (part of) a run list + * @vcn: map run list part containing this vcn + * + * Map the part of a run list containing the @vcn of the ntfs inode @ni. + * + * Return 0 on success and -errno on error: -ENOMEM if no search context could + * be allocated, -ENOENT if the attribute extent containing @vcn was not found, + * or the error returned by map_mft_record() or decompress_mapping_pairs(). + */ +int map_run_list(ntfs_inode *ni, VCN vcn) +{ + ntfs_inode *base_ni; + attr_search_context *ctx; + MFT_RECORD *mrec; + int err = 0; + + ntfs_debug("Mapping run list part containing vcn 0x%Lx.", + (long long)vcn); + + /* For an attribute inode, work on the mft record of its base inode. */ + if (!NInoAttr(ni)) + base_ni = ni; + else + base_ni = ni->ext.base_ntfs_ino; + + mrec = map_mft_record(base_ni); + if (IS_ERR(mrec)) + return PTR_ERR(mrec); + ctx = get_attr_search_ctx(base_ni, mrec); + if (!ctx) { + err = -ENOMEM; + goto err_out; + } + if (!lookup_attr(ni->type, ni->name, ni->name_len, IGNORE_CASE, vcn, + NULL, 0, ctx)) { + put_attr_search_ctx(ctx); + err = -ENOENT; + goto err_out; + } + + down_write(&ni->run_list.lock); + /* Make sure someone else didn't do the work while we were sleeping.
*/ + if (likely(vcn_to_lcn(ni->run_list.rl, vcn) <= LCN_RL_NOT_MAPPED)) { + run_list_element *rl; + + rl = decompress_mapping_pairs(ni->vol, ctx->attr, + ni->run_list.rl); + if (unlikely(IS_ERR(rl))) + err = PTR_ERR(rl); + else + ni->run_list.rl = rl; + } + up_write(&ni->run_list.lock); + + put_attr_search_ctx(ctx); +err_out: + unmap_mft_record(base_ni); + return err; +} + +/** + * vcn_to_lcn - convert a vcn into a lcn given a run list + * @rl: run list to use for conversion + * @vcn: vcn to convert + * + * Convert the virtual cluster number @vcn of an attribute into a logical + * cluster number (lcn) of a device using the run list @rl to map vcns to their + * corresponding lcns. + * + * It is up to the caller to serialize access to the run list @rl. + * + * Since lcns must be >= 0, we use negative return values with special meaning: + * + * Return value Meaning / Description + * ================================================== + * -1 = LCN_HOLE Hole / not allocated on disk. + * -2 = LCN_RL_NOT_MAPPED This is part of the run list which has not been + * inserted into the run list yet. + * -3 = LCN_ENOENT There is no such vcn in the attribute. + * -4 = LCN_EINVAL Input parameter error (if debug enabled). + */ +LCN vcn_to_lcn(const run_list_element *rl, const VCN vcn) +{ + int i; + +#ifdef DEBUG + if (vcn < (VCN)0) + return (LCN)LCN_EINVAL; +#endif + /* + * If rl is NULL, assume that we have found an unmapped run list. The + * caller can then attempt to map it and fail appropriately if + * necessary. + */ + if (unlikely(!rl)) + return (LCN)LCN_RL_NOT_MAPPED; + + /* Catch out of lower bounds vcn. */ + if (unlikely(vcn < rl[0].vcn)) + return (LCN)LCN_ENOENT; + + for (i = 0; likely(rl[i].length); i++) { + if (unlikely(vcn < rl[i+1].vcn)) { + if (likely(rl[i].lcn >= (LCN)0)) + return rl[i].lcn + (vcn - rl[i].vcn); + return rl[i].lcn; + } + } + /* + * The terminator element is setup to the correct value, i.e. one of + * LCN_HOLE, LCN_RL_NOT_MAPPED, or LCN_ENOENT. 
+ */ + if (likely(rl[i].lcn < (LCN)0)) + return rl[i].lcn; + /* Just in case... We could replace this with BUG() some day. */ + return (LCN)LCN_ENOENT; +} + +/** + * find_attr - find (next) attribute in mft record + * @type: attribute type to find + * @name: attribute name to find (optional, i.e. NULL means don't care) + * @name_len: attribute name length (only needed if @name present) + * @ic: IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present) + * @val: attribute value to find (optional, resident attributes only) + * @val_len: attribute value length + * @ctx: search context with mft record and attribute to search from + * + * You shouldn't need to call this function directly. Use lookup_attr() instead. + * + * find_attr() takes a search context @ctx as parameter and searches the mft + * record specified by @ctx->mrec, beginning at @ctx->attr, for an attribute of + * @type, optionally @name and @val. If found, find_attr() returns TRUE and + * @ctx->attr will point to the found attribute. If not found, find_attr() + * returns FALSE and @ctx->attr is undefined (i.e. do not rely on it not + * changing). + * + * If @ctx->is_first is TRUE, the search begins with @ctx->attr itself. If it + * is FALSE, the search begins after @ctx->attr. + * + * If @ic is IGNORE_CASE, the @name comparisson is not case sensitive and + * @ctx->ntfs_ino must be set to the ntfs inode to which the mft record + * @ctx->mrec belongs. This is so we can get at the ntfs volume and hence at + * the upcase table. If @ic is CASE_SENSITIVE, the comparison is case + * sensitive. When @name is present, @name_len is the @name length in Unicode + * characters. + * + * If @name is not present (NULL), we assume that the unnamed attribute is + * being searched for. + * + * Finally, the resident attribute value @val is looked for, if present. If @val + * is not present (NULL), @val_len is ignored. 
+ * + * find_attr() only searches the specified mft record and it ignores the + * presence of an attribute list attribute (unless it is the one being searched + * for, obviously). If you need to take attribute lists into consideration, use + * lookup_attr() instead (see below). This also means that you cannot use + * find_attr() to search for extent records of non-resident attributes, as + * extents with lowest_vcn != 0 are usually described by the attribute list + * attribute only. - Note that it is possible that the first extent is only in + * the attribute list while the last extent is in the base mft record, so don't + * rely on being able to find the first extent in the base mft record. + * + * Warning: Never use @val when looking for attribute types which can be + * non-resident as this most likely will result in a crash! + */ +BOOL find_attr(const ATTR_TYPES type, const uchar_t *name, const u32 name_len, + const IGNORE_CASE_BOOL ic, const u8 *val, const u32 val_len, + attr_search_context *ctx) +{ + ATTR_RECORD *a; + ntfs_volume *vol; + uchar_t *upcase; + u32 upcase_len; + + if (ic == IGNORE_CASE) { + vol = ctx->ntfs_ino->vol; + upcase = vol->upcase; + upcase_len = vol->upcase_len; + } else { + vol = NULL; + upcase = NULL; + upcase_len = 0; + } + /* + * Iterate over attributes in mft record starting at @ctx->attr, or the + * attribute following that, if @ctx->is_first is FALSE. + */ + if (ctx->is_first) { + a = ctx->attr; + ctx->is_first = FALSE; + } else + a = (ATTR_RECORD*)((u8*)ctx->attr + + le32_to_cpu(ctx->attr->length)); + for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { + if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + + le32_to_cpu(ctx->mrec->bytes_allocated)) + break; + ctx->attr = a; + /* We catch $END with this more general check, too... */ + if (le32_to_cpu(a->type) > le32_to_cpu(type)) + return FALSE; + if (unlikely(!a->length)) + break; + if (a->type != type) + continue; + /* + * If @name is present, compare the two names. 
If @name is + * missing, assume we want an unnamed attribute. + */ + if (!name) { + /* The search failed if the found attribute is named. */ + if (a->name_length) + return FALSE; + } else if (!ntfs_are_names_equal(name, name_len, + (uchar_t*)((u8*)a + le16_to_cpu(a->name_offset)), + a->name_length, ic, upcase, upcase_len)) { + register int rc; + + rc = ntfs_collate_names(name, name_len, + (uchar_t*)((u8*)a + + le16_to_cpu(a->name_offset)), + a->name_length, 1, IGNORE_CASE, + upcase, upcase_len); + /* + * If @name collates before a->name, there is no + * matching attribute. + */ + if (rc == -1) + return FALSE; + /* If the strings are not equal, continue search. */ + if (rc) + continue; + rc = ntfs_collate_names(name, name_len, + (uchar_t*)((u8*)a + + le16_to_cpu(a->name_offset)), + a->name_length, 1, CASE_SENSITIVE, + upcase, upcase_len); + if (rc == -1) + return FALSE; + if (rc) + continue; + } + /* + * The names match or @name not present and attribute is + * unnamed. If no @val specified, we have found the attribute + * and are done. + */ + if (!val) + return TRUE; + /* @val is present; compare values. */ + else { + u32 vl; + register int rc; + + vl = le32_to_cpu(a->data.resident.value_length); + if (vl > val_len) + vl = val_len; + + rc = memcmp(val, (u8*)a + le16_to_cpu( + a->data.resident.value_offset), vl); + /* + * If @val collates before the current attribute's + * value, there is no matching attribute. + */ + if (!rc) { + register u32 avl; + avl = le32_to_cpu( + a->data.resident.value_length); + if (val_len == avl) + return TRUE; + if (val_len < avl) + return FALSE; + } else if (rc < 0) + return FALSE; + } + } + ntfs_error(NULL, "Inode is corrupt. 
Run chkdsk."); + return FALSE; +} + +/** + * load_attribute_list - load an attribute list into memory + * @vol: ntfs volume from which to read + * @run_list: run list of the attribute list + * @al_start: destination buffer + * @size: size of the destination buffer in bytes + * @initialized_size: initialized size of the attribute list + * + * Walk the run list @run_list and load all clusters from it copying them into + * the linear buffer @al. The maximum number of bytes copied to @al is @size + * bytes. Note, @size does not need to be a multiple of the cluster size. If + * @initialized_size is less than @size, the region in @al between + * @initialized_size and @size will be zeroed and not read from disk. + * + * Return 0 on success or -errno on error. + */ +int load_attribute_list(ntfs_volume *vol, run_list *run_list, u8 *al_start, + const s64 size, const s64 initialized_size) +{ + LCN lcn; + u8 *al = al_start; + u8 *al_end = al + initialized_size; + run_list_element *rl; + struct buffer_head *bh; + struct super_block *sb = vol->sb; + unsigned long block_size = sb->s_blocksize; + unsigned long block, max_block; + int err = 0; + unsigned char block_size_bits = sb->s_blocksize_bits; + + ntfs_debug("Entering."); + if (!vol || !run_list || !al || size <= 0 || initialized_size < 0 || + initialized_size > size) + return -EINVAL; + if (!initialized_size) { + memset(al, 0, size); + return 0; + } + down_read(&run_list->lock); + rl = run_list->rl; + /* Read all clusters specified by the run list one run at a time. */ + while (rl->length) { + lcn = vcn_to_lcn(rl, rl->vcn); + ntfs_debug("Reading vcn = 0x%Lx, lcn = 0x%Lx.", + (long long)rl->vcn, (long long)lcn); + /* The attribute list cannot be sparse. */ + if (lcn < 0) { + ntfs_error(sb, "vcn_to_lcn() failed. Cannot read " + "attribute list."); + goto err_out; + } + block = lcn << vol->cluster_size_bits >> block_size_bits; + /* Read the run from device in chunks of block_size bytes. 
*/ + max_block = block + (rl->length << vol->cluster_size_bits >> + block_size_bits); + ntfs_debug("max_block = 0x%lx.", max_block); + do { + ntfs_debug("Reading block = 0x%lx.", block); + bh = sb_bread(sb, block); + if (!bh) { + ntfs_error(sb, "sb_bread() failed. Cannot " + "read attribute list."); + goto err_out; + } + if (al + block_size >= al_end) + goto do_final; + memcpy(al, bh->b_data, block_size); + brelse(bh); + al += block_size; + } while (++block < max_block); + rl++; + } + if (initialized_size < size) { +initialize: + memset(al_start + initialized_size, 0, size - initialized_size); + } +done: + up_read(&run_list->lock); + return err; +do_final: + if (al < al_end) { + /* + * Partial block. + * + * Note: The attribute list can be smaller than its allocation + * by multiple clusters. This has been encountered by at least + * two people running Windows XP, thus we cannot do any + * truncation sanity checking here. (AIA) + */ + memcpy(al, bh->b_data, al_end - al); + brelse(bh); + if (initialized_size < size) + goto initialize; + goto done; + } + brelse(bh); + /* Real overflow! */ + ntfs_error(sb, "Attribute list buffer overflow. Read attribute list " + "is truncated."); +err_out: + err = -EIO; + goto done; +} + +/** + * find_external_attr - find an attribute in the attribute list of an ntfs inode + * @type: attribute type to find + * @name: attribute name to find (optional, i.e. NULL means don't care) + * @name_len: attribute name length (only needed if @name present) + * @ic: IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present) + * @lowest_vcn: lowest vcn to find (optional, non-resident attributes only) + * @val: attribute value to find (optional, resident attributes only) + * @val_len: attribute value length + * @ctx: search context with mft record and attribute to search from + * + * You shouldn't need to call this function directly. Use lookup_attr() instead. 
+ * + * Find an attribute by searching the attribute list for the corresponding + * attribute list entry. Having found the entry, map the mft record for read + * if the attribute is in a different mft record/inode, find_attr the attribute + * in there and return it. + * + * On first search @ctx->ntfs_ino must be the base mft record and @ctx must + * have been obtained from a call to get_attr_search_ctx(). On subsequent calls + * @ctx->ntfs_ino can be any extent inode, too (@ctx->base_ntfs_ino is then the + * base inode). + * + * After finishing with the attribute/mft record you need to call + * put_attr_search_ctx() to clean up the search context (unmapping any + * mapped inodes, etc). + * + * Return TRUE if the search was successful and FALSE if not. When TRUE, + * @ctx->attr is the found attribute and it is in mft record @ctx->mrec. When + * FALSE, @ctx->attr is the attribute which collates just after the attribute + * being searched for in the base ntfs inode, i.e. if one wants to add the + * attribute to the mft record this is the correct place to insert it into + * and if there is not enough space, the attribute should be placed in an + * extent mft record. + */ +static BOOL find_external_attr(const ATTR_TYPES type, const uchar_t *name, + const u32 name_len, const IGNORE_CASE_BOOL ic, + const VCN lowest_vcn, const u8 *val, const u32 val_len, + attr_search_context *ctx) +{ + ntfs_inode *base_ni, *ni; + ntfs_volume *vol; + ATTR_LIST_ENTRY *al_entry, *next_al_entry; + u8 *al_start, *al_end; + ATTR_RECORD *a; + uchar_t *al_name; + u32 al_name_len; + + ni = ctx->ntfs_ino; + base_ni = ctx->base_ntfs_ino; + ntfs_debug("Entering for inode 0x%lx, type 0x%x.", ni->mft_no, type); + if (!base_ni) { + /* First call happens with the base mft record. 
*/ + base_ni = ctx->base_ntfs_ino = ctx->ntfs_ino; + ctx->base_mrec = ctx->mrec; + } + if (ni == base_ni) + ctx->base_attr = ctx->attr; + vol = base_ni->vol; + al_start = base_ni->attr_list; + al_end = al_start + base_ni->attr_list_size; + if (!ctx->al_entry) + ctx->al_entry = (ATTR_LIST_ENTRY*)al_start; + /* + * Iterate over entries in attribute list starting at @ctx->al_entry, + * or the entry following that, if @ctx->is_first is FALSE. + */ + if (ctx->is_first) { + al_entry = ctx->al_entry; + ctx->is_first = FALSE; + } else + al_entry = (ATTR_LIST_ENTRY*)((u8*)ctx->al_entry + + le16_to_cpu(ctx->al_entry->length)); + for (;; al_entry = next_al_entry) { + /* Out of bounds check. */ + if ((u8*)al_entry < base_ni->attr_list || + (u8*)al_entry > al_end) + break; /* Inode is corrupt. */ + ctx->al_entry = al_entry; + /* Catch the end of the attribute list. */ + if ((u8*)al_entry == al_end) + goto not_found; + if (!al_entry->length) + break; + if ((u8*)al_entry + 6 > al_end || (u8*)al_entry + + le16_to_cpu(al_entry->length) > al_end) + break; + next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + + le16_to_cpu(al_entry->length)); + if (le32_to_cpu(al_entry->type) > le32_to_cpu(type)) + goto not_found; + if (type != al_entry->type) + continue; + /* + * If @name is present, compare the two names. If @name is + * missing, assume we want an unnamed attribute. + */ + al_name_len = al_entry->name_length; + al_name = (uchar_t*)((u8*)al_entry + al_entry->name_offset); + if (!name) { + if (al_name_len) + goto not_found; + } else if (!ntfs_are_names_equal(al_name, al_name_len, name, + name_len, ic, vol->upcase, vol->upcase_len)) { + register int rc; + + rc = ntfs_collate_names(name, name_len, al_name, + al_name_len, 1, IGNORE_CASE, + vol->upcase, vol->upcase_len); + /* + * If @name collates before al_name, there is no + * matching attribute. + */ + if (rc == -1) + goto not_found; + /* If the strings are not equal, continue search. 
*/ + if (rc) + continue; + /* + * FIXME: Reverse engineering showed 0, IGNORE_CASE but + * that is inconsistent with find_attr(). The subsequent + * rc checks were also different. Perhaps I made a + * mistake in one of the two. Need to recheck which is + * correct or at least see what is going on... (AIA) + */ + rc = ntfs_collate_names(name, name_len, al_name, + al_name_len, 1, CASE_SENSITIVE, + vol->upcase, vol->upcase_len); + if (rc == -1) + goto not_found; + if (rc) + continue; + } + /* + * The names match or @name not present and attribute is + * unnamed. Now check @lowest_vcn. Continue search if the + * next attribute list entry still fits @lowest_vcn. Otherwise + * we have reached the right one or the search has failed. + */ + if (lowest_vcn && (u8*)next_al_entry >= al_start && + (u8*)next_al_entry + 6 < al_end && + (u8*)next_al_entry + le16_to_cpu( + next_al_entry->length) <= al_end && + sle64_to_cpu(next_al_entry->lowest_vcn) <= + sle64_to_cpu(lowest_vcn) && + next_al_entry->type == al_entry->type && + next_al_entry->name_length == al_name_len && + ntfs_are_names_equal((uchar_t*)((u8*) + next_al_entry + + next_al_entry->name_offset), + next_al_entry->name_length, + al_name, al_name_len, CASE_SENSITIVE, + vol->upcase, vol->upcase_len)) + continue; + if (MREF_LE(al_entry->mft_reference) == ni->mft_no) { + if (MSEQNO_LE(al_entry->mft_reference) != ni->seq_no) { + ntfs_error(vol->sb, "Found stale mft " + "reference in attribute list!"); + break; + } + } else { /* Mft references do not match. */ + /* If there is a mapped record unmap it first. */ + if (ni != base_ni) + unmap_extent_mft_record(ni); + /* Do we want the base record back? */ + if (MREF_LE(al_entry->mft_reference) == + base_ni->mft_no) { + ni = ctx->ntfs_ino = base_ni; + ctx->mrec = ctx->base_mrec; + } else { + /* We want an extent record. 
*/ + ctx->mrec = map_extent_mft_record(base_ni, + al_entry->mft_reference, &ni); + ctx->ntfs_ino = ni; + if (IS_ERR(ctx->mrec)) { + ntfs_error(vol->sb, "Failed to map mft " + "record, error code " + "%ld.", + -PTR_ERR(ctx->mrec)); + break; + } + } + ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec + + le16_to_cpu(ctx->mrec->attrs_offset)); + } + /* + * ctx->vfs_ino, ctx->mrec, and ctx->attr now point to the + * mft record containing the attribute represented by the + * current al_entry. + */ + /* + * We could call into find_attr() to find the right attribute + * in this mft record but this would be less efficient and not + * quite accurate as find_attr() ignores the attribute instance + * numbers for example which become important when one plays + * with attribute lists. Also, because a proper match has been + * found in the attribute list entry above, the comparison can + * now be optimized. So it is worth re-implementing a + * simplified find_attr() here. + */ + a = ctx->attr; + /* + * Use a manual loop so we can still use break and continue + * with the same meanings as above. + */ +do_next_attr_loop: + if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + + le32_to_cpu(ctx->mrec->bytes_allocated)) + break; + if (a->type == AT_END) + continue; + if (!a->length) + break; + if (al_entry->instance != a->instance) + goto do_next_attr; + if (al_entry->type != a->type) + continue; + if (name) { + if (a->name_length != al_name_len) + continue; + if (!ntfs_are_names_equal((uchar_t*)((u8*)a + + le16_to_cpu(a->name_offset)), + a->name_length, al_name, al_name_len, + CASE_SENSITIVE, vol->upcase, + vol->upcase_len)) + continue; + } + ctx->attr = a; + /* + * If no @val specified or @val specified and it matches, we + * have found it! 
+ */ + if (!val || (!a->non_resident && le32_to_cpu( + a->data.resident.value_length) == val_len && + !memcmp((u8*)a + + le16_to_cpu(a->data.resident.value_offset), + val, val_len))) { + ntfs_debug("Done, found."); + return TRUE; + } +do_next_attr: + /* Proceed to the next attribute in the current mft record. */ + a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length)); + goto do_next_attr_loop; + } + ntfs_error(base_ni->vol->sb, "Inode contains corrupt attribute list " + "attribute.\n"); + if (ni != base_ni) { + unmap_extent_mft_record(ni); + ctx->ntfs_ino = base_ni; + ctx->mrec = ctx->base_mrec; + ctx->attr = ctx->base_attr; + } + /* + * FIXME: We absolutely have to return ERROR status instead of just + * false or we will blow up or even worse cause corruption when we add + * write support and we reach this code path! + */ + printk(KERN_CRIT "NTFS: FIXME: Hit unfinished error code path!!!\n"); + return FALSE; +not_found: + /* + * Seek to the end of the base mft record, i.e. when we return false, + * ctx->mrec and ctx->attr indicate where the attribute should be + * inserted into the attribute record. + * And of course ctx->al_entry points to the end of the attribute + * list inside NTFS_I(ctx->base_vfs_ino)->attr_list. + * + * FIXME: Do we really want to do this here? Think about it... (AIA) + */ + reinit_attr_search_ctx(ctx); + find_attr(type, name, name_len, ic, val, val_len, ctx); + ntfs_debug("Done, not found."); + return FALSE; +} + +/** + * lookup_attr - find an attribute in an ntfs inode + * @type: attribute type to find + * @name: attribute name to find (optional, i.e. 
NULL means don't care) + * @name_len: attribute name length (only needed if @name present) + * @ic: IGNORE_CASE or CASE_SENSITIVE (ignored if @name not present) + * @lowest_vcn: lowest vcn to find (optional, non-resident attributes only) + * @val: attribute value to find (optional, resident attributes only) + * @val_len: attribute value length + * @ctx: search context with mft record and attribute to search from + * + * Find an attribute in an ntfs inode. On first search @ctx->ntfs_ino must + * be the base mft record and @ctx must have been obtained from a call to + * get_attr_search_ctx(). + * + * This function transparently handles attribute lists and @ctx is used to + * continue searches where they were left off at. + * + * After finishing with the attribute/mft record you need to call + * put_attr_search_ctx() to clean up the search context (unmapping any + * mapped inodes, etc). + * + * Return TRUE if the search was successful and FALSE if not. When TRUE, + * @ctx->attr is the found attribute and it is in mft record @ctx->mrec. When + * FALSE, @ctx->attr is the attribute which collates just after the attribute + * being searched for, i.e. if one wants to add the attribute to the mft + * record this is the correct place to insert it into. + */ +BOOL lookup_attr(const ATTR_TYPES type, const uchar_t *name, const u32 name_len, + const IGNORE_CASE_BOOL ic, const VCN lowest_vcn, const u8 *val, + const u32 val_len, attr_search_context *ctx) +{ + ntfs_inode *base_ni; + + ntfs_debug("Entering."); + if (ctx->base_ntfs_ino) + base_ni = ctx->base_ntfs_ino; + else + base_ni = ctx->ntfs_ino; + /* Sanity check, just for debugging really. 
*/ + BUG_ON(!base_ni); + if (!NInoAttrList(base_ni)) + return find_attr(type, name, name_len, ic, val, val_len, ctx); + return find_external_attr(type, name, name_len, ic, lowest_vcn, val, + val_len, ctx); +} + +/** + * init_attr_search_ctx - initialize an attribute search context + * @ctx: attribute search context to initialize + * @ni: ntfs inode with which to initialize the search context + * @mrec: mft record with which to initialize the search context + * + * Initialize the attribute search context @ctx with @ni and @mrec. + */ +static inline void init_attr_search_ctx(attr_search_context *ctx, + ntfs_inode *ni, MFT_RECORD *mrec) +{ + ctx->mrec = mrec; + /* Sanity checks are performed elsewhere. */ + ctx->attr = (ATTR_RECORD*)((u8*)mrec + le16_to_cpu(mrec->attrs_offset)); + ctx->is_first = TRUE; + ctx->ntfs_ino = ni; + ctx->al_entry = NULL; + ctx->base_ntfs_ino = NULL; + ctx->base_mrec = NULL; + ctx->base_attr = NULL; +} + +/** + * reinit_attr_search_ctx - reinitialize an attribute search context + * @ctx: attribute search context to reinitialize + * + * Reinitialize the attribute search context @ctx, unmapping an associated + * extent mft record if present, and initialize the search context again. + * + * This is used when a search for a new attribute is being started to reset + * the search context to the beginning. + */ +void reinit_attr_search_ctx(attr_search_context *ctx) +{ + if (likely(!ctx->base_ntfs_ino)) { + /* No attribute list. */ + ctx->is_first = TRUE; + /* Sanity checks are performed elsewhere. */ + ctx->attr = (ATTR_RECORD*)((u8*)ctx->mrec + + le16_to_cpu(ctx->mrec->attrs_offset)); + return; + } /* Attribute list. 
*/ + if (ctx->ntfs_ino != ctx->base_ntfs_ino) + unmap_extent_mft_record(ctx->ntfs_ino); + init_attr_search_ctx(ctx, ctx->base_ntfs_ino, ctx->base_mrec); + return; +} + +/** + * get_attr_search_ctx - allocate and initialize a new attribute search context + * @ni: ntfs inode with which to initialize the search context + * @mrec: mft record with which to initialize the search context + * + * Allocate a new attribute search context, initialize it with @ni and @mrec, + * and return it. Return NULL if allocation failed. + */ +attr_search_context *get_attr_search_ctx(ntfs_inode *ni, MFT_RECORD *mrec) +{ + attr_search_context *ctx; + + ctx = kmem_cache_alloc(ntfs_attr_ctx_cache, SLAB_NOFS); + if (ctx) + init_attr_search_ctx(ctx, ni, mrec); + return ctx; +} + +/** + * put_attr_search_ctx - release an attribute search context + * @ctx: attribute search context to free + * + * Release the attribute search context @ctx, unmapping an associated extent + * mft record if present. + */ +void put_attr_search_ctx(attr_search_context *ctx) +{ + if (ctx->base_ntfs_ino && ctx->ntfs_ino != ctx->base_ntfs_ino) + unmap_extent_mft_record(ctx->ntfs_ino); + kmem_cache_free(ntfs_attr_ctx_cache, ctx); + return; +} + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/attrib.h b/reactos/drivers/fs/ntfs/linux-ntfs/attrib.h new file mode 100644 index 00000000000..8c36c1a4473 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/attrib.h @@ -0,0 +1,106 @@ +/* + * attrib.h - Defines for attribute handling in NTFS Linux kernel driver. + * Part of the Linux-NTFS project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_ATTRIB_H +#define _LINUX_NTFS_ATTRIB_H + +#include + +#include "endian.h" +#include "types.h" +#include "layout.h" + +static inline void init_run_list(run_list *rl) +{ + rl->rl = NULL; + init_rwsem(&rl->lock); +} + +typedef enum { + LCN_HOLE = -1, /* Keep this as highest value or die! */ + LCN_RL_NOT_MAPPED = -2, + LCN_ENOENT = -3, + LCN_EINVAL = -4, +} LCN_SPECIAL_VALUES; + +/** + * attr_search_context - used in attribute search functions + * @mrec: buffer containing mft record to search + * @attr: attribute record in @mrec where to begin/continue search + * @is_first: if true lookup_attr() begins search with @attr, else after @attr + * + * Structure must be initialized to zero before the first call to one of the + * attribute search functions. Initialize @mrec to point to the mft record to + * search, and @attr to point to the first attribute within @mrec (not necessary + * if calling the _first() functions), and set @is_first to TRUE (not necessary + * if calling the _first() functions). + * + * If @is_first is TRUE, the search begins with @attr. If @is_first is FALSE, + * the search begins after @attr. This is so that, after the first call to one + * of the search attribute functions, we can call the function again, without + * any modification of the search context, to automagically get the next + * matching attribute. 
+ */ +typedef struct { + MFT_RECORD *mrec; + ATTR_RECORD *attr; + BOOL is_first; + ntfs_inode *ntfs_ino; + ATTR_LIST_ENTRY *al_entry; + ntfs_inode *base_ntfs_ino; + MFT_RECORD *base_mrec; + ATTR_RECORD *base_attr; +} attr_search_context; + +extern run_list_element *decompress_mapping_pairs(const ntfs_volume *vol, + const ATTR_RECORD *attr, run_list_element *old_rl); + +extern int map_run_list(ntfs_inode *ni, VCN vcn); + +extern LCN vcn_to_lcn(const run_list_element *rl, const VCN vcn); + +extern BOOL find_attr(const ATTR_TYPES type, const uchar_t *name, + const u32 name_len, const IGNORE_CASE_BOOL ic, const u8 *val, + const u32 val_len, attr_search_context *ctx); + +BOOL lookup_attr(const ATTR_TYPES type, const uchar_t *name, const u32 name_len, + const IGNORE_CASE_BOOL ic, const VCN lowest_vcn, const u8 *val, + const u32 val_len, attr_search_context *ctx); + +extern int load_attribute_list(ntfs_volume *vol, run_list *rl, u8 *al_start, + const s64 size, const s64 initialized_size); + +static inline s64 attribute_value_length(const ATTR_RECORD *a) +{ + if (!a->non_resident) + return (s64)le32_to_cpu(a->data.resident.value_length); + return sle64_to_cpu(a->data.non_resident.data_size); +} + +extern void reinit_attr_search_ctx(attr_search_context *ctx); +extern attr_search_context *get_attr_search_ctx(ntfs_inode *ni, + MFT_RECORD *mrec); +extern void put_attr_search_ctx(attr_search_context *ctx); + +#endif /* _LINUX_NTFS_ATTRIB_H */ + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/compress.c b/reactos/drivers/fs/ntfs/linux-ntfs/compress.c new file mode 100644 index 00000000000..a8618f107ea --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/compress.c @@ -0,0 +1,945 @@ +/** + * compress.c - NTFS kernel compressed attributes handling. + * Part of the Linux-NTFS project. 
+ * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include + +#include "ntfs.h" + +/** + * ntfs_compression_constants - enum of constants used in the compression code + */ +typedef enum { + /* Token types and access mask. */ + NTFS_SYMBOL_TOKEN = 0, + NTFS_PHRASE_TOKEN = 1, + NTFS_TOKEN_MASK = 1, + + /* Compression sub-block constants. */ + NTFS_SB_SIZE_MASK = 0x0fff, + NTFS_SB_SIZE = 0x1000, + NTFS_SB_IS_COMPRESSED = 0x8000, + + /* + * The maximum compression block size is by definition 16 * the cluster + * size, with the maximum supported cluster size being 4kiB. Thus the + * maximum compression buffer size is 64kiB, so we use this when + * initializing the compression buffer. 
+ */ + NTFS_MAX_CB_SIZE = 64 * 1024, +} ntfs_compression_constants; + +/** + * ntfs_compression_buffer - one buffer for the decompression engine + */ +static u8 *ntfs_compression_buffer = NULL; + +/** + * ntfs_cb_lock - spinlock which protects ntfs_compression_buffer + */ +static spinlock_t ntfs_cb_lock = SPIN_LOCK_UNLOCKED; + +/** + * allocate_compression_buffers - allocate the decompression buffers + * + * Caller has to hold the ntfs_lock semaphore. + * + * Return 0 on success or -ENOMEM if the allocations failed. + */ +int allocate_compression_buffers(void) +{ + BUG_ON(ntfs_compression_buffer); + + ntfs_compression_buffer = vmalloc(NTFS_MAX_CB_SIZE); + if (!ntfs_compression_buffer) + return -ENOMEM; + return 0; +} + +/** + * free_compression_buffers - free the decompression buffers + * + * Caller has to hold the ntfs_lock semaphore. + */ +void free_compression_buffers(void) +{ + BUG_ON(!ntfs_compression_buffer); + vfree(ntfs_compression_buffer); + ntfs_compression_buffer = NULL; +} + +/** + * zero_partial_compressed_page - zero out of bounds compressed page region + */ +static void zero_partial_compressed_page(ntfs_inode *ni, struct page *page) +{ + u8 *kp = page_address(page); + unsigned int kp_ofs; + + ntfs_debug("Zeroing page region outside initialized size."); + if (((s64)page->index << PAGE_CACHE_SHIFT) >= ni->initialized_size) { + /* + * FIXME: Using clear_page() will become wrong when we get + * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem. 
+ */ + clear_page(kp); + return; + } + kp_ofs = ni->initialized_size & ~PAGE_CACHE_MASK; + memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs); + return; +} + +/** + * handle_bounds_compressed_page - test for&handle out of bounds compressed page + */ +static inline void handle_bounds_compressed_page(ntfs_inode *ni, + struct page *page) +{ + if ((page->index >= (ni->initialized_size >> PAGE_CACHE_SHIFT)) && + (ni->initialized_size < VFS_I(ni)->i_size)) + zero_partial_compressed_page(ni, page); + return; +} + +/** + * ntfs_decompress - decompress a compression block into an array of pages + * @dest_pages: destination array of pages + * @dest_index: current index into @dest_pages (IN/OUT) + * @dest_ofs: current offset within @dest_pages[@dest_index] (IN/OUT) + * @dest_max_index: maximum index into @dest_pages (IN) + * @dest_max_ofs: maximum offset within @dest_pages[@dest_max_index] (IN) + * @xpage: the target page (-1 if none) (IN) + * @xpage_done: set to 1 if xpage was completed successfully (IN/OUT) + * @cb_start: compression block to decompress (IN) + * @cb_size: size of compression block @cb_start in bytes (IN) + * + * The caller must have disabled preemption. ntfs_decompress() reenables it when + * the critical section is finished. + * + * This decompresses the compression block @cb_start into the array of + * destination pages @dest_pages starting at index @dest_index into @dest_pages + * and at offset @dest_ofs into the page @dest_pages[@dest_index]. + * + * When the page @dest_pages[@xpage] is completed, @xpage_done is set to 1. + * If xpage is -1 or @xpage has not been completed, @xpage_done is not modified. + * + * @cb_start is a pointer to the compression block which needs decompressing + * and @cb_size is the size of @cb_start in bytes (8-64kiB). + * + * Return 0 if success or -EOVERFLOW on error in the compressed stream. 
+ * @xpage_done indicates whether the target page (@dest_pages[@xpage]) was + * completed during the decompression of the compression block (@cb_start). + * + * Warning: This function *REQUIRES* PAGE_CACHE_SIZE >= 4096 or it will blow up + * unpredictably! You have been warned! + * + * Note to hackers: This function may not sleep until it has finished accessing + * the compression block @cb_start as it is a per-CPU buffer. + */ +static int ntfs_decompress(struct page *dest_pages[], int *dest_index, + int *dest_ofs, const int dest_max_index, const int dest_max_ofs, + const int xpage, char *xpage_done, u8 *const cb_start, + const u32 cb_size) +{ + /* + * Pointers into the compressed data, i.e. the compression block (cb), + * and the therein contained sub-blocks (sb). + */ + u8 *cb_end = cb_start + cb_size; /* End of cb. */ + u8 *cb = cb_start; /* Current position in cb. */ + u8 *cb_sb_start = cb; /* Beginning of the current sb in the cb. */ + u8 *cb_sb_end; /* End of current sb / beginning of next sb. */ + + /* Variables for uncompressed data / destination. */ + struct page *dp; /* Current destination page being worked on. */ + u8 *dp_addr; /* Current pointer into dp. */ + u8 *dp_sb_start; /* Start of current sub-block in dp. */ + u8 *dp_sb_end; /* End of current sb in dp (dp_sb_start + + NTFS_SB_SIZE). */ + u16 do_sb_start; /* @dest_ofs when starting this sub-block. */ + u16 do_sb_end; /* @dest_ofs of end of this sb (do_sb_start + + NTFS_SB_SIZE). */ + + /* Variables for tag and token parsing. */ + u8 tag; /* Current tag. */ + int token; /* Loop counter for the eight tokens in tag. */ + + /* Need this because we can't sleep, so need two stages. */ + int completed_pages[dest_max_index - *dest_index + 1]; + int nr_completed_pages = 0; + + /* Default error code. 
*/ + int err = -EOVERFLOW; + + ntfs_debug("Entering, cb_size = 0x%x.", cb_size); +do_next_sb: + ntfs_debug("Beginning sub-block at offset = 0x%x in the cb.", + cb - cb_start); + + /* Have we reached the end of the compression block? */ + if (cb == cb_end || !le16_to_cpup((u16*)cb)) { + int i; + + ntfs_debug("Completed. Returning success (0)."); + err = 0; +return_error: + /* We can sleep from now on, so we drop lock. */ + spin_unlock(&ntfs_cb_lock); + /* Second stage: finalize completed pages. */ + if (nr_completed_pages > 0) { + struct page *page = dest_pages[completed_pages[0]]; + ntfs_inode *ni = NTFS_I(page->mapping->host); + + for (i = 0; i < nr_completed_pages; i++) { + int di = completed_pages[i]; + + dp = dest_pages[di]; + /* + * If we are outside the initialized size, zero + * the out of bounds page range. + */ + handle_bounds_compressed_page(ni, dp); + flush_dcache_page(dp); + kunmap(dp); + SetPageUptodate(dp); + unlock_page(dp); + if (di == xpage) + *xpage_done = 1; + else + page_cache_release(dp); + dest_pages[di] = NULL; + } + } + return err; + } + + /* Setup offsets for the current sub-block destination. */ + do_sb_start = *dest_ofs; + do_sb_end = do_sb_start + NTFS_SB_SIZE; + + /* Check that we are still within allowed boundaries. */ + if (*dest_index == dest_max_index && do_sb_end > dest_max_ofs) + goto return_overflow; + + /* Does the minimum size of a compressed sb overflow valid range? */ + if (cb + 6 > cb_end) + goto return_overflow; + + /* Setup the current sub-block source pointers and validate range. */ + cb_sb_start = cb; + cb_sb_end = cb_sb_start + (le16_to_cpup((u16*)cb) & NTFS_SB_SIZE_MASK) + + 3; + if (cb_sb_end > cb_end) + goto return_overflow; + + /* Get the current destination page. */ + dp = dest_pages[*dest_index]; + if (!dp) { + /* No page present. Skip decompression of this sub-block. */ + cb = cb_sb_end; + + /* Advance destination position to next sub-block. 
*/ + *dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_CACHE_MASK; + if (!*dest_ofs && (++*dest_index > dest_max_index)) + goto return_overflow; + goto do_next_sb; + } + + /* We have a valid destination page. Setup the destination pointers. */ + dp_addr = (u8*)page_address(dp) + do_sb_start; + + /* Now, we are ready to process the current sub-block (sb). */ + if (!(le16_to_cpup((u16*)cb) & NTFS_SB_IS_COMPRESSED)) { + ntfs_debug("Found uncompressed sub-block."); + /* This sb is not compressed, just copy it into destination. */ + + /* Advance source position to first data byte. */ + cb += 2; + + /* An uncompressed sb must be full size. */ + if (cb_sb_end - cb != NTFS_SB_SIZE) + goto return_overflow; + + /* Copy the block and advance the source position. */ + memcpy(dp_addr, cb, NTFS_SB_SIZE); + cb += NTFS_SB_SIZE; + + /* Advance destination position to next sub-block. */ + *dest_ofs += NTFS_SB_SIZE; + if (!(*dest_ofs &= ~PAGE_CACHE_MASK)) { +finalize_page: + /* + * First stage: add current page index to array of + * completed pages. + */ + completed_pages[nr_completed_pages++] = *dest_index; + if (++*dest_index > dest_max_index) + goto return_overflow; + } + goto do_next_sb; + } + ntfs_debug("Found compressed sub-block."); + /* This sb is compressed, decompress it into destination. */ + + /* Setup destination pointers. */ + dp_sb_start = dp_addr; + dp_sb_end = dp_sb_start + NTFS_SB_SIZE; + + /* Forward to the first tag in the sub-block. */ + cb += 2; +do_next_tag: + if (cb == cb_sb_end) { + /* Check if the decompressed sub-block was not full-length. */ + if (dp_addr < dp_sb_end) { + int nr_bytes = do_sb_end - *dest_ofs; + + ntfs_debug("Filling incomplete sub-block with " + "zeroes."); + /* Zero remainder and update destination position. */ + memset(dp_addr, 0, nr_bytes); + *dest_ofs += nr_bytes; + } + /* We have finished the current sub-block. */ + if (!(*dest_ofs &= ~PAGE_CACHE_MASK)) + goto finalize_page; + goto do_next_sb; + } + + /* Check we are still in range. 
+ */ + if (cb > cb_sb_end || dp_addr > dp_sb_end) + goto return_overflow; + + /* Get the next tag and advance to first token. */ + tag = *cb++; + + /* Parse the eight tokens described by the tag. */ + for (token = 0; token < 8; token++, tag >>= 1) { + u16 lg, pt, length, max_non_overlap; + register u16 i; + u8 *dp_back_addr; + + /* Check if we are done / still in range. */ + if (cb >= cb_sb_end || dp_addr > dp_sb_end) + break; + + /* Determine token type and parse appropriately.*/ + if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) { + /* + * We have a symbol token, copy the symbol across, and + * advance the source and destination positions. + */ + *dp_addr++ = *cb++; + ++*dest_ofs; + + /* Continue with the next token. */ + continue; + } + + /* + * We have a phrase token. Make sure it is not the first tag in + * the sb as this is illegal and would confuse the code below. + */ + if (dp_addr == dp_sb_start) + goto return_overflow; + + /* + * Determine the number of bytes to go back (p) and the number + * of bytes to copy (l). We use an optimized algorithm in which + * we first calculate log2(current destination position in sb), + * which allows determination of l and p in O(1) rather than + * O(n). We just need an arch-optimized log2() function now. + */ + lg = 0; + for (i = *dest_ofs - do_sb_start - 1; i >= 0x10; i >>= 1) + lg++; + + /* Get the phrase token into pt. */ + pt = le16_to_cpup((u16*)cb); + + /* + * Calculate starting position of the byte sequence in + * the destination using the fact that p = (pt >> (12 - lg)) + 1 + * and make sure we don't go too far back. + */ + dp_back_addr = dp_addr - (pt >> (12 - lg)) - 1; + if (dp_back_addr < dp_sb_start) + goto return_overflow; + + /* Now calculate the length of the byte sequence. */ + length = (pt & (0xfff >> lg)) + 3; + + /* Advance destination position and verify it is in range. */ + *dest_ofs += length; + if (*dest_ofs > do_sb_end) + goto return_overflow; + + /* The number of non-overlapping bytes.
+ */ + max_non_overlap = dp_addr - dp_back_addr; + + if (length <= max_non_overlap) { + /* The byte sequence doesn't overlap, just copy it. */ + memcpy(dp_addr, dp_back_addr, length); + + /* Advance destination pointer. */ + dp_addr += length; + } else { + /* + * The byte sequence does overlap, copy non-overlapping + * part and then do a slow byte by byte copy for the + * overlapping part. Also, advance the destination + * pointer. + */ + memcpy(dp_addr, dp_back_addr, max_non_overlap); + dp_addr += max_non_overlap; + dp_back_addr += max_non_overlap; + length -= max_non_overlap; + while (length--) + *dp_addr++ = *dp_back_addr++; + } + + /* Advance source position and continue with the next token. */ + cb += 2; + } + + /* No tokens left in the current tag. Continue with the next tag. */ + goto do_next_tag; + +return_overflow: + ntfs_error(NULL, "Failed. Returning -EOVERFLOW.\n"); + goto return_error; +} + +/** + * ntfs_read_compressed_block - read a compressed block into the page cache + * @page: locked page in the compression block(s) we need to read + * + * When we are called the page has already been verified to be locked and the + * attribute is known to be non-resident, not encrypted, but compressed. + * + * 1. Determine which compression block(s) @page is in. + * 2. Get hold of all pages corresponding to this/these compression block(s). + * 3. Read the (first) compression block. + * 4. Decompress it into the corresponding pages. + * 5. Throw the compressed data away and proceed to 3. for the next compression + * block or return success if no more compression blocks left. + * + * Warning: We have to be careful what we do about existing pages. They might + * have been written to so that we would lose data if we were to just overwrite + * them with the out-of-date uncompressed data. + * + * FIXME: For PAGE_CACHE_SIZE > cb_size we are not doing the Right Thing(TM) at + * the end of the file I think.
We need to detect this case and zero the out + * of bounds remainder of the page in question and mark it as handled. At the + * moment we would just return -EIO on such a page. This bug will only become + * apparent if pages are above 8kiB and the NTFS volume only uses 512 byte + * clusters so is probably not going to be seen by anyone. Still this should + * be fixed. (AIA) + * + * FIXME: Again for PAGE_CACHE_SIZE > cb_size we are screwing up both in + * handling sparse and compressed cbs. (AIA) + * + * FIXME: At the moment we don't do any zeroing out in the case that + * initialized_size is less than data_size. This should be safe because of the + * nature of the compression algorithm used. Just in case we check and output + * an error message in read inode if the two sizes are not equal for a + * compressed file. (AIA) + * + * Return 0 if @page was completed. On failure return -ENOMEM if the internal + * buffers could not be allocated, the negative error code left by the last + * ntfs_decompress() call if there is one, and -EIO otherwise. + */ +int ntfs_read_compressed_block(struct page *page) +{ + struct address_space *mapping = page->mapping; + ntfs_inode *ni = NTFS_I(mapping->host); + ntfs_volume *vol = ni->vol; + struct super_block *sb = vol->sb; + run_list_element *rl; + unsigned long block_size = sb->s_blocksize; + unsigned char block_size_bits = sb->s_blocksize_bits; + u8 *cb, *cb_pos, *cb_end; + struct buffer_head **bhs; + unsigned long offset, index = page->index; + u32 cb_size = ni->itype.compressed.block_size; + u64 cb_size_mask = cb_size - 1UL; + VCN vcn; + LCN lcn; + /* The first wanted vcn (minimum alignment is PAGE_CACHE_SIZE). */ + VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >> + vol->cluster_size_bits; + /* + * The first vcn after the last wanted vcn (minimum alignment is again + * PAGE_CACHE_SIZE). + */ + VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1) + & ~cb_size_mask) >> vol->cluster_size_bits; + /* Number of compression blocks (cbs) in the wanted vcn range.
*/ + unsigned int nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits + >> ni->itype.compressed.block_size_bits; + /* + * Number of pages required to store the uncompressed data from all + * compression blocks (cbs) overlapping @page. Due to alignment + * guarantees of start_vcn and end_vcn, no need to round up here. + */ + unsigned int nr_pages = (end_vcn - start_vcn) << + vol->cluster_size_bits >> PAGE_CACHE_SHIFT; + unsigned int xpage, max_page, cur_page, cur_ofs, i; + unsigned int cb_clusters, cb_max_ofs; + int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0; + struct page **pages; + unsigned char xpage_done = 0; + + ntfs_debug("Entering, page->index = 0x%lx, cb_size = 0x%x, nr_pages = " + "%i.", index, cb_size, nr_pages); + /* + * Bad things happen if we get here for anything that is not an + * unnamed $DATA attribute. + */ + BUG_ON(ni->type != AT_DATA); + BUG_ON(ni->name_len); + + pages = kmalloc(nr_pages * sizeof(struct page *), GFP_NOFS); + + /* Allocate memory to store the buffer heads we need. */ + bhs_size = cb_size / block_size * sizeof(struct buffer_head *); + bhs = kmalloc(bhs_size, GFP_NOFS); + + if (unlikely(!pages || !bhs)) { + kfree(bhs); + kfree(pages); + SetPageError(page); + unlock_page(page); + ntfs_error(vol->sb, "Failed to allocate internal buffers."); + return -ENOMEM; + } + + /* + * We have already been given one page, this is the one we must do. + * Once again, the alignment guarantees keep it simple. + */ + offset = start_vcn << vol->cluster_size_bits >> PAGE_CACHE_SHIFT; + xpage = index - offset; + pages[xpage] = page; + /* + * The remaining pages need to be allocated and inserted into the page + * cache, alignment guarantees keep all the below much simpler. 
(-8 + */ + max_page = ((VFS_I(ni)->i_size + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT) - offset; + if (nr_pages < max_page) + max_page = nr_pages; + for (i = 0; i < max_page; i++, offset++) { + if (i != xpage) + pages[i] = grab_cache_page_nowait(mapping, offset); + page = pages[i]; + if (page) { + /* + * We only (re)read the page if it isn't already read + * in and/or dirty or we would be losing data or at + * least wasting our time. + */ + if (!PageDirty(page) && (!PageUptodate(page) || + PageError(page))) { + ClearPageError(page); + kmap(page); + continue; + } + unlock_page(page); + page_cache_release(page); + pages[i] = NULL; + } + } + + /* + * We have the run list, and all the destination pages we need to fill. + * Now read the first compression block. + */ + cur_page = 0; + cur_ofs = 0; + cb_clusters = ni->itype.compressed.block_clusters; +do_next_cb: + nr_cbs--; + nr_bhs = 0; + + /* Read all cb buffer heads one cluster at a time. */ + rl = NULL; + for (vcn = start_vcn, start_vcn += cb_clusters; vcn < start_vcn; + vcn++) { + BOOL is_retry = FALSE; + + if (!rl) { +lock_retry_remap: + down_read(&ni->run_list.lock); + rl = ni->run_list.rl; + } + if (likely(rl != NULL)) { + /* Seek to element containing target vcn. */ + while (rl->length && rl[1].vcn <= vcn) + rl++; + lcn = vcn_to_lcn(rl, vcn); + } else + lcn = (LCN)LCN_RL_NOT_MAPPED; + ntfs_debug("Reading vcn = 0x%Lx, lcn = 0x%Lx.", + (long long)vcn, (long long)lcn); + if (lcn < 0) { + /* + * When we reach the first sparse cluster we have + * finished with the cb. + */ + if (lcn == LCN_HOLE) + break; + if (is_retry || lcn != LCN_RL_NOT_MAPPED) + goto rl_err; + is_retry = TRUE; + /* + * Attempt to map run list, dropping lock for the + * duration. + */ + up_read(&ni->run_list.lock); + if (!map_run_list(ni, vcn)) + goto lock_retry_remap; + goto map_rl_err; + } + block = lcn << vol->cluster_size_bits >> block_size_bits; + /* Read the lcn from device in chunks of block_size bytes. 
*/ + max_block = block + (vol->cluster_size >> block_size_bits); + do { + ntfs_debug("block = 0x%x.", block); + if (unlikely(!(bhs[nr_bhs] = sb_getblk(sb, block)))) + goto getblk_err; + nr_bhs++; + } while (++block < max_block); + } + + /* Release the lock if we took it. */ + if (rl) + up_read(&ni->run_list.lock); + + /* Setup and initiate io on all buffer heads. */ + for (i = 0; i < nr_bhs; i++) { + struct buffer_head *tbh = bhs[i]; + + if (unlikely(test_set_buffer_locked(tbh))) + continue; + if (unlikely(buffer_uptodate(tbh))) { + unlock_buffer(tbh); + continue; + } + atomic_inc(&tbh->b_count); + tbh->b_end_io = end_buffer_read_sync; + submit_bh(READ, tbh); + } + + /* Wait for io completion on all buffer heads. */ + for (i = 0; i < nr_bhs; i++) { + struct buffer_head *tbh = bhs[i]; + + if (buffer_uptodate(tbh)) + continue; + wait_on_buffer(tbh); + /* + * We need an optimization barrier here, otherwise we start + * hitting the below fixup code when accessing a loopback + * mounted ntfs partition. This indicates either there is a + * race condition in the loop driver or, more likely, gcc + * overoptimises the code without the barrier and it doesn't + * do the Right Thing(TM). + */ + barrier(); + if (unlikely(!buffer_uptodate(tbh))) { + ntfs_warning(vol->sb, "Buffer is unlocked but not " + "uptodate! Unplugging the disk queue " + "and rescheduling."); + get_bh(tbh); + blk_run_queues(); + schedule(); + put_bh(tbh); + if (unlikely(!buffer_uptodate(tbh))) + goto read_err; + ntfs_warning(vol->sb, "Buffer is now uptodate. Good."); + } + } + + /* + * Get the compression buffer. We must not sleep any more + * until we are finished with it. + */ + spin_lock(&ntfs_cb_lock); + cb = ntfs_compression_buffer; + + BUG_ON(!cb); + + cb_pos = cb; + cb_end = cb + cb_size; + + /* Copy the buffer heads into the contiguous buffer. */ + for (i = 0; i < nr_bhs; i++) { + memcpy(cb_pos, bhs[i]->b_data, block_size); + cb_pos += block_size; + } + + /* Just a precaution. 
*/ + if (cb_pos + 2 <= cb + cb_size) + *(u16*)cb_pos = 0; + + /* Reset cb_pos back to the beginning. */ + cb_pos = cb; + + /* We now have both source (if present) and destination. */ + ntfs_debug("Successfully read the compression block."); + + /* The last page and maximum offset within it for the current cb. */ + cb_max_page = (cur_page << PAGE_CACHE_SHIFT) + cur_ofs + cb_size; + cb_max_ofs = cb_max_page & ~PAGE_CACHE_MASK; + cb_max_page >>= PAGE_CACHE_SHIFT; + + /* Catch end of file inside a compression block. */ + if (cb_max_page > max_page) + cb_max_page = max_page; + + if (vcn == start_vcn - cb_clusters) { + /* Sparse cb, zero out page range overlapping the cb. */ + ntfs_debug("Found sparse compression block."); + /* We can sleep from now on, so we drop lock. */ + spin_unlock(&ntfs_cb_lock); + if (cb_max_ofs) + cb_max_page--; + for (; cur_page < cb_max_page; cur_page++) { + page = pages[cur_page]; + if (page) { + /* + * FIXME: Using clear_page() will become wrong + * when we get PAGE_CACHE_SIZE != PAGE_SIZE but + * for now there is no problem. + */ + if (likely(!cur_ofs)) + clear_page(page_address(page)); + else + memset(page_address(page) + cur_ofs, 0, + PAGE_CACHE_SIZE - + cur_ofs); + flush_dcache_page(page); + kunmap(page); + SetPageUptodate(page); + unlock_page(page); + if (cur_page == xpage) + xpage_done = 1; + else + page_cache_release(page); + pages[cur_page] = NULL; + } + cb_pos += PAGE_CACHE_SIZE - cur_ofs; + cur_ofs = 0; + if (cb_pos >= cb_end) + break; + } + /* If we have a partial final page, deal with it now. */ + if (cb_max_ofs && cb_pos < cb_end) { + page = pages[cur_page]; + if (page) + memset(page_address(page) + cur_ofs, 0, + cb_max_ofs - cur_ofs); + /* + * No need to update cb_pos at this stage: + * cb_pos += cb_max_ofs - cur_ofs; + */ + cur_ofs = cb_max_ofs; + } + } else if (vcn == start_vcn) { + /* We can't sleep so we need two stages. 
*/ + unsigned int cur2_page = cur_page; + unsigned int cur_ofs2 = cur_ofs; + u8 *cb_pos2 = cb_pos; + + ntfs_debug("Found uncompressed compression block."); + /* Uncompressed cb, copy it to the destination pages. */ + /* + * TODO: As a big optimization, we could detect this case + * before we read all the pages and use block_read_full_page() + * on all full pages instead (we still have to treat partial + * pages especially but at least we are getting rid of the + * synchronous io for the majority of pages. + * Or if we choose not to do the read-ahead/-behind stuff, we + * could just return block_read_full_page(pages[xpage]) as long + * as PAGE_CACHE_SIZE <= cb_size. + */ + if (cb_max_ofs) + cb_max_page--; + /* First stage: copy data into destination pages. */ + for (; cur_page < cb_max_page; cur_page++) { + page = pages[cur_page]; + if (page) + memcpy(page_address(page) + cur_ofs, cb_pos, + PAGE_CACHE_SIZE - cur_ofs); + cb_pos += PAGE_CACHE_SIZE - cur_ofs; + cur_ofs = 0; + if (cb_pos >= cb_end) + break; + } + /* If we have a partial final page, deal with it now. */ + if (cb_max_ofs && cb_pos < cb_end) { + page = pages[cur_page]; + if (page) + memcpy(page_address(page) + cur_ofs, cb_pos, + cb_max_ofs - cur_ofs); + cb_pos += cb_max_ofs - cur_ofs; + cur_ofs = cb_max_ofs; + } + /* We can sleep from now on, so drop lock. */ + spin_unlock(&ntfs_cb_lock); + /* Second stage: finalize pages. */ + for (; cur2_page < cb_max_page; cur2_page++) { + page = pages[cur2_page]; + if (page) { + /* + * If we are outside the initialized size, zero + * the out of bounds page range. 
+ */ + handle_bounds_compressed_page(ni, page); + flush_dcache_page(page); + kunmap(page); + SetPageUptodate(page); + unlock_page(page); + if (cur2_page == xpage) + xpage_done = 1; + else + page_cache_release(page); + pages[cur2_page] = NULL; + } + cb_pos2 += PAGE_CACHE_SIZE - cur_ofs2; + cur_ofs2 = 0; + if (cb_pos2 >= cb_end) + break; + } + } else { + /* Compressed cb, decompress it into the destination page(s). */ + unsigned int prev_cur_page = cur_page; + + ntfs_debug("Found compressed compression block."); + err = ntfs_decompress(pages, &cur_page, &cur_ofs, + cb_max_page, cb_max_ofs, xpage, &xpage_done, + cb_pos, cb_size - (cb_pos - cb)); + /* + * We can sleep from now on, lock already dropped by + * ntfs_decompress(). + */ + if (err) { + ntfs_error(vol->sb, "ntfs_decompress() failed in inode " + "0x%lx with error code %i. Skipping " + "this compression block.\n", + ni->mft_no, -err); + /* Release the unfinished pages. */ + for (; prev_cur_page < cur_page; prev_cur_page++) { + page = pages[prev_cur_page]; + if (page) { + if (prev_cur_page == xpage && + !xpage_done) + SetPageError(page); + flush_dcache_page(page); + kunmap(page); + unlock_page(page); + if (prev_cur_page != xpage) + page_cache_release(page); + pages[prev_cur_page] = NULL; + } + } + } + } + + /* Release the buffer heads. */ + for (i = 0; i < nr_bhs; i++) + brelse(bhs[i]); + + /* Do we have more work to do? */ + if (nr_cbs) + goto do_next_cb; + + /* We no longer need the list of buffer heads. */ + kfree(bhs); + + /* Clean up if we have any pages left. Should never happen. */ + for (cur_page = 0; cur_page < max_page; cur_page++) { + page = pages[cur_page]; + if (page) { + ntfs_error(vol->sb, "Still have pages left! 
" + "Terminating them with extreme " + "prejudice."); + if (cur_page == xpage && !xpage_done) + SetPageError(page); + flush_dcache_page(page); + kunmap(page); + unlock_page(page); + if (cur_page != xpage) + page_cache_release(page); + pages[cur_page] = NULL; + } + } + + /* We no longer need the list of pages. */ + kfree(pages); + + /* If we have completed the requested page, we return success. */ + if (likely(xpage_done)) + return 0; + + ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ? + "EOVERFLOW" : (!err ? "EIO" : "unkown error")); + return err < 0 ? err : -EIO; + +read_err: + ntfs_error(vol->sb, "IO error while reading compressed data."); + /* Release the buffer heads. */ + for (i = 0; i < nr_bhs; i++) + brelse(bhs[i]); + goto err_out; + +map_rl_err: + ntfs_error(vol->sb, "map_run_list() failed. Cannot read compression " + "block."); + goto err_out; + +rl_err: + up_read(&ni->run_list.lock); + ntfs_error(vol->sb, "vcn_to_lcn() failed. Cannot read compression " + "block."); + goto err_out; + +getblk_err: + up_read(&ni->run_list.lock); + ntfs_error(vol->sb, "getblk() failed. Cannot read compression block."); + +err_out: + kfree(bhs); + for (i = cur_page; i < max_page; i++) { + page = pages[i]; + if (page) { + if (i == xpage && !xpage_done) + SetPageError(page); + flush_dcache_page(page); + kunmap(page); + unlock_page(page); + if (i != xpage) + page_cache_release(page); + } + } + kfree(pages); + return -EIO; +} + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/debug.c b/reactos/drivers/fs/ntfs/linux-ntfs/debug.c new file mode 100644 index 00000000000..b78932c15f0 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/debug.c @@ -0,0 +1,175 @@ +/* + * debug.c - NTFS kernel debug support. Part of the Linux-NTFS project. + * + * Copyright (c) 2001,2002 Anton Altaparmakov. 
+ * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "debug.h" + +/* + * A static buffer to hold the error string being displayed and a spinlock + * to protect concurrent accesses to it. + */ +static char err_buf[1024]; +static spinlock_t err_buf_lock = SPIN_LOCK_UNLOCKED; + +/** + * __ntfs_warning - output a warning to the syslog + * @function: name of function outputting the warning + * @sb: super block of mounted ntfs filesystem + * @fmt: warning string containing format specifications + * @...: a variable number of arguments specified in @fmt + * + * Outputs a warning to the syslog for the mounted ntfs filesystem described + * by @sb. + * + * @fmt and the corresponding @... is printf style format string containing + * the warning string and the corresponding format arguments, respectively. + * + * @function is the name of the function from which __ntfs_warning is being + * called. + * + * Note, you should be using debug.h::ntfs_warning(@sb, @fmt, @...) instead + * as this provides the @function parameter automatically. + */ +void __ntfs_warning(const char *function, const struct super_block *sb, + const char *fmt, ...) 
+{ + va_list args; + int flen = 0; + + if (function) + flen = strlen(function); + spin_lock(&err_buf_lock); + va_start(args, fmt); + vsnprintf(err_buf, sizeof(err_buf), fmt, args); + va_end(args); + if (sb) + printk(KERN_ERR "NTFS-fs warning (device %s): %s(): %s\n", + sb->s_id, flen ? function : "", err_buf); + else + printk(KERN_ERR "NTFS-fs warning: %s(): %s\n", + flen ? function : "", err_buf); + spin_unlock(&err_buf_lock); +} + +/** + * __ntfs_error - output an error to the syslog + * @function: name of function outputting the error + * @sb: super block of mounted ntfs filesystem + * @fmt: error string containing format specifications + * @...: a variable number of arguments specified in @fmt + * + * Outputs an error to the syslog for the mounted ntfs filesystem described + * by @sb. + * + * @fmt and the corresponding @... is printf style format string containing + * the error string and the corresponding format arguments, respectively. + * + * @function is the name of the function from which __ntfs_error is being + * called. + * + * Note, you should be using debug.h::ntfs_error(@sb, @fmt, @...) instead + * as this provides the @function parameter automatically. + */ +void __ntfs_error(const char *function, const struct super_block *sb, + const char *fmt, ...) +{ + va_list args; + int flen = 0; + + if (function) + flen = strlen(function); + spin_lock(&err_buf_lock); + va_start(args, fmt); + vsnprintf(err_buf, sizeof(err_buf), fmt, args); + va_end(args); + if (sb) + printk(KERN_ERR "NTFS-fs error (device %s): %s(): %s\n", + sb->s_id, flen ? function : "", err_buf); + else + printk(KERN_ERR "NTFS-fs error: %s(): %s\n", + flen ? function : "", err_buf); + spin_unlock(&err_buf_lock); +} + +#ifdef DEBUG + +/* If 1, output debug messages, and if 0, don't. */ +int debug_msgs = 0; + +void __ntfs_debug (const char *file, int line, const char *function, + const char *fmt, ...) 
+{ + va_list args; + int flen = 0; + + if (!debug_msgs) + return; + if (function) + flen = strlen(function); + spin_lock(&err_buf_lock); + va_start(args, fmt); + vsnprintf(err_buf, sizeof(err_buf), fmt, args); + va_end(args); + printk(KERN_DEBUG "NTFS-fs DEBUG (%s, %d): %s: %s\n", + file, line, flen ? function : "", err_buf); + spin_unlock(&err_buf_lock); +} + +/* Dump a run list. Caller has to provide synchronisation for @rl. */ +void ntfs_debug_dump_runlist(const run_list_element *rl) +{ + int i; + const char *lcn_str[5] = { "LCN_HOLE ", "LCN_RL_NOT_MAPPED", + "LCN_ENOENT ", "LCN_EINVAL ", + "LCN_unknown " }; + + if (!debug_msgs) + return; + printk(KERN_DEBUG "NTFS-fs DEBUG: Dumping run list (values " + "in hex):\n"); + if (!rl) { + printk(KERN_DEBUG "Run list not present.\n"); + return; + } + printk(KERN_DEBUG "VCN LCN Run length\n"); + for (i = 0; ; i++) { + LCN lcn = (rl + i)->lcn; + + if (lcn < (LCN)0) { + int index = -lcn - 1; + + if (index > -LCN_EINVAL - 1) + index = 4; + printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n", + (rl + i)->vcn, lcn_str[index], + (rl + i)->length, (rl + i)->length ? + "" : " (run list end)"); + } else + printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n", + (rl + i)->vcn, (rl + i)->lcn, + (rl + i)->length, (rl + i)->length ? + "" : " (run list end)"); + if (!(rl + i)->length) + break; + } +} + +#endif + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/debug.h b/reactos/drivers/fs/ntfs/linux-ntfs/debug.h new file mode 100644 index 00000000000..633ecc7ca79 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/debug.h @@ -0,0 +1,72 @@ +/* + * debug.h - NTFS kernel debug support. Part of the Linux-NTFS project. + * + * Copyright (c) 2001,2002 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_DEBUG_H +#define _LINUX_NTFS_DEBUG_H + +#include +#include +#include +#include + +#include "inode.h" +#include "attrib.h" + +#ifdef DEBUG + +extern int debug_msgs; + +#if 0 /* Fool kernel-doc since it doesn't do macros yet */ +/** + * ntfs_debug - write a debug level message to syslog + * @f: a printf format string containing the message + * @...: the variables to substitute into @f + * + * ntfs_debug() writes a DEBUG level message to the syslog but only if the + * driver was compiled with -DDEBUG. Otherwise, the call turns into a NOP. + */ +static void ntfs_debug(const char *f, ...); +#endif + +extern void __ntfs_debug (const char *file, int line, const char *function, + const char *format, ...) __attribute__ ((format (printf, 4, 5))); +#define ntfs_debug(f, a...) \ + __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a) + +extern void ntfs_debug_dump_runlist(const run_list_element *rl); + +#else /* !DEBUG */ + +#define ntfs_debug(f, a...) do {} while (0) +#define ntfs_debug_dump_runlist(rl) do {} while (0) + +#endif /* !DEBUG */ + +extern void __ntfs_warning(const char *function, const struct super_block *sb, + const char *fmt, ...) __attribute__ ((format (printf, 3, 4))); +#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a) + +extern void __ntfs_error(const char *function, const struct super_block *sb, + const char *fmt, ...) 
__attribute__ ((format (printf, 3, 4))); +#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a) + +#endif /* _LINUX_NTFS_DEBUG_H */ + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/dir.c b/reactos/drivers/fs/ntfs/linux-ntfs/dir.c new file mode 100644 index 00000000000..30dee6372e4 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/dir.c @@ -0,0 +1,1420 @@ +/** + * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include "ntfs.h" +#include "dir.h" + +/** + * The little endian Unicode string $I30 as a global constant. 
 + */
+uchar_t I30[5] = { const_cpu_to_le16('$'), const_cpu_to_le16('I'),
+		const_cpu_to_le16('3'), const_cpu_to_le16('0'),
+		const_cpu_to_le16(0) };
+
+/**
+ * ntfs_lookup_inode_by_name - find an inode in a directory given its name
+ * @dir_ni:	ntfs inode of the directory in which to search for the name
+ * @uname:	Unicode name for which to search in the directory
+ * @uname_len:	length of the name @uname in Unicode characters
+ * @res:	return the found file name if necessary (see below)
+ *
+ * Look for an inode with name @uname in the directory with inode @dir_ni.
+ * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
+ * the Unicode name. The index root is searched first and, if an index entry
+ * points to a child node, the search continues in the index allocation
+ * blocks. If the name is found in the directory, the corresponding
+ * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
+ * is a 64-bit number containing the sequence number.
+ *
+ * On error, a negative value is returned corresponding to the error code. In
+ * particular if the inode is not found -ENOENT is returned. Note that you
+ * can't just check the return value for being negative, you have to check the
+ * inode number for being negative which you can extract using MREC(return
+ * value). Error values are built with ERR_MREF().
+ *
+ * Note, @uname_len does not include the (optional) terminating NULL character.
+ *
+ * Note, we look for a case sensitive match first but we also look for a case
+ * insensitive match at the same time. If we find a case insensitive match, we
+ * save that for the case that we don't find an exact match, where we return
+ * the case insensitive match and setup @res (which we allocate!) with the mft
+ * reference, the file name type, length and with a copy of the little endian
+ * Unicode file name itself. If we match a file name which is in the DOS name
+ * space, we only return the mft reference and file name type in @res.
+ * ntfs_lookup() then uses this to find the long file name in the inode itself.
+ * This is to avoid polluting the dcache with short file names. We want them to
+ * work but we don't care for how quickly one can access them. This also fixes
+ * the dcache aliasing issues.
+ *
+ * State of *@res on return, as implemented by the code paths below:
+ *	- exact match on a name that is not in the DOS name space: a previously
+ *	  cached case insensitive match is freed and *@res is set to NULL;
+ *	- exact match on a DOS name: *@res points to an ntfs_name holding the
+ *	  mft reference, type FILE_NAME_DOS and a name length of zero;
+ *	- only a case insensitive match: the mft reference of the cached
+ *	  ntfs_name is returned and *@res points to that ntfs_name;
+ *	- error: a cached ntfs_name, if any, is freed and *@res is set to NULL,
+ *	  otherwise *@res is not written at all (this includes -ENOENT).
+ * The ntfs_name is obtained with kmalloc(..., GFP_NOFS); presumably the caller
+ * has to kfree() a non-NULL *@res - verify against ntfs_lookup().
+ */
+MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const uchar_t *uname,
+		const int uname_len, ntfs_name **res)
+{
+	ntfs_volume *vol = dir_ni->vol;
+	struct super_block *sb = vol->sb;
+	MFT_RECORD *m;
+	INDEX_ROOT *ir;
+	INDEX_ENTRY *ie;
+	INDEX_ALLOCATION *ia;
+	u8 *index_end;
+	u64 mref;
+	attr_search_context *ctx;
+	int err, rc;
+	VCN vcn, old_vcn;
+	struct address_space *ia_mapping;
+	struct page *page;
+	u8 *kaddr;
+	ntfs_name *name = NULL;	/* cached case insensitive / DOS match */
+
+	/* Get hold of the mft record for the directory. */
+	m = map_mft_record(dir_ni);
+	if (unlikely(IS_ERR(m))) {
+		ntfs_error(sb, "map_mft_record() failed with error code %ld.",
+				-PTR_ERR(m));
+		return ERR_MREF(PTR_ERR(m));
+	}
+	ctx = get_attr_search_ctx(dir_ni, m);
+	if (unlikely(!ctx)) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	/* Find the index root attribute ($I30) in the mft record. */
+	if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0,
+			ctx)) {
+		ntfs_error(sb, "Index root attribute missing in directory "
+				"inode 0x%lx.", dir_ni->mft_no);
+		err = -EIO;
+		goto err_out;
+	}
+	/* Get to the index root value (it's been verified in read_inode). */
+	ir = (INDEX_ROOT*)((u8*)ctx->attr +
+			le16_to_cpu(ctx->attr->data.resident.value_offset));
+	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
+	/* The first index entry. */
+	ie = (INDEX_ENTRY*)((u8*)&ir->index +
+			le32_to_cpu(ir->index.entries_offset));
+	/*
+	 * Phase 1: search the index root.
+	 * Loop until we exceed valid memory (corruption case) or until we
+	 * reach the last entry.
+	 */
+	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
+		/*
+		 * Bounds checks: the entry must not start before the mft
+		 * record and neither its header nor its key may extend past
+		 * index_end.
+		 */
+		if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
+				sizeof(INDEX_ENTRY_HEADER) > index_end ||
+				(u8*)ie + le16_to_cpu(ie->key_length) >
+				index_end)
+			goto dir_err_out;
+		/*
+		 * The last entry cannot contain a name. It can however contain
+		 * a pointer to a child node in the B+tree so we just break out.
+		 */
+		if (ie->flags & INDEX_ENTRY_END)
+			break;
+		/*
+		 * We perform a case sensitive comparison and if that matches
+		 * we are done and return the mft reference of the inode (i.e.
+		 * the inode number together with the sequence number for
+		 * consistency checking). We convert it to cpu format before
+		 * returning.
+		 */
+		if (ntfs_are_names_equal(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length,
+				CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
+found_it:
+			/*
+			 * We have a perfect match, so we don't need to care
+			 * about having matched imperfectly before, so we can
+			 * free name and set *res to NULL.
+			 * However, if the perfect match is a short file name,
+			 * we need to signal this through *res, so that
+			 * ntfs_lookup() can fix dcache aliasing issues.
+			 * As an optimization we just reuse an existing
+			 * allocation of *res.
+			 */
+			if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
+				if (!name) {
+					name = kmalloc(sizeof(ntfs_name),
+							GFP_NOFS);
+					if (!name) {
+						err = -ENOMEM;
+						goto err_out;
+					}
+				}
+				name->mref = le64_to_cpu(
+						ie->data.dir.indexed_file);
+				name->type = FILE_NAME_DOS;
+				name->len = 0;
+				*res = name;
+			} else {
+				if (name)
+					kfree(name);
+				*res = NULL;
+			}
+			mref = le64_to_cpu(ie->data.dir.indexed_file);
+			put_attr_search_ctx(ctx);
+			unmap_mft_record(dir_ni);
+			return mref;
+		}
+		/*
+		 * For a case insensitive mount, we also perform a case
+		 * insensitive comparison (provided the file name is not in the
+		 * POSIX namespace, i.e. its file_name_type is non-zero). If
+		 * the comparison matches, and the name is
+		 * in the WIN32 namespace, we cache the filename in *res so
+		 * that the caller, ntfs_lookup(), can work on it. If the
+		 * comparison matches, and the name is in the DOS namespace, we
+		 * only cache the mft reference and the file name type (we set
+		 * the name length to zero for simplicity).
+		 */
+		if (!NVolCaseSensitive(vol) &&
+				ie->key.file_name.file_name_type &&
+				ntfs_are_names_equal(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length,
+				IGNORE_CASE, vol->upcase, vol->upcase_len)) {
+			int name_size = sizeof(ntfs_name);
+			u8 type = ie->key.file_name.file_name_type;
+			u8 len = ie->key.file_name.file_name_length;
+
+			/* Only one case insensitive matching name allowed. */
+			if (name) {
+				ntfs_error(sb, "Found already allocated name "
+						"in phase 1. Please run chkdsk "
+						"and if that doesn't find any "
+						"errors please report you saw "
+						"this message to "
+						"linux-ntfs-dev@lists.sf.net.");
+				goto dir_err_out;
+			}
+
+			if (type != FILE_NAME_DOS)
+				name_size += len * sizeof(uchar_t);
+			name = kmalloc(name_size, GFP_NOFS);
+			if (!name) {
+				err = -ENOMEM;
+				goto err_out;
+			}
+			name->mref = le64_to_cpu(ie->data.dir.indexed_file);
+			name->type = type;
+			if (type != FILE_NAME_DOS) {
+				name->len = len;
+				memcpy(name->name, ie->key.file_name.file_name,
+						len * sizeof(uchar_t));
+			} else
+				name->len = 0;
+			*res = name;
+		}
+		/*
+		 * Not a perfect match, need to do full blown collation so we
+		 * know which way in the B+tree we have to go.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				IGNORE_CASE, vol->upcase, vol->upcase_len);
+		/*
+		 * If uname collates before the name of the current entry, there
+		 * is definitely no such name in this index but we might need to
+		 * descend into the B+tree so we just break out of the loop.
+		 */
+		if (rc == -1)
+			break;
+		/* The names are not equal, continue the search. */
+		if (rc)
+			continue;
+		/*
+		 * Names match with case insensitive comparison, now try the
+		 * case sensitive comparison, which is required for proper
+		 * collation.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
+		if (rc == -1)
+			break;
+		if (rc)
+			continue;
+		/*
+		 * Perfect match, this will never happen as the
+		 * ntfs_are_names_equal() call will have gotten a match but we
+		 * still treat it correctly.
+		 */
+		goto found_it;
+	}
+	/*
+	 * We have finished with this index without success. Check for the
+	 * presence of a child node and if not present return -ENOENT, unless
+	 * we have got a matching name cached in name in which case return the
+	 * mft reference associated with it.
+	 */
+	if (!(ie->flags & INDEX_ENTRY_NODE)) {
+		if (name) {
+			put_attr_search_ctx(ctx);
+			unmap_mft_record(dir_ni);
+			return name->mref;
+		}
+		ntfs_debug("Entry not found.");
+		err = -ENOENT;
+		goto err_out;
+	} /* Child node present, descend into it. */
+	/* Consistency check: Verify that an index allocation exists. */
+	if (!NInoIndexAllocPresent(dir_ni)) {
+		ntfs_error(sb, "No index allocation attribute but index entry "
+				"requires one. Directory inode 0x%lx is "
+				"corrupt or driver bug.", dir_ni->mft_no);
+		err = -EIO;
+		goto err_out;
+	}
+	/*
+	 * Get the starting vcn of the index_block holding the child node. It
+	 * is read from the last 8 bytes of the index entry.
+	 */
+	vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
+	ia_mapping = VFS_I(dir_ni)->i_mapping;
+	/*
+	 * We are done with the index root and the mft record. Release them,
+	 * otherwise we deadlock with ntfs_map_page(). Clearing m and ctx
+	 * afterwards keeps the err_out path from releasing them a second time.
+	 */
+	put_attr_search_ctx(ctx);
+	unmap_mft_record(dir_ni);
+	m = NULL;
+	ctx = NULL;
+descend_into_child_node:
+	/*
+	 * Phase 2: search the index allocation.
+	 * Convert vcn to index into the index allocation attribute in units
+	 * of PAGE_CACHE_SIZE and map the page cache page, reading it from
+	 * disk if necessary.
+	 */
+	page = ntfs_map_page(ia_mapping, vcn <<
+			dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
+	if (IS_ERR(page)) {
+		ntfs_error(sb, "Failed to map directory index page, error %ld.",
+				-PTR_ERR(page));
+		err = PTR_ERR(page);
+		goto err_out;
+	}
+	kaddr = (u8*)page_address(page);
+fast_descend_into_child_node:
+	/* Get to the index allocation block inside the mapped page. */
+	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
+			dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
+	/* Bounds checks. */
+	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
+		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
+				"inode 0x%lx or driver bug.", dir_ni->mft_no);
+		err = -EIO;
+		goto unm_err_out;
+	}
+	if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
+		ntfs_error(sb, "Actual VCN (0x%Lx) of index buffer is "
+				"different from expected VCN (0x%Lx). "
+				"Directory inode 0x%lx is corrupt or driver "
+				"bug.",
+				(long long)sle64_to_cpu(ia->index_block_vcn),
+				(long long)vcn, dir_ni->mft_no);
+		err = -EIO;
+		goto unm_err_out;
+	} /* NOTE(review): 0x18 below is presumably the size of the index block header preceding ia->index - confirm. */
+	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
+			dir_ni->itype.index.block_size) {
+		ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode "
+				"0x%lx has a size (%u) differing from the "
+				"directory specified size (%u). Directory "
+				"inode is corrupt or driver bug.",
+				(long long)vcn, dir_ni->mft_no,
+				le32_to_cpu(ia->index.allocated_size) + 0x18,
+				dir_ni->itype.index.block_size);
+		err = -EIO;
+		goto unm_err_out;
+	}
+	index_end = (u8*)ia + dir_ni->itype.index.block_size;
+	if (index_end > kaddr + PAGE_CACHE_SIZE) {
+		ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode "
+				"0x%lx crosses page boundary. Impossible! "
+				"Cannot access! This is probably a bug in the "
+				"driver.", (long long)vcn, dir_ni->mft_no);
+		err = -EIO;
+		goto unm_err_out;
+	}
+	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
+	if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
+		ntfs_error(sb, "Size of index buffer (VCN 0x%Lx) of directory "
+				"inode 0x%lx exceeds maximum size.",
+				(long long)vcn, dir_ni->mft_no);
+		err = -EIO;
+		goto unm_err_out;
+	}
+	/* The first index entry. */
+	ie = (INDEX_ENTRY*)((u8*)&ia->index +
+			le32_to_cpu(ia->index.entries_offset));
+	/*
+	 * Iterate similar to above big loop but applied to index buffer, thus
+	 * loop until we exceed valid memory (corruption case) or until we
+	 * reach the last entry.
+	 */
+	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
+		/* Bounds check. */
+		if ((u8*)ie < (u8*)ia || (u8*)ie +
+				sizeof(INDEX_ENTRY_HEADER) > index_end ||
+				(u8*)ie + le16_to_cpu(ie->key_length) >
+				index_end) {
+			ntfs_error(sb, "Index entry out of bounds in "
+					"directory inode 0x%lx.",
+					dir_ni->mft_no);
+			err = -EIO;
+			goto unm_err_out;
+		}
+		/*
+		 * The last entry cannot contain a name. It can however contain
+		 * a pointer to a child node in the B+tree so we just break out.
+		 */
+		if (ie->flags & INDEX_ENTRY_END)
+			break;
+		/*
+		 * We perform a case sensitive comparison and if that matches
+		 * we are done and return the mft reference of the inode (i.e.
+		 * the inode number together with the sequence number for
+		 * consistency checking). We convert it to cpu format before
+		 * returning.
+		 */
+		if (ntfs_are_names_equal(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length,
+				CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
+found_it2:
+			/*
+			 * We have a perfect match, so we don't need to care
+			 * about having matched imperfectly before, so we can
+			 * free name and set *res to NULL.
+			 * However, if the perfect match is a short file name,
+			 * we need to signal this through *res, so that
+			 * ntfs_lookup() can fix dcache aliasing issues.
+			 * As an optimization we just reuse an existing
+			 * allocation of *res.
+			 */
+			if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
+				if (!name) {
+					name = kmalloc(sizeof(ntfs_name),
+							GFP_NOFS);
+					if (!name) {
+						err = -ENOMEM;
+						goto unm_err_out;
+					}
+				}
+				name->mref = le64_to_cpu(
+						ie->data.dir.indexed_file);
+				name->type = FILE_NAME_DOS;
+				name->len = 0;
+				*res = name;
+			} else {
+				if (name)
+					kfree(name);
+				*res = NULL;
+			}
+			mref = le64_to_cpu(ie->data.dir.indexed_file);
+			ntfs_unmap_page(page);
+			return mref;
+		}
+		/*
+		 * For a case insensitive mount, we also perform a case
+		 * insensitive comparison (provided the file name is not in the
+		 * POSIX namespace). If the comparison matches, and the name is
+		 * in the WIN32 namespace, we cache the filename in *res so
+		 * that the caller, ntfs_lookup(), can work on it. If the
+		 * comparison matches, and the name is in the DOS namespace, we
+		 * only cache the mft reference and the file name type (we set
+		 * the name length to zero for simplicity).
+		 */
+		if (!NVolCaseSensitive(vol) &&
+				ie->key.file_name.file_name_type &&
+				ntfs_are_names_equal(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length,
+				IGNORE_CASE, vol->upcase, vol->upcase_len)) {
+			int name_size = sizeof(ntfs_name);
+			u8 type = ie->key.file_name.file_name_type;
+			u8 len = ie->key.file_name.file_name_length;
+
+			/* Only one case insensitive matching name allowed. */
+			if (name) {
+				ntfs_error(sb, "Found already allocated name "
+						"in phase 2. Please run chkdsk "
+						"and if that doesn't find any "
+						"errors please report you saw "
+						"this message to "
+						"linux-ntfs-dev@lists.sf.net.");
+				ntfs_unmap_page(page); /* dir_err_out joins err_out, which does not unmap the page. */
+				goto dir_err_out;
+			}
+
+			if (type != FILE_NAME_DOS)
+				name_size += len * sizeof(uchar_t);
+			name = kmalloc(name_size, GFP_NOFS);
+			if (!name) {
+				err = -ENOMEM;
+				goto unm_err_out;
+			}
+			name->mref = le64_to_cpu(ie->data.dir.indexed_file);
+			name->type = type;
+			if (type != FILE_NAME_DOS) {
+				name->len = len;
+				memcpy(name->name, ie->key.file_name.file_name,
+						len * sizeof(uchar_t));
+			} else
+				name->len = 0;
+			*res = name;
+		}
+		/*
+		 * Not a perfect match, need to do full blown collation so we
+		 * know which way in the B+tree we have to go.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				IGNORE_CASE, vol->upcase, vol->upcase_len);
+		/*
+		 * If uname collates before the name of the current entry, there
+		 * is definitely no such name in this index but we might need to
+		 * descend into the B+tree so we just break out of the loop.
+		 */
+		if (rc == -1)
+			break;
+		/* The names are not equal, continue the search. */
+		if (rc)
+			continue;
+		/*
+		 * Names match with case insensitive comparison, now try the
+		 * case sensitive comparison, which is required for proper
+		 * collation.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
+		if (rc == -1)
+			break;
+		if (rc)
+			continue;
+		/*
+		 * Perfect match, this will never happen as the
+		 * ntfs_are_names_equal() call will have gotten a match but we
+		 * still treat it correctly.
+		 */
+		goto found_it2;
+	}
+	/*
+	 * We have finished with this index buffer without success. Check for
+	 * the presence of a child node.
+	 */
+	if (ie->flags & INDEX_ENTRY_NODE) {
+		if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
+			ntfs_error(sb, "Index entry with child node found in "
+					"a leaf node in directory inode 0x%lx.",
+					dir_ni->mft_no);
+			err = -EIO;
+			goto unm_err_out;
+		}
+		/* Child node present, descend into it. */
+		old_vcn = vcn;
+		vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
+		if (vcn >= 0) {
+			/* If vcn is in the same page cache page as old_vcn we
+			 * recycle the mapped page.
+			 * NOTE(review): the page index passed to
+			 * ntfs_map_page() is computed with
+			 * dir_ni->itype.index.vcn_size_bits whereas this test
+			 * shifts by vol->cluster_size_bits - confirm that the
+			 * difference is intended. */
+			if (old_vcn << vol->cluster_size_bits >>
+					PAGE_CACHE_SHIFT == vcn <<
+					vol->cluster_size_bits >>
+					PAGE_CACHE_SHIFT)
+				goto fast_descend_into_child_node;
+			ntfs_unmap_page(page);
+			goto descend_into_child_node;
+		}
+		ntfs_error(sb, "Negative child node vcn in directory inode "
+				"0x%lx.", dir_ni->mft_no);
+		err = -EIO;
+		goto unm_err_out;
+	}
+	/*
+	 * No child node present, return -ENOENT, unless we have got a matching
+	 * name cached in name in which case return the mft reference
+	 * associated with it.
+	 */
+	if (name) {
+		ntfs_unmap_page(page);
+		return name->mref;
+	}
+	ntfs_debug("Entry not found.");
+	err = -ENOENT;
+unm_err_out:
+	ntfs_unmap_page(page);
+err_out:
+	if (ctx)
+		put_attr_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(dir_ni);
+	if (name) {
+		kfree(name);
+		*res = NULL;
+	}
+	return ERR_MREF(err);
+dir_err_out:
+	ntfs_error(sb, "Corrupt directory. Aborting lookup.");
+	err = -EIO;
+	goto err_out;
+}
+
+#if 0
+
+// TODO: (AIA) +// The algorithm embedded in this code will be required for the time when we +// want to support adding of entries to directories, where we require correct +// collation of file names in order not to cause corruption of the file system.
 +
+/**
+ * ntfs_lookup_inode_by_name - find an inode in a directory given its name
+ * @dir_ni:	ntfs inode of the directory in which to search for the name
+ * @uname:	Unicode name for which to search in the directory
+ * @uname_len:	length of the name @uname in Unicode characters
+ *
+ * NOTE: This variant sits between #if 0 and #endif, i.e. it is not compiled.
+ * Unlike the live function it takes no @res argument and chooses per entry
+ * between a case sensitive and a case insensitive comparison.
+ *
+ * Look for an inode with name @uname in the directory with inode @dir_ni.
+ * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
+ * the Unicode name. If the name is found in the directory, the corresponding
+ * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
+ * is a 64-bit number containing the sequence number.
+ *
+ * On error, a negative value is returned corresponding to the error code. In
+ * particular if the inode is not found -ENOENT is returned. Note that you
+ * can't just check the return value for being negative, you have to check the
+ * inode number for being negative which you can extract using MREC(return
+ * value).
+ *
+ * Note, @uname_len does not include the (optional) terminating NULL character.
+ */
+u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const uchar_t *uname,
+		const int uname_len)
+{
+	ntfs_volume *vol = dir_ni->vol;
+	struct super_block *sb = vol->sb;
+	MFT_RECORD *m;
+	INDEX_ROOT *ir;
+	INDEX_ENTRY *ie;
+	INDEX_ALLOCATION *ia;
+	u8 *index_end;
+	u64 mref;
+	attr_search_context *ctx;
+	int err, rc;
+	IGNORE_CASE_BOOL ic;
+	VCN vcn, old_vcn;
+	struct address_space *ia_mapping;
+	struct page *page;
+	u8 *kaddr;
+
+	/* Get hold of the mft record for the directory. */
+	m = map_mft_record(dir_ni);
+	if (IS_ERR(m)) {
+		ntfs_error(sb, "map_mft_record() failed with error code %ld.",
+				-PTR_ERR(m));
+		return ERR_MREF(PTR_ERR(m));
+	}
+	ctx = get_attr_search_ctx(dir_ni, m);
+	if (!ctx) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	/* Find the index root attribute in the mft record. */
+	if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0,
+			ctx)) {
+		ntfs_error(sb, "Index root attribute missing in directory "
+				"inode 0x%lx.", dir_ni->mft_no);
+		err = -EIO;
+		goto err_out;
+	}
+	/* Get to the index root value (it's been verified in read_inode). */
+	ir = (INDEX_ROOT*)((u8*)ctx->attr +
+			le16_to_cpu(ctx->attr->data.resident.value_offset));
+	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
+	/* The first index entry. */
+	ie = (INDEX_ENTRY*)((u8*)&ir->index +
+			le32_to_cpu(ir->index.entries_offset));
+	/*
+	 * Loop until we exceed valid memory (corruption case) or until we
+	 * reach the last entry.
+	 */
+	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
+		/* Bounds checks. */
+		if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
+				sizeof(INDEX_ENTRY_HEADER) > index_end ||
+				(u8*)ie + le16_to_cpu(ie->key_length) >
+				index_end)
+			goto dir_err_out;
+		/*
+		 * The last entry cannot contain a name. It can however contain
+		 * a pointer to a child node in the B+tree so we just break out.
+		 */
+		if (ie->flags & INDEX_ENTRY_END)
+			break;
+		/*
+		 * If the current entry has a name type of POSIX, the name is
+		 * case sensitive and not otherwise. This has the effect of us
+		 * not being able to access any POSIX file names which collate
+		 * after the non-POSIX one when they only differ in case, but
+		 * anyone doing screwy stuff like that deserves to burn in
+		 * hell... Doing that kind of stuff on NT4 actually causes
+		 * corruption on the partition even when using SP6a and Linux
+		 * is not involved at all.
+		 */
+		ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
+				CASE_SENSITIVE;
+		/*
+		 * If the names match perfectly, we are done and return the
+		 * mft reference of the inode (i.e. the inode number together
+		 * with the sequence number for consistency checking). We
+		 * convert it to cpu format before returning.
+		 */
+		if (ntfs_are_names_equal(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, ic,
+				vol->upcase, vol->upcase_len)) {
+found_it:
+			mref = le64_to_cpu(ie->data.dir.indexed_file);
+			put_attr_search_ctx(ctx);
+			unmap_mft_record(dir_ni);
+			return mref;
+		}
+		/*
+		 * Not a perfect match, need to do full blown collation so we
+		 * know which way in the B+tree we have to go.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				IGNORE_CASE, vol->upcase, vol->upcase_len);
+		/*
+		 * If uname collates before the name of the current entry, there
+		 * is definitely no such name in this index but we might need to
+		 * descend into the B+tree so we just break out of the loop.
+		 */
+		if (rc == -1)
+			break;
+		/* The names are not equal, continue the search. */
+		if (rc)
+			continue;
+		/*
+		 * Names match with case insensitive comparison, now try the
+		 * case sensitive comparison, which is required for proper
+		 * collation.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
+		if (rc == -1)
+			break;
+		if (rc)
+			continue;
+		/*
+		 * Perfect match, this will never happen as the
+		 * ntfs_are_names_equal() call will have gotten a match but we
+		 * still treat it correctly.
+		 */
+		goto found_it;
+	}
+	/*
+	 * We have finished with this index without success. Check for the
+	 * presence of a child node.
+	 */
+	if (!(ie->flags & INDEX_ENTRY_NODE)) {
+		/* No child node, return -ENOENT. */
+		err = -ENOENT;
+		goto err_out;
+	} /* Child node present, descend into it. */
+	/* Consistency check: Verify that an index allocation exists. */
+	if (!NInoIndexAllocPresent(dir_ni)) {
+		ntfs_error(sb, "No index allocation attribute but index entry "
+				"requires one. Directory inode 0x%lx is "
+				"corrupt or driver bug.", dir_ni->mft_no);
+		err = -EIO;
+		goto err_out;
+	}
+	/* Get the starting vcn of the index_block holding the child node. */
+	vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
+	ia_mapping = VFS_I(dir_ni)->i_mapping;
+	/*
+	 * We are done with the index root and the mft record. Release them,
+	 * otherwise we deadlock with ntfs_map_page().
+	 */
+	put_attr_search_ctx(ctx);
+	unmap_mft_record(dir_ni);
+	m = NULL;
+	ctx = NULL;
+descend_into_child_node:
+	/*
+	 * Convert vcn to index into the index allocation attribute in units
+	 * of PAGE_CACHE_SIZE and map the page cache page, reading it from
+	 * disk if necessary.
+	 */
+	page = ntfs_map_page(ia_mapping, vcn <<
+			dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
+	if (IS_ERR(page)) {
+		ntfs_error(sb, "Failed to map directory index page, error %ld.",
+				-PTR_ERR(page));
+		err = PTR_ERR(page);
+		goto err_out;
+	}
+	kaddr = (u8*)page_address(page);
+fast_descend_into_child_node:
+	/* Get to the index allocation block. */
+	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
+			dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
+	/* Bounds checks. */
+	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
+		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
+				"inode 0x%lx or driver bug.", dir_ni->mft_no);
+		err = -EIO;
+		goto unm_err_out;
+	}
+	if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
+		ntfs_error(sb, "Actual VCN (0x%Lx) of index buffer is "
+				"different from expected VCN (0x%Lx). "
+				"Directory inode 0x%lx is corrupt or driver "
+				"bug.",
+				(long long)sle64_to_cpu(ia->index_block_vcn),
+				(long long)vcn, dir_ni->mft_no);
+		err = -EIO;
+		goto unm_err_out;
+	}
+	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
+			dir_ni->itype.index.block_size) {
+		ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode "
+				"0x%lx has a size (%u) differing from the "
+				"directory specified size (%u). Directory "
+				"inode is corrupt or driver bug.",
+				(long long)vcn, dir_ni->mft_no,
+				le32_to_cpu(ia->index.allocated_size) + 0x18,
+				dir_ni->itype.index.block_size);
+		err = -EIO;
+		goto unm_err_out;
+	}
+	index_end = (u8*)ia + dir_ni->itype.index.block_size;
+	if (index_end > kaddr + PAGE_CACHE_SIZE) {
+		ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode "
+				"0x%lx crosses page boundary. Impossible! "
+				"Cannot access! This is probably a bug in the "
+				"driver.", (long long)vcn, dir_ni->mft_no);
+		err = -EIO;
+		goto unm_err_out;
+	}
+	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
+	if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
+		ntfs_error(sb, "Size of index buffer (VCN 0x%Lx) of directory "
+				"inode 0x%lx exceeds maximum size.",
+				(long long)vcn, dir_ni->mft_no);
+		err = -EIO;
+		goto unm_err_out;
+	}
+	/* The first index entry. */
+	ie = (INDEX_ENTRY*)((u8*)&ia->index +
+			le32_to_cpu(ia->index.entries_offset));
+	/*
+	 * Iterate similar to above big loop but applied to index buffer, thus
+	 * loop until we exceed valid memory (corruption case) or until we
+	 * reach the last entry.
+	 */
+	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
+		/* Bounds check. */
+		if ((u8*)ie < (u8*)ia || (u8*)ie +
+				sizeof(INDEX_ENTRY_HEADER) > index_end ||
+				(u8*)ie + le16_to_cpu(ie->key_length) >
+				index_end) {
+			ntfs_error(sb, "Index entry out of bounds in "
+					"directory inode 0x%lx.",
+					dir_ni->mft_no);
+			err = -EIO;
+			goto unm_err_out;
+		}
+		/*
+		 * The last entry cannot contain a name. It can however contain
+		 * a pointer to a child node in the B+tree so we just break out.
+		 */
+		if (ie->flags & INDEX_ENTRY_END)
+			break;
+		/*
+		 * If the current entry has a name type of POSIX, the name is
+		 * case sensitive and not otherwise. This has the effect of us
+		 * not being able to access any POSIX file names which collate
+		 * after the non-POSIX one when they only differ in case, but
+		 * anyone doing screwy stuff like that deserves to burn in
+		 * hell... Doing that kind of stuff on NT4 actually causes
+		 * corruption on the partition even when using SP6a and Linux
+		 * is not involved at all.
+		 */
+		ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
+				CASE_SENSITIVE;
+		/*
+		 * If the names match perfectly, we are done and return the
+		 * mft reference of the inode (i.e. the inode number together
+		 * with the sequence number for consistency checking). We
+		 * convert it to cpu format before returning.
+		 */
+		if (ntfs_are_names_equal(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, ic,
+				vol->upcase, vol->upcase_len)) {
+found_it2:
+			mref = le64_to_cpu(ie->data.dir.indexed_file);
+			ntfs_unmap_page(page);
+			return mref;
+		}
+		/*
+		 * Not a perfect match, need to do full blown collation so we
+		 * know which way in the B+tree we have to go.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				IGNORE_CASE, vol->upcase, vol->upcase_len);
+		/*
+		 * If uname collates before the name of the current entry, there
+		 * is definitely no such name in this index but we might need to
+		 * descend into the B+tree so we just break out of the loop.
+		 */
+		if (rc == -1)
+			break;
+		/* The names are not equal, continue the search. */
+		if (rc)
+			continue;
+		/*
+		 * Names match with case insensitive comparison, now try the
+		 * case sensitive comparison, which is required for proper
+		 * collation.
+		 */
+		rc = ntfs_collate_names(uname, uname_len,
+				(uchar_t*)&ie->key.file_name.file_name,
+				ie->key.file_name.file_name_length, 1,
+				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
+		if (rc == -1)
+			break;
+		if (rc)
+			continue;
+		/*
+		 * Perfect match, this will never happen as the
+		 * ntfs_are_names_equal() call will have gotten a match but we
+		 * still treat it correctly.
+		 */
+		goto found_it2;
+	}
+	/*
+	 * We have finished with this index buffer without success. Check for
+	 * the presence of a child node.
+	 */
+	if (ie->flags & INDEX_ENTRY_NODE) {
+		if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
+			ntfs_error(sb, "Index entry with child node found in "
+					"a leaf node in directory inode 0x%lx.",
+					dir_ni->mft_no);
+			err = -EIO;
+			goto unm_err_out;
+		}
+		/* Child node present, descend into it. */
+		old_vcn = vcn;
+		vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
+		if (vcn >= 0) {
+			/* If vcn is in the same page cache page as old_vcn we
+			 * recycle the mapped page. */
+			if (old_vcn << vol->cluster_size_bits >>
+					PAGE_CACHE_SHIFT == vcn <<
+					vol->cluster_size_bits >>
+					PAGE_CACHE_SHIFT)
+				goto fast_descend_into_child_node;
+			ntfs_unmap_page(page);
+			goto descend_into_child_node;
+		}
+		ntfs_error(sb, "Negative child node vcn in directory inode "
+				"0x%lx.", dir_ni->mft_no);
+		err = -EIO;
+		goto unm_err_out;
+	}
+	/* No child node, return -ENOENT. */
+	ntfs_debug("Entry not found.");
+	err = -ENOENT;
+unm_err_out:
+	ntfs_unmap_page(page);
+err_out:
+	if (ctx)
+		put_attr_search_ctx(ctx);
+	if (m)
+		unmap_mft_record(dir_ni);
+	return ERR_MREF(err);
+dir_err_out:
+	ntfs_error(sb, "Corrupt directory. Aborting lookup.");
+	err = -EIO;
+	goto err_out;
+}
+
+#endif
+
+typedef union {
+	INDEX_ROOT *ir;
+	INDEX_ALLOCATION *ia;
+} index_union __attribute__ ((__transparent_union__));
+
+typedef enum {
+	INDEX_TYPE_ROOT,	/* @iu holds an index root (iu.ir) */
+	INDEX_TYPE_ALLOCATION,	/* @iu holds an index allocation (iu.ia) */
+} INDEX_TYPE;
+
+/**
+ * ntfs_filldir - ntfs specific filldir method
+ * @vol:	current ntfs volume
+ * @fpos:	position in the directory (updated on every call)
+ * @ndir:	ntfs inode of current directory
+ * @index_type:	specifies whether @iu is an index root or an index allocation
+ * @iu:		index root or index allocation attribute to which @ie belongs
+ * @ie:		current index entry
+ * @name:	buffer to use for the converted name
+ * @dirent:	vfs filldir callback context
+ * @filldir:	vfs filldir callback
+ *
+ * Convert the Unicode file name stored in @ie to the loaded NLS using
+ * ntfs_ucstonls(), with @name as the destination buffer, and pass the result
+ * to the @filldir callback.
+ *
+ * *@fpos is set before any filtering takes place. For an index root entry it
+ * is the byte offset of @ie from the start of the index root. For an index
+ * allocation entry it is the byte offset of @ie within its index block plus
+ * the block's vcn shifted left by the directory's vcn_size_bits plus
+ * @vol->mft_record_size.
+ *
+ * Return 0 without calling @filldir if the entry is skipped, i.e. if it is a
+ * DOS name space entry, the root directory self reference, a system file
+ * while NVolShowSystemFiles(@vol) is false, or if the name conversion returns
+ * a length <= 0. Otherwise return whatever @filldir returns.
+ */
+static inline int ntfs_filldir(ntfs_volume *vol, loff_t *fpos,
+		ntfs_inode *ndir, const INDEX_TYPE index_type,
+		index_union iu, INDEX_ENTRY *ie, u8 *name,
+		void *dirent, filldir_t filldir)
+{
+	int name_len;
+	unsigned dt_type;
+	FILE_NAME_TYPE_FLAGS name_type;
+
+	/* Advance the position even if going to skip the entry. */
+	if (index_type == INDEX_TYPE_ALLOCATION)
+		*fpos = (u8*)ie - (u8*)iu.ia +
+				(sle64_to_cpu(iu.ia->index_block_vcn) <<
+				ndir->itype.index.vcn_size_bits) +
+				vol->mft_record_size;
+	else /* if (index_type == INDEX_TYPE_ROOT) */
+		*fpos = (u8*)ie - (u8*)iu.ir;
+	name_type = ie->key.file_name.file_name_type;
+	if (name_type == FILE_NAME_DOS) {
+		ntfs_debug("Skipping DOS name space entry.");
+		return 0;
+	}
+	if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) {
+		ntfs_debug("Skipping root directory self reference entry.");
+		return 0;
+	}
+	if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user &&
+			!NVolShowSystemFiles(vol)) {
+		ntfs_debug("Skipping system file.");
+		return 0;
+	}
+	name_len = ntfs_ucstonls(vol, (uchar_t*)&ie->key.file_name.file_name,
+			ie->key.file_name.file_name_length, &name,
+			NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
+	if (name_len <= 0) {
+		ntfs_debug("Skipping unrepresentable file.");
+		return 0;
+	}
+	if (ie->key.file_name.file_attributes &
+			FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT)
+		dt_type = DT_DIR; /* entry has a file name index, report it as a directory */
+	else
+		dt_type = DT_REG;
+	ntfs_debug("Calling filldir for %s with len %i, fpos 0x%Lx, inode "
+			"0x%lx, DT_%s.", name, name_len, *fpos,
+			MREF_LE(ie->data.dir.indexed_file),
+			dt_type == DT_DIR ? "DIR" : "REG");
+	return filldir(dirent, name, name_len, *fpos,
+			MREF_LE(ie->data.dir.indexed_file), dt_type);
+}
+
+/*
+ * VFS calls readdir without BKL but with i_sem held. This protects the VFS
+ * parts (e.g. ->f_pos and ->i_size, and it also protects against directory
+ * modifications).
+ *
+ * We use the same basic approach as the old NTFS driver, i.e. we parse the
+ * index root entries and then the index allocation entries that are marked
+ * as in use in the index bitmap.
+ * While this will return the names in random order this doesn't matter for
+ * readdir but OTOH results in a faster readdir.
+ */ +static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; + loff_t fpos; + struct inode *bmp_vi, *vdir = filp->f_dentry->d_inode; + struct super_block *sb = vdir->i_sb; + ntfs_inode *ndir = NTFS_I(vdir); + ntfs_volume *vol = NTFS_SB(sb); + MFT_RECORD *m; + INDEX_ROOT *ir; + INDEX_ENTRY *ie; + INDEX_ALLOCATION *ia; + u8 *name = NULL; + int rc, err, ir_pos, cur_bmp_pos; + struct address_space *ia_mapping, *bmp_mapping; + struct page *bmp_page = NULL, *ia_page = NULL; + u8 *kaddr, *bmp, *index_end; + attr_search_context *ctx; + + fpos = filp->f_pos; + ntfs_debug("Entering for inode 0x%lx, fpos 0x%Lx.", + vdir->i_ino, fpos); + rc = err = 0; + /* Are we at end of dir yet? */ + if (fpos >= vdir->i_size + vol->mft_record_size) + goto done; + /* Emulate . and .. for all directories. */ + if (!fpos) { + ntfs_debug("Calling filldir for . with len 1, fpos 0x0, " + "inode 0x%lx, DT_DIR.", vdir->i_ino); + rc = filldir(dirent, ".", 1, fpos, vdir->i_ino, DT_DIR); + if (rc) + goto done; + fpos++; + } + if (fpos == 1) { + ntfs_debug("Calling filldir for .. with len 2, fpos 0x1, " + "inode 0x%lx, DT_DIR.", + parent_ino(filp->f_dentry)); + rc = filldir(dirent, "..", 2, fpos, + parent_ino(filp->f_dentry), DT_DIR); + if (rc) + goto done; + fpos++; + } + m = NULL; + ctx = NULL; + /* + * Allocate a buffer to store the current name being processed + * converted to format determined by current NLS. + */ + name = (u8*)kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, + GFP_NOFS); + if (unlikely(!name)) { + err = -ENOMEM; + goto err_out; + } + /* Are we jumping straight into the index allocation attribute? */ + if (fpos >= vol->mft_record_size) + goto skip_index_root; + /* Get hold of the mft record for the directory. 
*/ + m = map_mft_record(ndir); + if (unlikely(IS_ERR(m))) { + err = PTR_ERR(m); + m = NULL; + goto err_out; + } + ctx = get_attr_search_ctx(ndir, m); + if (unlikely(!ctx)) { + err = -ENOMEM; + goto err_out; + } + /* Get the offset into the index root attribute. */ + ir_pos = (s64)fpos; + /* Find the index root attribute in the mft record. */ + if (unlikely(!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, + NULL, 0, ctx))) { + ntfs_error(sb, "Index root attribute missing in directory " + "inode 0x%lx.", vdir->i_ino); + goto err_out; + } + /* Get to the index root value (it's been verified in read_inode). */ + ir = (INDEX_ROOT*)((u8*)ctx->attr + + le16_to_cpu(ctx->attr->data.resident.value_offset)); + index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length); + /* The first index entry. */ + ie = (INDEX_ENTRY*)((u8*)&ir->index + + le32_to_cpu(ir->index.entries_offset)); + /* + * Loop until we exceed valid memory (corruption case) or until we + * reach the last entry or until filldir tells us it has had enough + * or signals an error (both covered by the rc test). + */ + for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { + ntfs_debug("In index root, offset 0x%x.", (u8*)ie - (u8*)ir); + /* Bounds checks. */ + if (unlikely((u8*)ie < (u8*)ctx->mrec || (u8*)ie + + sizeof(INDEX_ENTRY_HEADER) > index_end || + (u8*)ie + le16_to_cpu(ie->key_length) > + index_end)) + goto err_out; + /* The last entry cannot contain a name. */ + if (ie->flags & INDEX_ENTRY_END) + break; + /* Skip index root entry if continuing previous readdir. */ + if (ir_pos > (u8*)ie - (u8*)ir) + continue; + /* Submit the name to the filldir callback. */ + rc = ntfs_filldir(vol, &fpos, ndir, INDEX_TYPE_ROOT, ir, ie, + name, dirent, filldir); + if (rc) { + put_attr_search_ctx(ctx); + unmap_mft_record(ndir); + goto abort; + } + } + /* + * We are done with the index root and the mft record for that matter. 
+ * We need to release it, otherwise we deadlock on ntfs_attr_iget() + * and/or ntfs_read_page(). + */ + put_attr_search_ctx(ctx); + unmap_mft_record(ndir); + m = NULL; + ctx = NULL; + /* If there is no index allocation attribute we are finished. */ + if (!NInoIndexAllocPresent(ndir)) + goto EOD; + /* Advance fpos to the beginning of the index allocation. */ + fpos = vol->mft_record_size; +skip_index_root: + kaddr = NULL; + prev_ia_pos = -1LL; + /* Get the offset into the index allocation attribute. */ + ia_pos = (s64)fpos - vol->mft_record_size; + ia_mapping = vdir->i_mapping; + bmp_vi = ndir->itype.index.bmp_ino; + if (unlikely(!bmp_vi)) { + ntfs_debug("Inode %lu, regetting index bitmap.", vdir->i_ino); + bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); + if (unlikely(IS_ERR(bmp_vi))) { + ntfs_error(sb, "Failed to get bitmap attribute."); + err = PTR_ERR(bmp_vi); + goto err_out; + } + ndir->itype.index.bmp_ino = bmp_vi; + } + bmp_mapping = bmp_vi->i_mapping; + /* Get the starting bitmap bit position and sanity check it. */ + bmp_pos = ia_pos >> ndir->itype.index.block_size_bits; + if (unlikely(bmp_pos >> 3 >= bmp_vi->i_size)) { + ntfs_error(sb, "Current index allocation position exceeds " + "index bitmap size."); + goto err_out; + } + /* Get the starting bit position in the current bitmap page. */ + cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1); + bmp_pos &= ~(u64)((PAGE_CACHE_SIZE * 8) - 1); +get_next_bmp_page: + ntfs_debug("Reading bitmap with page index 0x%Lx, bit ofs 0x%Lx", + (long long)bmp_pos >> (3 + PAGE_CACHE_SHIFT), + (long long)bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1)); + bmp_page = ntfs_map_page(bmp_mapping, + bmp_pos >> (3 + PAGE_CACHE_SHIFT)); + if (unlikely(IS_ERR(bmp_page))) { + ntfs_error(sb, "Reading index bitmap failed."); + err = PTR_ERR(bmp_page); + bmp_page = NULL; + goto err_out; + } + bmp = (u8*)page_address(bmp_page); + /* Find next index block in use. 
*/ + while (!(bmp[cur_bmp_pos >> 3] & (1 << (cur_bmp_pos & 7)))) { +find_next_index_buffer: + cur_bmp_pos++; + /* + * If we have reached the end of the bitmap page, get the next + * page, and put away the old one. + */ + if (unlikely((cur_bmp_pos >> 3) >= PAGE_CACHE_SIZE)) { + ntfs_unmap_page(bmp_page); + bmp_pos += PAGE_CACHE_SIZE * 8; + cur_bmp_pos = 0; + goto get_next_bmp_page; + } + /* If we have reached the end of the bitmap, we are done. */ + if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= vdir->i_size)) + goto unm_EOD; + ia_pos = (bmp_pos + cur_bmp_pos) << + ndir->itype.index.block_size_bits; + } + ntfs_debug("Handling index buffer 0x%Lx.", + (long long)bmp_pos + cur_bmp_pos); + /* If the current index buffer is in the same page we reuse the page. */ + if ((prev_ia_pos & PAGE_CACHE_MASK) != (ia_pos & PAGE_CACHE_MASK)) { + prev_ia_pos = ia_pos; + if (likely(ia_page != NULL)) + ntfs_unmap_page(ia_page); + /* + * Map the page cache page containing the current ia_pos, + * reading it from disk if necessary. + */ + ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_CACHE_SHIFT); + if (unlikely(IS_ERR(ia_page))) { + ntfs_error(sb, "Reading index allocation data failed."); + err = PTR_ERR(ia_page); + ia_page = NULL; + goto err_out; + } + kaddr = (u8*)page_address(ia_page); + } + /* Get the current index buffer. */ + ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_CACHE_MASK & + ~(s64)(ndir->itype.index.block_size - 1))); + /* Bounds checks. */ + if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE)) { + ntfs_error(sb, "Out of bounds check failed. Corrupt directory " + "inode 0x%lx or driver bug.", vdir->i_ino); + goto err_out; + } + if (unlikely(sle64_to_cpu(ia->index_block_vcn) != (ia_pos & + ~(s64)(ndir->itype.index.block_size - 1)) >> + ndir->itype.index.vcn_size_bits)) { + ntfs_error(sb, "Actual VCN (0x%Lx) of index buffer is " + "different from expected VCN (0x%Lx). " + "Directory inode 0x%lx is corrupt or driver " + "bug. 
", + (long long)sle64_to_cpu(ia->index_block_vcn), + (long long)ia_pos >> + ndir->itype.index.vcn_size_bits, vdir->i_ino); + goto err_out; + } + if (unlikely(le32_to_cpu(ia->index.allocated_size) + 0x18 != + ndir->itype.index.block_size)) { + ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode " + "0x%lx has a size (%u) differing from the " + "directory specified size (%u). Directory " + "inode is corrupt or driver bug.", + (long long)ia_pos >> + ndir->itype.index.vcn_size_bits, vdir->i_ino, + le32_to_cpu(ia->index.allocated_size) + 0x18, + ndir->itype.index.block_size); + goto err_out; + } + index_end = (u8*)ia + ndir->itype.index.block_size; + if (unlikely(index_end > kaddr + PAGE_CACHE_SIZE)) { + ntfs_error(sb, "Index buffer (VCN 0x%Lx) of directory inode " + "0x%lx crosses page boundary. Impossible! " + "Cannot access! This is probably a bug in the " + "driver.", (long long)ia_pos >> + ndir->itype.index.vcn_size_bits, vdir->i_ino); + goto err_out; + } + ia_start = ia_pos & ~(s64)(ndir->itype.index.block_size - 1); + index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length); + if (unlikely(index_end > (u8*)ia + ndir->itype.index.block_size)) { + ntfs_error(sb, "Size of index buffer (VCN 0x%Lx) of directory " + "inode 0x%lx exceeds maximum size.", + (long long)ia_pos >> + ndir->itype.index.vcn_size_bits, vdir->i_ino); + goto err_out; + } + /* The first index entry in this index buffer. */ + ie = (INDEX_ENTRY*)((u8*)&ia->index + + le32_to_cpu(ia->index.entries_offset)); + /* + * Loop until we exceed valid memory (corruption case) or until we + * reach the last entry or until filldir tells us it has had enough + * or signals an error (both covered by the rc test). + */ + for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) { + ntfs_debug("In index allocation, offset 0x%Lx.", + (long long)ia_start + ((u8*)ie - (u8*)ia)); + /* Bounds checks. 
*/ + if (unlikely((u8*)ie < (u8*)ia || (u8*)ie + + sizeof(INDEX_ENTRY_HEADER) > index_end || + (u8*)ie + le16_to_cpu(ie->key_length) > + index_end)) + goto err_out; + /* The last entry cannot contain a name. */ + if (ie->flags & INDEX_ENTRY_END) + break; + /* Skip index block entry if continuing previous readdir. */ + if (ia_pos - ia_start > (u8*)ie - (u8*)ia) + continue; + /* Submit the name to the filldir callback. */ + rc = ntfs_filldir(vol, &fpos, ndir, INDEX_TYPE_ALLOCATION, ia, + ie, name, dirent, filldir); + if (rc) { + ntfs_unmap_page(ia_page); + ntfs_unmap_page(bmp_page); + goto abort; + } + } + goto find_next_index_buffer; +unm_EOD: + if (ia_page) + ntfs_unmap_page(ia_page); + ntfs_unmap_page(bmp_page); +EOD: + /* We are finished, set fpos to EOD. */ + fpos = vdir->i_size + vol->mft_record_size; +abort: + kfree(name); +done: +#ifdef DEBUG + if (!rc) + ntfs_debug("EOD, fpos 0x%Lx, returning 0.", fpos); + else + ntfs_debug("filldir returned %i, fpos 0x%Lx, returning 0.", + rc, fpos); +#endif + filp->f_pos = fpos; + return 0; +err_out: + if (bmp_page) + ntfs_unmap_page(bmp_page); + if (ia_page) + ntfs_unmap_page(ia_page); + if (name) + kfree(name); + if (ctx) + put_attr_search_ctx(ctx); + if (m) + unmap_mft_record(ndir); + if (!err) + err = -EIO; + ntfs_debug("Failed. Returning error code %i.", -err); + filp->f_pos = fpos; + return err; +} + +/** + * ntfs_dir_open - called when an inode is about to be opened + * @vi: inode to be opened + * @filp: file structure describing the inode + * + * Limit directory size to the page cache limit on architectures where unsigned + * long is 32-bits. This is the most we can do for now without overflowing the + * page cache page index. Doing it this way means we don't run into problems + * because of existing too large directories. 
It would be better to allow the + * user to read the accessible part of the directory but I doubt very much + * anyone is going to hit this check on a 32-bit architecture, so there is no + * point in adding the extra complexity required to support this. + * + * On 64-bit architectures, the check is hopefully optimized away by the + * compiler. + */ +static int ntfs_dir_open(struct inode *vi, struct file *filp) +{ + if (sizeof(unsigned long) < 8) { + if (vi->i_size > MAX_LFS_FILESIZE) + return -EFBIG; + } + return 0; +} + +struct file_operations ntfs_dir_ops = { + .llseek = generic_file_llseek, /* Seek inside directory. */ + .read = generic_read_dir, /* Return -EISDIR. */ + .readdir = ntfs_readdir, /* Read directory contents. */ + .open = ntfs_dir_open, /* Open directory. */ +}; + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/dir.h b/reactos/drivers/fs/ntfs/linux-ntfs/dir.h new file mode 100644 index 00000000000..7b8ebee5d8c --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/dir.h @@ -0,0 +1,47 @@ +/* + * dir.h - Defines for directory handling in NTFS Linux kernel driver. Part of + * the Linux-NTFS project. + * + * Copyright (c) 2002 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_DIR_H +#define _LINUX_NTFS_DIR_H + +#include "layout.h" + +/* + * ntfs_name is used to return the file name to the caller of + * ntfs_lookup_inode_by_name() in order for the caller (namei.c::ntfs_lookup()) + * to be able to deal with dcache aliasing issues. + */ +typedef struct { + MFT_REF mref; + FILE_NAME_TYPE_FLAGS type; + u8 len; + uchar_t name[0]; +} __attribute__ ((__packed__)) ntfs_name; + +/* The little endian Unicode string $I30 as a global constant. */ +extern uchar_t I30[5]; + +extern MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, + const uchar_t *uname, const int uname_len, ntfs_name **res); + +#endif /* _LINUX_NTFS_DIR_H */ + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/endian.h b/reactos/drivers/fs/ntfs/linux-ntfs/endian.h new file mode 100644 index 00000000000..7a40cdd8147 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/endian.h @@ -0,0 +1,48 @@ +/* + * endian.h - Defines for endianness handling in NTFS Linux kernel driver. + * Part of the Linux-NTFS project. + * + * Copyright (c) 2001 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_ENDIAN_H +#define _LINUX_NTFS_ENDIAN_H + +#include + +/* + * Signed endianness conversion defines. + */ +#define sle16_to_cpu(x) ((s16)__le16_to_cpu((s16)(x))) +#define sle32_to_cpu(x) ((s32)__le32_to_cpu((s32)(x))) +#define sle64_to_cpu(x) ((s64)__le64_to_cpu((s64)(x))) + +#define sle16_to_cpup(x) ((s16)__le16_to_cpu(*(s16*)(x))) +#define sle32_to_cpup(x) ((s32)__le32_to_cpu(*(s32*)(x))) +#define sle64_to_cpup(x) ((s64)__le64_to_cpu(*(s64*)(x))) + +#define cpu_to_sle16(x) ((s16)__cpu_to_le16((s16)(x))) +#define cpu_to_sle32(x) ((s32)__cpu_to_le32((s32)(x))) +#define cpu_to_sle64(x) ((s64)__cpu_to_le64((s64)(x))) + +#define cpu_to_sle16p(x) ((s16)__cpu_to_le16(*(s16*)(x))) +#define cpu_to_sle32p(x) ((s32)__cpu_to_le32(*(s32*)(x))) +#define cpu_to_sle64p(x) ((s64)__cpu_to_le64(*(s64*)(x))) + +#endif /* _LINUX_NTFS_ENDIAN_H */ + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/file.c b/reactos/drivers/fs/ntfs/linux-ntfs/file.c new file mode 100644 index 00000000000..234be728017 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/file.c @@ -0,0 +1,76 @@ +/* + * file.c - NTFS kernel file operations. Part of the Linux-NTFS project. + * + * Copyright (c) 2001 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "ntfs.h" + +/** + * ntfs_file_open - called when an inode is about to be opened + * @vi: inode to be opened + * @filp: file structure describing the inode + * + * Limit file size to the page cache limit on architectures where unsigned long + * is 32-bits. This is the most we can do for now without overflowing the page + * cache page index. Doing it this way means we don't run into problems because + * of existing too large files. It would be better to allow the user to read + * the beginning of the file but I doubt very much anyone is going to hit this + * check on a 32-bit architecture, so there is no point in adding the extra + * complexity required to support this. + * + * On 64-bit architectures, the check is hopefully optimized away by the + * compiler. + * + * After the check passes, just call generic_file_open() to do its work. + */ +static int ntfs_file_open(struct inode *vi, struct file *filp) +{ + if (sizeof(unsigned long) < 8) { + if (vi->i_size > MAX_LFS_FILESIZE) + return -EFBIG; + } + return generic_file_open(vi, filp); +} + +struct file_operations ntfs_file_ops = { + .llseek = generic_file_llseek, /* Seek inside file. */ + .read = generic_file_read, /* Read from file. */ +#ifdef NTFS_RW + .write = generic_file_write, /* Write to a file. */ +#endif + .mmap = generic_file_mmap, /* Mmap file. */ + .sendfile = generic_file_sendfile,/* Zero-copy data send with the + data source being on the + ntfs partition. We don't + need to care about the data + destination. */ + .open = ntfs_file_open, /* Open file. 
*/ +}; + +struct inode_operations ntfs_file_inode_ops = { +#ifdef NTFS_RW + .truncate = ntfs_truncate, + .setattr = ntfs_setattr, +#endif +}; + +struct file_operations ntfs_empty_file_ops = {}; + +struct inode_operations ntfs_empty_inode_ops = {}; + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/inode.c b/reactos/drivers/fs/ntfs/linux-ntfs/inode.c new file mode 100644 index 00000000000..fc1c2dbff04 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/inode.c @@ -0,0 +1,2025 @@ +/** + * inode.c - NTFS kernel inode handling. Part of the Linux-NTFS project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include + +#include "ntfs.h" +#include "dir.h" +#include "inode.h" +#include "attrib.h" + +/** + * ntfs_attr - ntfs in memory attribute structure + * @mft_no: mft record number of the base mft record of this attribute + * @name: Unicode name of the attribute (NULL if unnamed) + * @name_len: length of @name in Unicode characters (0 if unnamed) + * @type: attribute type (see layout.h) + * + * This structure exists only to provide a small structure for the + * ntfs_{attr_}iget()/ntfs_test_inode()/ntfs_init_locked_inode() mechanism. + * + * NOTE: Elements are ordered by size to make the structure as compact as + * possible on all architectures. + */ +typedef struct { + unsigned long mft_no; + uchar_t *name; + u32 name_len; + ATTR_TYPES type; +} ntfs_attr; + +/** + * ntfs_test_inode - compare two (possibly fake) inodes for equality + * @vi: vfs inode which to test + * @na: ntfs attribute which is being tested with + * + * Compare the ntfs attribute embedded in the ntfs specific part of the vfs + * inode @vi for equality with the ntfs attribute @na. + * + * If searching for the normal file/directory inode, set @na->type to AT_UNUSED. + * @na->name and @na->name_len are then ignored. + * + * Return 1 if the attributes match and 0 if not. + * + * NOTE: This function runs with the inode_lock spin lock held so it is not + * allowed to sleep. + */ +static int ntfs_test_inode(struct inode *vi, ntfs_attr *na) +{ + ntfs_inode *ni; + + if (vi->i_ino != na->mft_no) + return 0; + ni = NTFS_I(vi); + /* If !NInoAttr(ni), @vi is a normal file or directory inode. 
*/ + if (likely(!NInoAttr(ni))) { + /* If not looking for a normal inode this is a mismatch. */ + if (unlikely(na->type != AT_UNUSED)) + return 0; + } else { + /* A fake inode describing an attribute. */ + if (ni->type != na->type) + return 0; + if (ni->name_len != na->name_len) + return 0; + if (na->name_len && memcmp(ni->name, na->name, + na->name_len * sizeof(uchar_t))) + return 0; + } + /* Match! */ + return 1; +} + +/** + * ntfs_init_locked_inode - initialize an inode + * @vi: vfs inode to initialize + * @na: ntfs attribute which to initialize @vi to + * + * Initialize the vfs inode @vi with the values from the ntfs attribute @na in + * order to enable ntfs_test_inode() to do its work. + * + * If initializing the normal file/directory inode, set @na->type to AT_UNUSED. + * In that case, @na->name and @na->name_len should be set to NULL and 0, + * respectively. Although that is not strictly necessary as + * ntfs_read_inode_locked() will fill them in later. + * + * Return 0 on success and -errno on error. + * + * NOTE: This function runs with the inode_lock spin lock held so it is not + * allowed to sleep. (Hence the GFP_ATOMIC allocation.) + */ +static int ntfs_init_locked_inode(struct inode *vi, ntfs_attr *na) +{ + ntfs_inode *ni = NTFS_I(vi); + + vi->i_ino = na->mft_no; + + ni->type = na->type; + if (na->type == AT_INDEX_ALLOCATION) + NInoSetMstProtected(ni); + + ni->name = na->name; + ni->name_len = na->name_len; + + /* If initializing a normal inode, we are done. */ + if (likely(na->type == AT_UNUSED)) + return 0; + + /* It is a fake inode. */ + NInoSetAttr(ni); + + /* + * We have I30 global constant as an optimization as it is the name + * in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC + * allocation but that is ok. And most attributes are unnamed anyway, + * thus the fraction of named attributes with name != I30 is actually + * absolutely tiny. 
+ */ + if (na->name && na->name_len && na->name != I30) { + unsigned int i; + + i = na->name_len * sizeof(uchar_t); + ni->name = (uchar_t*)kmalloc(i + sizeof(uchar_t), GFP_ATOMIC); + if (!ni->name) + return -ENOMEM; + memcpy(ni->name, na->name, i); + ni->name[i] = cpu_to_le16('\0'); + } + return 0; +} + +typedef int (*test_t)(struct inode *, void *); +typedef int (*set_t)(struct inode *, void *); +static int ntfs_read_locked_inode(struct inode *vi); +static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi); + +/** + * ntfs_iget - obtain a struct inode corresponding to a specific normal inode + * @sb: super block of mounted volume + * @mft_no: mft record number / inode number to obtain + * + * Obtain the struct inode corresponding to a specific normal inode (i.e. a + * file or directory). + * + * If the inode is in the cache, it is just returned with an increased + * reference count. Otherwise, a new struct inode is allocated and initialized, + * and finally ntfs_read_locked_inode() is called to read in the inode and + * fill in the remainder of the inode structure. + * + * Return the struct inode on success. Check the return value with IS_ERR() and + * if true, the function failed and the error code is obtained from PTR_ERR(). + */ +struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no) +{ + struct inode *vi; + ntfs_attr na; + int err; + + na.mft_no = mft_no; + na.type = AT_UNUSED; + na.name = NULL; + na.name_len = 0; + + vi = iget5_locked(sb, mft_no, (test_t)ntfs_test_inode, + (set_t)ntfs_init_locked_inode, &na); + if (!vi) + return ERR_PTR(-ENOMEM); + + err = 0; + + /* If this is a freshly allocated inode, need to read it now. */ + if (vi->i_state & I_NEW) { + err = ntfs_read_locked_inode(vi); + unlock_new_inode(vi); + } + /* + * There is no point in keeping bad inodes around if the failure was + * due to ENOMEM. We want to be able to retry again layer. 
+ */ + if (err == -ENOMEM) { + iput(vi); + vi = ERR_PTR(err); + } + return vi; +} + +/** + * ntfs_attr_iget - obtain a struct inode corresponding to an attribute + * @base_vi: vfs base inode containing the attribute + * @type: attribute type + * @name: Unicode name of the attribute (NULL if unnamed) + * @name_len: length of @name in Unicode characters (0 if unnamed) + * + * Obtain the (fake) struct inode corresponding to the attribute specified by + * @type, @name, and @name_len, which is present in the base mft record + * specified by the vfs inode @base_vi. + * + * If the attribute inode is in the cache, it is just returned with an + * increased reference count. Otherwise, a new struct inode is allocated and + * initialized, and finally ntfs_read_locked_attr_inode() is called to read the + * attribute and fill in the inode structure. + * + * Return the struct inode of the attribute inode on success. Check the return + * value with IS_ERR() and if true, the function failed and the error code is + * obtained from PTR_ERR(). + */ +struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPES type, + uchar_t *name, u32 name_len) +{ + struct inode *vi; + ntfs_attr na; + int err; + + na.mft_no = base_vi->i_ino; + na.type = type; + na.name = name; + na.name_len = name_len; + + vi = iget5_locked(base_vi->i_sb, na.mft_no, (test_t)ntfs_test_inode, + (set_t)ntfs_init_locked_inode, &na); + if (!vi) + return ERR_PTR(-ENOMEM); + + err = 0; + + /* If this is a freshly allocated inode, need to read it now. */ + if (vi->i_state & I_NEW) { + err = ntfs_read_locked_attr_inode(base_vi, vi); + unlock_new_inode(vi); + } + /* + * There is no point in keeping bad attribute inodes around. This also + * simplifies things in that we never need to check for bad attribute + * inodes elsewhere. 
+ */ + if (err) { + iput(vi); + vi = ERR_PTR(err); + } + return vi; +} + +struct inode *ntfs_alloc_big_inode(struct super_block *sb) +{ + ntfs_inode *ni; + + ntfs_debug("Entering."); + ni = (ntfs_inode *)kmem_cache_alloc(ntfs_big_inode_cache, + SLAB_NOFS); + if (likely(ni != NULL)) { + ni->state = 0; + return VFS_I(ni); + } + ntfs_error(sb, "Allocation of NTFS big inode structure failed."); + return NULL; +} + +void ntfs_destroy_big_inode(struct inode *inode) +{ + ntfs_inode *ni = NTFS_I(inode); + + ntfs_debug("Entering."); + BUG_ON(ni->page); + if (!atomic_dec_and_test(&ni->count)) + BUG(); + kmem_cache_free(ntfs_big_inode_cache, NTFS_I(inode)); +} + +static inline ntfs_inode *ntfs_alloc_extent_inode(void) +{ + ntfs_inode *ni; + + ntfs_debug("Entering."); + ni = (ntfs_inode *)kmem_cache_alloc(ntfs_inode_cache, SLAB_NOFS); + if (likely(ni != NULL)) { + ni->state = 0; + return ni; + } + ntfs_error(NULL, "Allocation of NTFS inode structure failed."); + return NULL; +} + +void ntfs_destroy_extent_inode(ntfs_inode *ni) +{ + ntfs_debug("Entering."); + BUG_ON(ni->page); + if (!atomic_dec_and_test(&ni->count)) + BUG(); + kmem_cache_free(ntfs_inode_cache, ni); +} + +/** + * __ntfs_init_inode - initialize ntfs specific part of an inode + * @sb: super block of mounted volume + * @ni: freshly allocated ntfs inode which to initialize + * + * Initialize an ntfs inode to defaults. + * + * NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left + * untouched. Make sure to initialize them elsewhere. + * + * This function does not allocate anything, cannot fail and returns nothing. 
+ */ +static void __ntfs_init_inode(struct super_block *sb, ntfs_inode *ni) +{ + ntfs_debug("Entering."); + ni->initialized_size = ni->allocated_size = 0; + ni->seq_no = 0; + atomic_set(&ni->count, 1); + ni->vol = NTFS_SB(sb); + init_run_list(&ni->run_list); + init_MUTEX(&ni->mrec_lock); + ni->page = NULL; + ni->page_ofs = 0; + ni->attr_list_size = 0; + ni->attr_list = NULL; + init_run_list(&ni->attr_list_rl); + ni->itype.index.bmp_ino = NULL; + ni->itype.index.block_size = 0; + ni->itype.index.vcn_size = 0; + ni->itype.index.block_size_bits = 0; + ni->itype.index.vcn_size_bits = 0; + init_MUTEX(&ni->extent_lock); + ni->nr_extents = 0; + ni->ext.base_ntfs_ino = NULL; + return; +} + +static inline void ntfs_init_big_inode(struct inode *vi) +{ + ntfs_inode *ni = NTFS_I(vi); + + ntfs_debug("Entering."); + __ntfs_init_inode(vi->i_sb, ni); + ni->mft_no = vi->i_ino; + return; +} + +inline ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, + unsigned long mft_no) +{ + ntfs_inode *ni = ntfs_alloc_extent_inode(); + + ntfs_debug("Entering."); + if (likely(ni != NULL)) { + __ntfs_init_inode(sb, ni); + ni->mft_no = mft_no; + ni->type = AT_UNUSED; + ni->name = NULL; + ni->name_len = 0; + } + return ni; +} + +/** + * ntfs_is_extended_system_file - check if a file is in the $Extend directory + * @ctx: initialized attribute search context + * + * Search all file name attributes in the inode described by the attribute + * search context @ctx and check if any of the names are in the $Extend system + * directory. + * + * Return values: + * 1: file is in $Extend directory + * 0: file is not in $Extend directory + * -EIO: file is corrupt + */ +static int ntfs_is_extended_system_file(attr_search_context *ctx) +{ + int nr_links; + + /* Restart search. */ + reinit_attr_search_ctx(ctx); + + /* Get number of hard links. */ + nr_links = le16_to_cpu(ctx->mrec->link_count); + + /* Loop through all hard links. 
*/ + while (lookup_attr(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, ctx)) { + FILE_NAME_ATTR *file_name_attr; + ATTR_RECORD *attr = ctx->attr; + u8 *p, *p2; + + nr_links--; + /* + * Maximum sanity checking as we are called on an inode that + * we suspect might be corrupt. + */ + p = (u8*)attr + le32_to_cpu(attr->length); + if (p < (u8*)ctx->mrec || (u8*)p > (u8*)ctx->mrec + + le32_to_cpu(ctx->mrec->bytes_in_use)) { +err_corrupt_attr: + ntfs_error(ctx->ntfs_ino->vol->sb, "Corrupt file name " + "attribute. You should run chkdsk."); + return -EIO; + } + if (attr->non_resident) { + ntfs_error(ctx->ntfs_ino->vol->sb, "Non-resident file " + "name. You should run chkdsk."); + return -EIO; + } + if (attr->flags) { + ntfs_error(ctx->ntfs_ino->vol->sb, "File name with " + "invalid flags. You should run " + "chkdsk."); + return -EIO; + } + if (!(attr->data.resident.flags & RESIDENT_ATTR_IS_INDEXED)) { + ntfs_error(ctx->ntfs_ino->vol->sb, "Unindexed file " + "name. You should run chkdsk."); + return -EIO; + } + file_name_attr = (FILE_NAME_ATTR*)((u8*)attr + + le16_to_cpu(attr->data.resident.value_offset)); + p2 = (u8*)file_name_attr + le32_to_cpu(attr->data.resident.value_length); /* end of value = value start + value length */ + if (p2 < (u8*)attr || p2 > p) + goto err_corrupt_attr; + /* This attribute is ok, but is it in the $Extend directory? */ + if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend) + return 1; /* YES, it's an extended system file. */ + } + if (nr_links) { + ntfs_error(ctx->ntfs_ino->vol->sb, "Inode hard link count " + "doesn't match number of name attributes. You " + "should run chkdsk."); + return -EIO; + } + return 0; /* NO, it is not an extended system file. */ +} + +/** + * ntfs_read_locked_inode - read an inode from its device + * @vi: inode to read + * + * ntfs_read_locked_inode() is called from ntfs_iget() to read the inode + * described by @vi into memory from the device. 
+ * + * The only fields in @vi that we need to/can look at when the function is + * called are i_sb, pointing to the mounted device's super block, and i_ino, + * the number of the inode to load. If this is a fake inode, i.e. NInoAttr(), + * then the fields type, name, and name_len are also valid, and describe the + * attribute which this fake inode represents. + * + * ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino + * for reading and sets up the necessary @vi fields as well as initializing + * the ntfs inode. + * + * Q: What locks are held when the function is called? + * A: i_state has I_LOCK set, hence the inode is locked, also + * i_count is set to 1, so it is not going to go away + * i_flags is set to 0 and we have no business touching it. Only an ioctl() + * is allowed to write to them. We should of course be honouring them but + * we need to do that using the IS_* macros defined in include/linux/fs.h. + * In any case ntfs_read_locked_inode() has nothing to do with i_flags. + * + * Return 0 on success and -errno on error. In the error case, the inode will + * have had make_bad_inode() executed on it. + */ +static int ntfs_read_locked_inode(struct inode *vi) +{ + ntfs_volume *vol = NTFS_SB(vi->i_sb); + ntfs_inode *ni; + MFT_RECORD *m; + STANDARD_INFORMATION *si; + attr_search_context *ctx; + int err = 0; + + ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino); + + /* Setup the generic vfs inode parts now. */ + + /* This is the optimal IO size (for stat), not the fs block size. */ + vi->i_blksize = PAGE_CACHE_SIZE; + /* + * This is for checking whether an inode has changed w.r.t. a file so + * that the file can be updated if necessary (compare with f_version). + */ + vi->i_version = 1; + + vi->i_uid = vol->uid; + vi->i_gid = vol->gid; + vi->i_mode = 0; + + /* + * Initialize the ntfs specific part of @vi special casing + * FILE_MFT which we need to do at mount time. 
+ */ + if (vi->i_ino != FILE_MFT) + ntfs_init_big_inode(vi); + ni = NTFS_I(vi); + + m = map_mft_record(ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + goto err_out; + } + ctx = get_attr_search_ctx(ni, m); + if (!ctx) { + err = -ENOMEM; + goto unm_err_out; + } + + if (!(m->flags & MFT_RECORD_IN_USE)) { + ntfs_error(vi->i_sb, "Inode is not in use! You should " + "run chkdsk."); + goto unm_err_out; + } + if (m->base_mft_record) { + ntfs_error(vi->i_sb, "Inode is an extent inode! You should " + "run chkdsk."); + goto unm_err_out; + } + + /* Transfer information from mft record into vfs and ntfs inodes. */ + ni->seq_no = le16_to_cpu(m->sequence_number); + + /* + * FIXME: Keep in mind that link_count is two for files which have both + * a long file name and a short file name as separate entries, so if + * we are hiding short file names this will be too high. Either we need + * to account for the short file names by subtracting them or we need + * to make sure we delete files even though i_nlink is not zero which + * might be tricky due to vfs interactions. Need to think about this + * some more when implementing the unlink command. + */ + vi->i_nlink = le16_to_cpu(m->link_count); + /* + * FIXME: Reparse points can have the directory bit set even though + * they would be S_IFLNK. Need to deal with this further below when we + * implement reparse points / symbolic links but it will do for now. + * Also if not a directory, it could be something else, rather than + * a regular file. But again, will do for now. + */ + if (m->flags & MFT_RECORD_IS_DIRECTORY) { + vi->i_mode |= S_IFDIR; + /* Things break without this kludge! */ + if (vi->i_nlink > 1) + vi->i_nlink = 1; + } else + vi->i_mode |= S_IFREG; + + /* + * Find the standard information attribute in the mft record. At this + * stage we haven't setup the attribute list stuff yet, so this could + * in fact fail if the standard information is in an extent record, but + * I don't think this actually ever happens. 
+ */ + if (!lookup_attr(AT_STANDARD_INFORMATION, NULL, 0, 0, 0, NULL, 0, + ctx)) { + /* + * TODO: We should be performing a hot fix here (if the recover + * mount option is set) by creating a new attribute. + */ + ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute is " + "missing."); + goto unm_err_out; + } + /* Get the standard information attribute value. */ + si = (STANDARD_INFORMATION*)((char*)ctx->attr + + le16_to_cpu(ctx->attr->data.resident.value_offset)); + + /* Transfer information from the standard information into vfs_ino. */ + /* + * Note: The i_?times do not quite map perfectly onto the NTFS times, + * but they are close enough, and in the end it doesn't really matter + * that much... + */ + /* + * mtime is the last change of the data within the file. Not changed + * when only metadata is changed, e.g. a rename doesn't affect mtime. + */ + vi->i_mtime.tv_sec = ntfs2utc(si->last_data_change_time); + vi->i_mtime.tv_nsec = 0; + /* + * ctime is the last change of the metadata of the file. This obviously + * always changes, when mtime is changed. ctime can be changed on its + * own, mtime is then not changed, e.g. when a file is renamed. + */ + vi->i_ctime.tv_sec = ntfs2utc(si->last_mft_change_time); + vi->i_ctime.tv_nsec = 0; + /* + * Last access to the data within the file. Not changed during a rename + * for example but changed whenever the file is written to. + */ + vi->i_atime.tv_sec = ntfs2utc(si->last_access_time); + vi->i_atime.tv_nsec = 0; + + /* Find the attribute list attribute if present. 
*/ + reinit_attr_search_ctx(ctx); + if (lookup_attr(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx)) { + if (vi->i_ino == FILE_MFT) + goto skip_attr_list_load; + ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino); + NInoSetAttrList(ni); + if (ctx->attr->flags & ATTR_IS_ENCRYPTED || + ctx->attr->flags & ATTR_COMPRESSION_MASK || + ctx->attr->flags & ATTR_IS_SPARSE) { + ntfs_error(vi->i_sb, "Attribute list attribute is " + "compressed/encrypted/sparse. Not " + "allowed. Corrupt inode. You should " + "run chkdsk."); + goto unm_err_out; + } + /* Now allocate memory for the attribute list. */ + ni->attr_list_size = (u32)attribute_value_length(ctx->attr); + ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); + if (!ni->attr_list) { + ntfs_error(vi->i_sb, "Not enough memory to allocate " + "buffer for attribute list."); + err = -ENOMEM; + goto unm_err_out; + } + if (ctx->attr->non_resident) { + NInoSetAttrListNonResident(ni); + if (ctx->attr->data.non_resident.lowest_vcn) { + ntfs_error(vi->i_sb, "Attribute list has non " + "zero lowest_vcn. Inode is " + "corrupt. You should run " + "chkdsk."); + goto unm_err_out; + } + /* + * Setup the run list. No need for locking as we have + * exclusive access to the inode at this time. + */ + ni->attr_list_rl.rl = decompress_mapping_pairs(vol, + ctx->attr, NULL); + if (IS_ERR(ni->attr_list_rl.rl)) { + err = PTR_ERR(ni->attr_list_rl.rl); + ni->attr_list_rl.rl = NULL; + ntfs_error(vi->i_sb, "Mapping pairs " + "decompression failed with " + "error code %i. Corrupt " + "attribute list in inode.", + -err); + goto unm_err_out; + } + /* Now load the attribute list. */ + if ((err = load_attribute_list(vol, &ni->attr_list_rl, + ni->attr_list, ni->attr_list_size, + sle64_to_cpu(ctx->attr->data. 
+ non_resident.initialized_size)))) { + ntfs_error(vi->i_sb, "Failed to load " + "attribute list attribute."); + goto unm_err_out; + } + } else /* if (!ctx.attr->non_resident) */ { + if ((u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset) + + le32_to_cpu( + ctx->attr->data.resident.value_length) > + (u8*)ctx->mrec + vol->mft_record_size) { + ntfs_error(vi->i_sb, "Corrupt attribute list " + "in inode."); + goto unm_err_out; + } + /* Now copy the attribute list. */ + memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset), + le32_to_cpu( + ctx->attr->data.resident.value_length)); + } + } +skip_attr_list_load: + /* + * If an attribute list is present we now have the attribute list value + * in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes. + */ + if (S_ISDIR(vi->i_mode)) { + struct inode *bvi; + ntfs_inode *bni; + INDEX_ROOT *ir; + char *ir_end, *index_end; + + /* It is a directory, find index root attribute. */ + reinit_attr_search_ctx(ctx); + if (!lookup_attr(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, + NULL, 0, ctx)) { + // FIXME: File is corrupt! Hot-fix with empty index + // root attribute if recovery option is set. + ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is " + "missing."); + goto unm_err_out; + } + /* Set up the state. */ + if (ctx->attr->non_resident) { + ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is " + "not resident. Not allowed."); + goto unm_err_out; + } + /* + * Compressed/encrypted index root just means that the newly + * created files in that directory should be created compressed/ + * encrypted. However index root cannot be both compressed and + * encrypted. + */ + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) + NInoSetCompressed(ni); + if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + ntfs_error(vi->i_sb, "Found encrypted and " + "compressed attribute. 
Not " + "allowed."); + goto unm_err_out; + } + NInoSetEncrypted(ni); + } + if (ctx->attr->flags & ATTR_IS_SPARSE) + NInoSetSparse(ni); + ir = (INDEX_ROOT*)((char*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset)); + ir_end = (char*)ir + le32_to_cpu( + ctx->attr->data.resident.value_length); + if (ir_end > (char*)ctx->mrec + vol->mft_record_size) { + ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is " + "corrupt."); + goto unm_err_out; + } + index_end = (char*)&ir->index + + le32_to_cpu(ir->index.index_length); + if (index_end > ir_end) { + ntfs_error(vi->i_sb, "Directory index is corrupt."); + goto unm_err_out; + } + if (ir->type != AT_FILE_NAME) { + ntfs_error(vi->i_sb, "Indexed attribute is not " + "$FILE_NAME. Not allowed."); + goto unm_err_out; + } + if (ir->collation_rule != COLLATION_FILE_NAME) { + ntfs_error(vi->i_sb, "Index collation rule is not " + "COLLATION_FILE_NAME. Not allowed."); + goto unm_err_out; + } + ni->itype.index.block_size = le32_to_cpu(ir->index_block_size); + if (ni->itype.index.block_size & + (ni->itype.index.block_size - 1)) { + ntfs_error(vi->i_sb, "Index block size (%u) is not a " + "power of two.", + ni->itype.index.block_size); + goto unm_err_out; + } + if (ni->itype.index.block_size > PAGE_CACHE_SIZE) { + ntfs_error(vi->i_sb, "Index block size (%u) > " + "PAGE_CACHE_SIZE (%ld) is not " + "supported. Sorry.", + ni->itype.index.block_size, + PAGE_CACHE_SIZE); + err = -EOPNOTSUPP; + goto unm_err_out; + } + if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) { + ntfs_error(vi->i_sb, "Index block size (%u) < " + "NTFS_BLOCK_SIZE (%i) is not " + "supported. Sorry.", + ni->itype.index.block_size, + NTFS_BLOCK_SIZE); + err = -EOPNOTSUPP; + goto unm_err_out; + } + ni->itype.index.block_size_bits = + ffs(ni->itype.index.block_size) - 1; + /* Determine the size of a vcn in the directory index. 
*/ + if (vol->cluster_size <= ni->itype.index.block_size) { + ni->itype.index.vcn_size = vol->cluster_size; + ni->itype.index.vcn_size_bits = vol->cluster_size_bits; + } else { + ni->itype.index.vcn_size = vol->sector_size; + ni->itype.index.vcn_size_bits = vol->sector_size_bits; + } + + /* Setup the index allocation attribute, even if not present. */ + NInoSetMstProtected(ni); + ni->type = AT_INDEX_ALLOCATION; + ni->name = I30; + ni->name_len = 4; + + if (!(ir->index.flags & LARGE_INDEX)) { + /* No index allocation. */ + vi->i_size = ni->initialized_size = + ni->allocated_size = 0; + /* We are done with the mft record, so we release it. */ + put_attr_search_ctx(ctx); + unmap_mft_record(ni); + m = NULL; + ctx = NULL; + goto skip_large_dir_stuff; + } /* LARGE_INDEX: Index allocation present. Setup state. */ + NInoSetIndexAllocPresent(ni); + /* Find index allocation attribute. */ + reinit_attr_search_ctx(ctx); + if (!lookup_attr(AT_INDEX_ALLOCATION, I30, 4, CASE_SENSITIVE, + 0, NULL, 0, ctx)) { + ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " + "is not present but $INDEX_ROOT " + "indicated it is."); + goto unm_err_out; + } + if (!ctx->attr->non_resident) { + ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " + "is resident."); + goto unm_err_out; + } + if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { + ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " + "is encrypted."); + goto unm_err_out; + } + if (ctx->attr->flags & ATTR_IS_SPARSE) { + ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " + "is sparse."); + goto unm_err_out; + } + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute " + "is compressed."); + goto unm_err_out; + } + if (ctx->attr->data.non_resident.lowest_vcn) { + ntfs_error(vi->i_sb, "First extent of " + "$INDEX_ALLOCATION attribute has non " + "zero lowest_vcn. Inode is corrupt. 
" + "You should run chkdsk."); + goto unm_err_out; + } + vi->i_size = sle64_to_cpu( + ctx->attr->data.non_resident.data_size); + ni->initialized_size = sle64_to_cpu( + ctx->attr->data.non_resident.initialized_size); + ni->allocated_size = sle64_to_cpu( + ctx->attr->data.non_resident.allocated_size); + /* + * We are done with the mft record, so we release it. Otherwise + * we would deadlock in ntfs_attr_iget(). + */ + put_attr_search_ctx(ctx); + unmap_mft_record(ni); + m = NULL; + ctx = NULL; + /* Get the index bitmap attribute inode. */ + bvi = ntfs_attr_iget(vi, AT_BITMAP, I30, 4); + if (unlikely(IS_ERR(bvi))) { + ntfs_error(vi->i_sb, "Failed to get bitmap attribute."); + err = PTR_ERR(bvi); + goto unm_err_out; + } + ni->itype.index.bmp_ino = bvi; + bni = NTFS_I(bvi); + if (NInoCompressed(bni) || NInoEncrypted(bni) || + NInoSparse(bni)) { + ntfs_error(vi->i_sb, "$BITMAP attribute is compressed " + "and/or encrypted and/or sparse."); + goto unm_err_out; + } + /* Consistency check bitmap size vs. index allocation size. */ + if ((bvi->i_size << 3) < (vi->i_size >> + ni->itype.index.block_size_bits)) { + ntfs_error(vi->i_sb, "Index bitmap too small (0x%Lx) " + "for index allocation (0x%Lx).", + bvi->i_size << 3, vi->i_size); + goto unm_err_out; + } +skip_large_dir_stuff: + /* Everyone gets read and scan permissions. */ + vi->i_mode |= S_IRUGO | S_IXUGO; + /* If not read-only, set write permissions. */ + if (!IS_RDONLY(vi)) + vi->i_mode |= S_IWUGO; + /* + * Apply the directory permissions mask set in the mount + * options. + */ + vi->i_mode &= ~vol->dmask; + /* Setup the operations for this inode. */ + vi->i_op = &ntfs_dir_inode_ops; + vi->i_fop = &ntfs_dir_ops; + vi->i_mapping->a_ops = &ntfs_aops; + } else { + /* It is a file. */ + reinit_attr_search_ctx(ctx); + + /* Setup the data attribute, even if not present. */ + ni->type = AT_DATA; + ni->name = NULL; + ni->name_len = 0; + + /* Find first extent of the unnamed data attribute. 
*/ + if (!lookup_attr(AT_DATA, NULL, 0, 0, 0, NULL, 0, ctx)) { + vi->i_size = ni->initialized_size = + ni->allocated_size = 0LL; + /* + * FILE_Secure does not have an unnamed $DATA + * attribute, so we special case it here. + */ + if (vi->i_ino == FILE_Secure) + goto no_data_attr_special_case; + /* + * Most if not all the system files in the $Extend + * system directory do not have unnamed data + * attributes so we need to check if the parent + * directory of the file is FILE_Extend and if it is + * ignore this error. To do this we need to get the + * name of this inode from the mft record as the name + * contains the back reference to the parent directory. + */ + if (ntfs_is_extended_system_file(ctx) > 0) + goto no_data_attr_special_case; + // FIXME: File is corrupt! Hot-fix with empty data + // attribute if recovery option is set. + ntfs_error(vi->i_sb, "$DATA attribute is " + "missing."); + goto unm_err_out; + } + /* Setup the state. */ + if (ctx->attr->non_resident) { + NInoSetNonResident(ni); + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + NInoSetCompressed(ni); + if (vol->cluster_size > 4096) { + ntfs_error(vi->i_sb, "Found " + "compressed data but " + "compression is disabled due " + "to cluster size (%i) > 4kiB.", + vol->cluster_size); + goto unm_err_out; + } + if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) + != ATTR_IS_COMPRESSED) { + ntfs_error(vi->i_sb, "Found " + "unknown compression method or " + "corrupt file."); + goto unm_err_out; + } + ni->itype.compressed.block_clusters = 1U << + ctx->attr->data.non_resident. + compression_unit; + if (ctx->attr->data.non_resident. + compression_unit != 4) { + ntfs_error(vi->i_sb, "Found " + "nonstandard compression unit " + "(%u instead of 4). Cannot " + "handle this. This might " + "indicate corruption so you " + "should run chkdsk.", + ctx->attr->data.non_resident. + compression_unit); + err = -EOPNOTSUPP; + goto unm_err_out; + } + ni->itype.compressed.block_size = 1U << ( + ctx->attr->data.non_resident. 
+ compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = ffs( + ni->itype.compressed.block_size) - 1; + } + if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + ntfs_error(vi->i_sb, "Found encrypted " + "and compressed data."); + goto unm_err_out; + } + NInoSetEncrypted(ni); + } + if (ctx->attr->flags & ATTR_IS_SPARSE) + NInoSetSparse(ni); + if (ctx->attr->data.non_resident.lowest_vcn) { + ntfs_error(vi->i_sb, "First extent of $DATA " + "attribute has non zero " + "lowest_vcn. Inode is corrupt. " + "You should run chkdsk."); + goto unm_err_out; + } + /* Setup all the sizes. */ + vi->i_size = sle64_to_cpu( + ctx->attr->data.non_resident.data_size); + ni->initialized_size = sle64_to_cpu( + ctx->attr->data.non_resident. + initialized_size); + ni->allocated_size = sle64_to_cpu( + ctx->attr->data.non_resident. + allocated_size); + if (NInoCompressed(ni)) { + ni->itype.compressed.size = sle64_to_cpu( + ctx->attr->data.non_resident. + compressed_size); + } + } else { /* Resident attribute. */ + /* + * Make all sizes equal for simplicity in read code + * paths. FIXME: Need to keep this in mind when + * converting to non-resident attribute in write code + * path. (Probably only affects truncate().) + */ + vi->i_size = ni->initialized_size = ni->allocated_size = + le32_to_cpu( + ctx->attr->data.resident.value_length); + } +no_data_attr_special_case: + /* We are done with the mft record, so we release it. */ + put_attr_search_ctx(ctx); + unmap_mft_record(ni); + m = NULL; + ctx = NULL; + /* Everyone gets all permissions. */ + vi->i_mode |= S_IRWXUGO; + /* If read-only, noone gets write permissions. */ + if (IS_RDONLY(vi)) + vi->i_mode &= ~S_IWUGO; + /* Apply the file permissions mask set in the mount options. */ + vi->i_mode &= ~vol->fmask; + /* Setup the operations for this inode. 
*/ + vi->i_op = &ntfs_file_inode_ops; + vi->i_fop = &ntfs_file_ops; + vi->i_mapping->a_ops = &ntfs_aops; + } + /* + * The number of 512-byte blocks used on disk (for stat). This is in so + * far inaccurate as it doesn't account for any named streams or other + * special non-resident attributes, but that is how Windows works, too, + * so we are at least consistent with Windows, if not entirely + * consistent with the Linux Way. Doing it the Linux Way would cause a + * significant slowdown as it would involve iterating over all + * attributes in the mft record and adding the allocated/compressed + * sizes of all non-resident attributes present to give us the Linux + * correct size that should go into i_blocks (after division by 512). + */ + if (!NInoCompressed(ni)) + vi->i_blocks = ni->allocated_size >> 9; + else + vi->i_blocks = ni->itype.compressed.size >> 9; + + ntfs_debug("Done."); + return 0; + +unm_err_out: + if (!err) + err = -EIO; + if (ctx) + put_attr_search_ctx(ctx); + if (m) + unmap_mft_record(ni); +err_out: + ntfs_error(vi->i_sb, "Failed with error code %i. Marking inode 0x%lx " + "as bad.", -err, vi->i_ino); + make_bad_inode(vi); + return err; +} + +/** + * ntfs_read_locked_attr_inode - read an attribute inode from its base inode + * @base_vi: base inode + * @vi: attribute inode to read + * + * ntfs_read_locked_attr_inode() is called from the ntfs_attr_iget() to read + * the attribute inode described by @vi into memory from the base mft record + * described by @base_ni. + * + * ntfs_read_locked_attr_inode() maps, pins and locks the base inode for + * reading and looks up the attribute described by @vi before setting up the + * necessary fields in @vi as well as initializing the ntfs inode. + * + * Q: What locks are held when the function is called? 
+ * A: i_state has I_LOCK set, hence the inode is locked, also + * i_count is set to 1, so it is not going to go away + */ +static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) +{ + ntfs_volume *vol = NTFS_SB(vi->i_sb); + ntfs_inode *ni, *base_ni; + MFT_RECORD *m; + attr_search_context *ctx; + int err = 0; + + ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino); + + ntfs_init_big_inode(vi); + + ni = NTFS_I(vi); + base_ni = NTFS_I(base_vi); + + /* Just mirror the values from the base inode. */ + vi->i_blksize = base_vi->i_blksize; + vi->i_version = base_vi->i_version; + vi->i_uid = base_vi->i_uid; + vi->i_gid = base_vi->i_gid; + vi->i_nlink = base_vi->i_nlink; + vi->i_mtime = base_vi->i_mtime; + vi->i_ctime = base_vi->i_ctime; + vi->i_atime = base_vi->i_atime; + ni->seq_no = base_ni->seq_no; + + /* Set inode type to zero but preserve permissions. */ + vi->i_mode = base_vi->i_mode & ~S_IFMT; + + m = map_mft_record(base_ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + goto err_out; + } + ctx = get_attr_search_ctx(base_ni, m); + if (!ctx) { + err = -ENOMEM; + goto unm_err_out; + } + + /* Find the attribute. */ + if (!lookup_attr(ni->type, ni->name, ni->name_len, IGNORE_CASE, 0, + NULL, 0, ctx)) + goto unm_err_out; + + if (!ctx->attr->non_resident) { + if (NInoMstProtected(ni) || ctx->attr->flags) { + ntfs_error(vi->i_sb, "Found mst protected attribute " + "or attribute with non-zero flags but " + "the attribute is resident (mft_no " + "0x%lx, type 0x%x, name_len %i). " + "Please report you saw this message " + "to linux-ntfs-dev@lists.sf.net", + vi->i_ino, ni->type, ni->name_len); + goto unm_err_out; + } + /* + * Resident attribute. Make all sizes equal for simplicity in + * read code paths. 
+ */ + vi->i_size = ni->initialized_size = ni->allocated_size = + le32_to_cpu(ctx->attr->data.resident.value_length); + } else { + NInoSetNonResident(ni); + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + if (NInoMstProtected(ni)) { + ntfs_error(vi->i_sb, "Found mst protected " + "attribute but the attribute " + "is compressed (mft_no 0x%lx, " + "type 0x%x, name_len %i). " + "Please report you saw this " + "message to linux-ntfs-dev@" + "lists.sf.net", vi->i_ino, + ni->type, ni->name_len); + goto unm_err_out; + } + NInoSetCompressed(ni); + if ((ni->type != AT_DATA) || (ni->type == AT_DATA && + ni->name_len)) { + ntfs_error(vi->i_sb, "Found compressed non-" + "data or named data attribute " + "(mft_no 0x%lx, type 0x%x, " + "name_len %i). Please report " + "you saw this message to " + "linux-ntfs-dev@lists.sf.net", + vi->i_ino, ni->type, + ni->name_len); + goto unm_err_out; + } + if (vol->cluster_size > 4096) { + ntfs_error(vi->i_sb, "Found " + "compressed attribute but " + "compression is disabled due " + "to cluster size (%i) > 4kiB.", + vol->cluster_size); + goto unm_err_out; + } + if ((ctx->attr->flags & ATTR_COMPRESSION_MASK) + != ATTR_IS_COMPRESSED) { + ntfs_error(vi->i_sb, "Found unknown " + "compression method or " + "corrupt file."); + goto unm_err_out; + } + ni->itype.compressed.block_clusters = 1U << + ctx->attr->data.non_resident. + compression_unit; + if (ctx->attr->data.non_resident.compression_unit != 4) { + ntfs_error(vi->i_sb, "Found " + "nonstandard compression unit " + "(%u instead of 4). Cannot " + "handle this. This might " + "indicate corruption so you " + "should run chkdsk.", + ctx->attr->data.non_resident. + compression_unit); + err = -EOPNOTSUPP; + goto unm_err_out; + } + ni->itype.compressed.block_size = 1U << ( + ctx->attr->data.non_resident. 
+ compression_unit + + vol->cluster_size_bits); + ni->itype.compressed.block_size_bits = ffs( + ni->itype.compressed.block_size) - 1; + } + if (ctx->attr->flags & ATTR_IS_ENCRYPTED) { + if (ctx->attr->flags & ATTR_COMPRESSION_MASK) { + ntfs_error(vi->i_sb, "Found encrypted " + "and compressed data."); + goto unm_err_out; + } + if (NInoMstProtected(ni)) { + ntfs_error(vi->i_sb, "Found mst protected " + "attribute but the attribute " + "is encrypted (mft_no 0x%lx, " + "type 0x%x, name_len %i). " + "Please report you saw this " + "message to linux-ntfs-dev@" + "lists.sf.net", vi->i_ino, + ni->type, ni->name_len); + goto unm_err_out; + } + NInoSetEncrypted(ni); + } + if (ctx->attr->flags & ATTR_IS_SPARSE) { + if (NInoMstProtected(ni)) { + ntfs_error(vi->i_sb, "Found mst protected " + "attribute but the attribute " + "is sparse (mft_no 0x%lx, " + "type 0x%x, name_len %i). " + "Please report you saw this " + "message to linux-ntfs-dev@" + "lists.sf.net", vi->i_ino, + ni->type, ni->name_len); + goto unm_err_out; + } + NInoSetSparse(ni); + } + if (ctx->attr->data.non_resident.lowest_vcn) { + ntfs_error(vi->i_sb, "First extent of attribute has " + "non-zero lowest_vcn. Inode is " + "corrupt. You should run chkdsk."); + goto unm_err_out; + } + /* Setup all the sizes. */ + vi->i_size = sle64_to_cpu( + ctx->attr->data.non_resident.data_size); + ni->initialized_size = sle64_to_cpu( + ctx->attr->data.non_resident.initialized_size); + ni->allocated_size = sle64_to_cpu( + ctx->attr->data.non_resident.allocated_size); + if (NInoCompressed(ni)) { + ni->itype.compressed.size = sle64_to_cpu( + ctx->attr->data.non_resident. + compressed_size); + } + } + + /* Setup the operations for this attribute inode. 
*/ + vi->i_op = NULL; + vi->i_fop = NULL; + vi->i_mapping->a_ops = &ntfs_aops; + + if (!NInoCompressed(ni)) + vi->i_blocks = ni->allocated_size >> 9; + else + vi->i_blocks = ni->itype.compressed.size >> 9; + + /* + * Make sure the base inode doesn't go away and attach it to the + * attribute inode. + */ + igrab(base_vi); + ni->ext.base_ntfs_ino = base_ni; + ni->nr_extents = -1; + + put_attr_search_ctx(ctx); + unmap_mft_record(base_ni); + + ntfs_debug("Done."); + return 0; + +unm_err_out: + if (!err) + err = -EIO; + if (ctx) + put_attr_search_ctx(ctx); + unmap_mft_record(base_ni); +err_out: + ntfs_error(vi->i_sb, "Failed with error code %i while reading " + "attribute inode (mft_no 0x%lx, type 0x%x, name_len " + "%i.", -err, vi->i_ino, ni->type, ni->name_len); + make_bad_inode(vi); + return err; +} + +/** + * ntfs_read_inode_mount - special read_inode for mount time use only + * @vi: inode to read + * + * Read inode FILE_MFT at mount time, only called with super_block lock + * held from within the read_super() code path. + * + * This function exists because when it is called the page cache for $MFT/$DATA + * is not initialized and hence we cannot get at the contents of mft records + * by calling map_mft_record*(). + * + * Further it needs to cope with the circular references problem, i.e. can't + * load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because + * we don't know where the other extent mft records are yet and again, because + * we cannot call map_mft_record*() yet. Obviously this applies only when an + * attribute list is actually present in $MFT inode. + * + * We solve these problems by starting with the $DATA attribute before anything + * else and iterating using lookup_attr($DATA) over all extents. As each extent + * is found, we decompress_mapping_pairs() including the implied + * merge_run_lists(). Each step of the iteration necessarily provides + * sufficient information for the next step to complete. 
+ * + * This should work but there are two possible pit falls (see inline comments + * below), but only time will tell if they are real pits or just smoke... + */ +void ntfs_read_inode_mount(struct inode *vi) +{ + VCN next_vcn, last_vcn, highest_vcn; + s64 block; + struct super_block *sb = vi->i_sb; + ntfs_volume *vol = NTFS_SB(sb); + struct buffer_head *bh; + ntfs_inode *ni; + MFT_RECORD *m = NULL; + ATTR_RECORD *attr; + attr_search_context *ctx; + unsigned int i, nr_blocks; + int err; + + ntfs_debug("Entering."); + + if (vi->i_ino != FILE_MFT) { + ntfs_error(sb, "Called for inode 0x%lx but only inode %d " + "allowed.", vi->i_ino, FILE_MFT); + goto err_out; + } + + /* Initialize the ntfs specific part of @vi. */ + ntfs_init_big_inode(vi); + + ni = NTFS_I(vi); + + /* Setup the data attribute. It is special as it is mst protected. */ + NInoSetNonResident(ni); + NInoSetMstProtected(ni); + ni->type = AT_DATA; + ni->name = NULL; + ni->name_len = 0; + + /* + * This sets up our little cheat allowing us to reuse the async io + * completion handler for directories. + */ + ni->itype.index.block_size = vol->mft_record_size; + ni->itype.index.block_size_bits = vol->mft_record_size_bits; + + /* Very important! Needed to be able to call map_mft_record*(). */ + vol->mft_ino = vi; + + /* Allocate enough memory to read the first mft record. */ + if (vol->mft_record_size > 64 * 1024) { + ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).", + vol->mft_record_size); + goto err_out; + } + i = vol->mft_record_size; + if (i < sb->s_blocksize) + i = sb->s_blocksize; + m = (MFT_RECORD*)ntfs_malloc_nofs(i); + if (!m) { + ntfs_error(sb, "Failed to allocate buffer for $MFT record 0."); + goto err_out; + } + + /* Determine the first block of the $MFT/$DATA attribute. */ + block = vol->mft_lcn << vol->cluster_size_bits >> + sb->s_blocksize_bits; + nr_blocks = vol->mft_record_size >> sb->s_blocksize_bits; + if (!nr_blocks) + nr_blocks = 1; + + /* Load $MFT/$DATA's first mft record. 
*/ + for (i = 0; i < nr_blocks; i++) { + bh = sb_bread(sb, block++); + if (!bh) { + ntfs_error(sb, "Device read failed."); + goto err_out; + } + memcpy((char*)m + (i << sb->s_blocksize_bits), bh->b_data, + sb->s_blocksize); + brelse(bh); + } + + /* Apply the mst fixups. */ + if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) { + /* FIXME: Try to use the $MFTMirr now. */ + ntfs_error(sb, "MST fixup failed. $MFT is corrupt."); + goto err_out; + } + + /* Need this to sanity check attribute list references to $MFT. */ + ni->seq_no = le16_to_cpu(m->sequence_number); + + /* Provides readpage() and sync_page() for map_mft_record(). */ + vi->i_mapping->a_ops = &ntfs_mft_aops; + + ctx = get_attr_search_ctx(ni, m); + if (!ctx) { + err = -ENOMEM; + goto err_out; + } + + /* Find the attribute list attribute if present. */ + if (lookup_attr(AT_ATTRIBUTE_LIST, NULL, 0, 0, 0, NULL, 0, ctx)) { + ATTR_LIST_ENTRY *al_entry, *next_al_entry; + u8 *al_end; + + ntfs_debug("Attribute list attribute found in $MFT."); + NInoSetAttrList(ni); + if (ctx->attr->flags & ATTR_IS_ENCRYPTED || + ctx->attr->flags & ATTR_COMPRESSION_MASK || + ctx->attr->flags & ATTR_IS_SPARSE) { + ntfs_error(sb, "Attribute list attribute is " + "compressed/encrypted/sparse. Not " + "allowed. $MFT is corrupt. You should " + "run chkdsk."); + goto put_err_out; + } + /* Now allocate memory for the attribute list. */ + ni->attr_list_size = (u32)attribute_value_length(ctx->attr); + ni->attr_list = ntfs_malloc_nofs(ni->attr_list_size); + if (!ni->attr_list) { + ntfs_error(sb, "Not enough memory to allocate buffer " + "for attribute list."); + goto put_err_out; + } + if (ctx->attr->non_resident) { + NInoSetAttrListNonResident(ni); + if (ctx->attr->data.non_resident.lowest_vcn) { + ntfs_error(sb, "Attribute list has non zero " + "lowest_vcn. $MFT is corrupt. " + "You should run chkdsk."); + goto put_err_out; + } + /* Setup the run list. 
*/ + ni->attr_list_rl.rl = decompress_mapping_pairs(vol, + ctx->attr, NULL); + if (IS_ERR(ni->attr_list_rl.rl)) { + err = PTR_ERR(ni->attr_list_rl.rl); + ni->attr_list_rl.rl = NULL; + ntfs_error(sb, "Mapping pairs decompression " + "failed with error code %i.", + -err); + goto put_err_out; + } + /* Now load the attribute list. */ + if ((err = load_attribute_list(vol, &ni->attr_list_rl, + ni->attr_list, ni->attr_list_size, + sle64_to_cpu(ctx->attr->data. + non_resident.initialized_size)))) { + ntfs_error(sb, "Failed to load attribute list " + "attribute with error code %i.", + -err); + goto put_err_out; + } + } else /* if (!ctx.attr->non_resident) */ { + if ((u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset) + + le32_to_cpu( + ctx->attr->data.resident.value_length) > + (u8*)ctx->mrec + vol->mft_record_size) { + ntfs_error(sb, "Corrupt attribute list " + "attribute."); + goto put_err_out; + } + /* Now copy the attribute list. */ + memcpy(ni->attr_list, (u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset), + le32_to_cpu( + ctx->attr->data.resident.value_length)); + } + /* The attribute list is now setup in memory. */ + /* + * FIXME: I don't know if this case is actually possible. + * According to logic it is not possible but I have seen too + * many weird things in MS software to rely on logic... Thus we + * perform a manual search and make sure the first $MFT/$DATA + * extent is in the base inode. If it is not we abort with an + * error and if we ever see a report of this error we will need + * to do some magic in order to have the necessary mft record + * loaded and in the right place in the page cache. But + * hopefully logic will prevail and this never happens... + */ + al_entry = (ATTR_LIST_ENTRY*)ni->attr_list; + al_end = (u8*)al_entry + ni->attr_list_size; + for (;; al_entry = next_al_entry) { + /* Out of bounds check. 
*/ + if ((u8*)al_entry < ni->attr_list || + (u8*)al_entry > al_end) + goto em_put_err_out; + /* Catch the end of the attribute list. */ + if ((u8*)al_entry == al_end) + goto em_put_err_out; + if (!al_entry->length) + goto em_put_err_out; + if ((u8*)al_entry + 6 > al_end || (u8*)al_entry + + le16_to_cpu(al_entry->length) > al_end) + goto em_put_err_out; + next_al_entry = (ATTR_LIST_ENTRY*)((u8*)al_entry + + le16_to_cpu(al_entry->length)); + if (le32_to_cpu(al_entry->type) > + const_le32_to_cpu(AT_DATA)) + goto em_put_err_out; + if (AT_DATA != al_entry->type) + continue; + /* We want an unnamed attribute. */ + if (al_entry->name_length) + goto em_put_err_out; + /* Want the first entry, i.e. lowest_vcn == 0. */ + if (al_entry->lowest_vcn) + goto em_put_err_out; + /* First entry has to be in the base mft record. */ + if (MREF_LE(al_entry->mft_reference) != vi->i_ino) { + /* MFT references do not match, logic fails. */ + ntfs_error(sb, "BUG: The first $DATA extent " + "of $MFT is not in the base " + "mft record. Please report " + "you saw this message to " + "linux-ntfs-dev@lists.sf.net"); + goto put_err_out; + } else { + /* Sequence numbers must match. */ + if (MSEQNO_LE(al_entry->mft_reference) != + ni->seq_no) + goto em_put_err_out; + /* Got it. All is ok. We can stop now. */ + break; + } + } + } + + reinit_attr_search_ctx(ctx); + + /* Now load all attribute extents. */ + attr = NULL; + next_vcn = last_vcn = highest_vcn = 0; + while (lookup_attr(AT_DATA, NULL, 0, 0, next_vcn, NULL, 0, ctx)) { + run_list_element *nrl; + + /* Cache the current attribute. */ + attr = ctx->attr; + /* $MFT must be non-resident. */ + if (!attr->non_resident) { + ntfs_error(sb, "$MFT must be non-resident but a " + "resident extent was found. $MFT is " + "corrupt. Run chkdsk."); + goto put_err_out; + } + /* $MFT must be uncompressed and unencrypted. 
*/ + if (attr->flags & ATTR_COMPRESSION_MASK || + attr->flags & ATTR_IS_ENCRYPTED || + attr->flags & ATTR_IS_SPARSE) { + ntfs_error(sb, "$MFT must be uncompressed, " + "non-sparse, and unencrypted but a " + "compressed/sparse/encrypted extent " + "was found. $MFT is corrupt. Run " + "chkdsk."); + goto put_err_out; + } + /* + * Decompress the mapping pairs array of this extent and merge + * the result into the existing run list. No need for locking + * as we have exclusive access to the inode at this time and we + * are a mount in progress task, too. + */ + nrl = decompress_mapping_pairs(vol, attr, ni->run_list.rl); + if (IS_ERR(nrl)) { + ntfs_error(sb, "decompress_mapping_pairs() failed with " + "error code %ld. $MFT is corrupt.", + PTR_ERR(nrl)); + goto put_err_out; + } + ni->run_list.rl = nrl; + + /* Are we in the first extent? */ + if (!next_vcn) { + u64 ll; + + if (attr->data.non_resident.lowest_vcn) { + ntfs_error(sb, "First extent of $DATA " + "attribute has non zero " + "lowest_vcn. $MFT is corrupt. " + "You should run chkdsk."); + goto put_err_out; + } + /* Get the last vcn in the $DATA attribute. */ + last_vcn = sle64_to_cpu( + attr->data.non_resident.allocated_size) + >> vol->cluster_size_bits; + /* Fill in the inode size. */ + vi->i_size = sle64_to_cpu( + attr->data.non_resident.data_size); + ni->initialized_size = sle64_to_cpu(attr->data. + non_resident.initialized_size); + ni->allocated_size = sle64_to_cpu( + attr->data.non_resident.allocated_size); + /* Set the number of mft records. */ + ll = vi->i_size >> vol->mft_record_size_bits; + /* + * Verify the number of mft records does not exceed + * 2^32 - 1. + */ + if (ll >= (1ULL << 32)) { + ntfs_error(sb, "$MFT is too big! Aborting."); + goto put_err_out; + } + vol->nr_mft_records = ll; + /* + * We have got the first extent of the run_list for + * $MFT which means it is now relatively safe to call + * the normal ntfs_read_inode() function. Thus, take + * us out of the calling chain. 
Also we need to do this + * now because we need ntfs_read_inode() in place to + * get at subsequent extents. + */ + sb->s_op = &ntfs_sops; + /* + * Complete reading the inode, this will actually + * re-read the mft record for $MFT, this time entering + * it into the page cache with which we complete the + * kick start of the volume. It should be safe to do + * this now as the first extent of $MFT/$DATA is + * already known and we would hope that we don't need + * further extents in order to find the other + * attributes belonging to $MFT. Only time will tell if + * this is really the case. If not we will have to play + * magic at this point, possibly duplicating a lot of + * ntfs_read_inode() at this point. We will need to + * ensure we do enough of its work to be able to call + * ntfs_read_inode() on extents of $MFT/$DATA. But lets + * hope this never happens... + */ + ntfs_read_locked_inode(vi); + if (is_bad_inode(vi)) { + ntfs_error(sb, "ntfs_read_inode() of $MFT " + "failed. BUG or corrupt $MFT. " + "Run chkdsk and if no errors " + "are found, please report you " + "saw this message to " + "linux-ntfs-dev@lists.sf.net"); + put_attr_search_ctx(ctx); + /* Revert to the safe super operations. */ + sb->s_op = &ntfs_mount_sops; + goto out_now; + } + /* + * Re-initialize some specifics about $MFT's inode as + * ntfs_read_inode() will have set up the default ones. + */ + /* Set uid and gid to root. */ + vi->i_uid = vi->i_gid = 0; + /* Regular file. No access for anyone. */ + vi->i_mode = S_IFREG; + /* No VFS initiated operations allowed for $MFT. */ + vi->i_op = &ntfs_empty_inode_ops; + vi->i_fop = &ntfs_empty_file_ops; + /* Put back our special address space operations. */ + vi->i_mapping->a_ops = &ntfs_mft_aops; + } + + /* Get the lowest vcn for the next extent. */ + highest_vcn = sle64_to_cpu(attr->data.non_resident.highest_vcn); + next_vcn = highest_vcn + 1; + + /* Only one extent or error, which we catch below. 
*/ + if (next_vcn <= 0) + break; + + /* Avoid endless loops due to corruption. */ + if (next_vcn < sle64_to_cpu( + attr->data.non_resident.lowest_vcn)) { + ntfs_error(sb, "$MFT has corrupt attribute list " + "attribute. Run chkdsk."); + goto put_err_out; + } + } + if (!attr) { + ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is " + "corrupt. Run chkdsk."); + goto put_err_out; + } + if (highest_vcn && highest_vcn != last_vcn - 1) { + ntfs_error(sb, "Failed to load the complete run list " + "for $MFT/$DATA. Driver bug or " + "corrupt $MFT. Run chkdsk."); + ntfs_debug("highest_vcn = 0x%Lx, last_vcn - 1 = 0x%Lx", + (long long)highest_vcn, + (long long)last_vcn - 1); + goto put_err_out; + } + put_attr_search_ctx(ctx); + ntfs_debug("Done."); +out_now: + ntfs_free(m); + return; +em_put_err_out: + ntfs_error(sb, "Couldn't find first extent of $DATA attribute in " + "attribute list. $MFT is corrupt. Run chkdsk."); +put_err_out: + put_attr_search_ctx(ctx); +err_out: + /* Make sure we revert to the safe super operations. */ + sb->s_op = &ntfs_mount_sops; + ntfs_error(sb, "Failed. Marking inode as bad."); + make_bad_inode(vi); + goto out_now; +} + +/** + * ntfs_dirty_inode - mark the inode's metadata dirty + * @vi: inode to mark dirty + * + * This is called from fs/inode.c::__mark_inode_dirty(), when the inode itself + * is being marked dirty. An example is when update_atime() is invoked. + * + * We mark the inode dirty by setting both the page in which the mft record + * resides and the buffer heads in that page which correspond to the mft record + * dirty. This ensures that the changes will eventually be propagated to disk + * when the inode is set dirty. + * + * FIXME: Can we do that with the buffer heads? I am not too sure. Because if we + * do that we need to make sure that the kernel will not write out those buffer + * heads or we are screwed as it will write corrupt data to disk. 
The only way + * a mft record can be written correctly is by mst protecting it, writting it + * synchronously and fast mst deprotecting it. During this period, obviously, + * the mft record must be marked as not uptodate, be locked for writing or + * whatever, so that nobody attempts anything stupid. + * + * FIXME: Do we need to check that the fs is not mounted read only? And what + * about the inode? Anything else? + * + * FIXME: As we are only a read only driver it is safe to just return here for + * the moment. + */ +void ntfs_dirty_inode(struct inode *vi) +{ + ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); + NInoSetDirty(NTFS_I(vi)); + return; +} + +/** + * ntfs_commit_inode - write out a dirty inode + * @ni: inode to write out + * + */ +int ntfs_commit_inode(ntfs_inode *ni) +{ + ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); + NInoClearDirty(ni); + return 0; +} + +/** + * ntfs_put_inode - handler for when the inode reference count is decremented + * @vi: vfs inode + * + * The VFS calls ntfs_put_inode() every time the inode reference count (i_count) + * is about to be decremented (but before the decrement itself. + * + * If the inode @vi is a directory with a single reference, we need to put the + * attribute inode for the directory index bitmap, if it is present, otherwise + * the directory inode would remain pinned for ever (or rather until umount() + * time. + */ +void ntfs_put_inode(struct inode *vi) +{ + if (S_ISDIR(vi->i_mode) && (atomic_read(&vi->i_count) == 2)) { + ntfs_inode *ni; + + ni = NTFS_I(vi); + if (NInoIndexAllocPresent(ni) && ni->itype.index.bmp_ino) { + iput(ni->itype.index.bmp_ino); + ni->itype.index.bmp_ino = NULL; + } + } + return; +} + +void __ntfs_clear_inode(ntfs_inode *ni) +{ + int err; + + ntfs_debug("Entering for inode 0x%lx.", ni->mft_no); + if (NInoDirty(ni)) { + err = ntfs_commit_inode(ni); + if (err) { + ntfs_error(ni->vol->sb, "Failed to commit dirty " + "inode synchronously."); + // FIXME: Do something!!! 
+ } + } + /* Synchronize with ntfs_commit_inode(). */ + down(&ni->mrec_lock); + up(&ni->mrec_lock); + if (NInoDirty(ni)) { + ntfs_error(ni->vol->sb, "Failed to commit dirty inode " + "asynchronously."); + // FIXME: Do something!!! + } + /* No need to lock at this stage as no one else has a reference. */ + if (ni->nr_extents > 0) { + int i; + + // FIXME: Handle dirty case for each extent inode! + for (i = 0; i < ni->nr_extents; i++) + ntfs_clear_extent_inode(ni->ext.extent_ntfs_inos[i]); + kfree(ni->ext.extent_ntfs_inos); + } + /* Free all alocated memory. */ + down_write(&ni->run_list.lock); + if (ni->run_list.rl) { + ntfs_free(ni->run_list.rl); + ni->run_list.rl = NULL; + } + up_write(&ni->run_list.lock); + + if (ni->attr_list) { + ntfs_free(ni->attr_list); + ni->attr_list = NULL; + } + + down_write(&ni->attr_list_rl.lock); + if (ni->attr_list_rl.rl) { + ntfs_free(ni->attr_list_rl.rl); + ni->attr_list_rl.rl = NULL; + } + up_write(&ni->attr_list_rl.lock); + + if (ni->name_len && ni->name != I30) { + /* Catch bugs... */ + BUG_ON(!ni->name); + kfree(ni->name); + } +} + +void ntfs_clear_extent_inode(ntfs_inode *ni) +{ + __ntfs_clear_inode(ni); + + /* Bye, bye... */ + ntfs_destroy_extent_inode(ni); +} + +/** + * ntfs_clear_big_inode - clean up the ntfs specific part of an inode + * @vi: vfs inode pending annihilation + * + * When the VFS is going to remove an inode from memory, ntfs_clear_big_inode() + * is called, which deallocates all memory belonging to the NTFS specific part + * of the inode and returns. + * + * If the MFT record is dirty, we commit it before doing anything else. + */ +void ntfs_clear_big_inode(struct inode *vi) +{ + ntfs_inode *ni = NTFS_I(vi); + + __ntfs_clear_inode(ni); + + if (NInoAttr(ni)) { + /* Release the base inode if we are holding it. 
*/ + if (ni->nr_extents == -1) { + iput(VFS_I(ni->ext.base_ntfs_ino)); + ni->nr_extents = 0; + ni->ext.base_ntfs_ino = NULL; + } + } + return; +} + +/** + * ntfs_show_options - show mount options in /proc/mounts + * @sf: seq_file in which to write our mount options + * @mnt: vfs mount whose mount options to display + * + * Called by the VFS once for each mounted ntfs volume when someone reads + * /proc/mounts in order to display the NTFS specific mount options of each + * mount. The mount options of the vfs mount @mnt are written to the seq file + * @sf and success is returned. + */ +int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt) +{ + ntfs_volume *vol = NTFS_SB(mnt->mnt_sb); + int i; + + seq_printf(sf, ",uid=%i", vol->uid); + seq_printf(sf, ",gid=%i", vol->gid); + if (vol->fmask == vol->dmask) + seq_printf(sf, ",umask=0%o", vol->fmask); + else { + seq_printf(sf, ",fmask=0%o", vol->fmask); + seq_printf(sf, ",dmask=0%o", vol->dmask); + } + seq_printf(sf, ",nls=%s", vol->nls_map->charset); + if (NVolCaseSensitive(vol)) + seq_printf(sf, ",case_sensitive"); + if (NVolShowSystemFiles(vol)) + seq_printf(sf, ",show_sys_files"); + for (i = 0; on_errors_arr[i].val; i++) { + if (on_errors_arr[i].val & vol->on_errors) + seq_printf(sf, ",errors=%s", on_errors_arr[i].str); + } + seq_printf(sf, ",mft_zone_multiplier=%i", vol->mft_zone_multiplier); + return 0; +} + +#ifdef NTFS_RW + +/** + * ntfs_truncate - called when the i_size of an ntfs inode is changed + * @vi: inode for which the i_size was changed + * + * We don't support i_size changes yet. + * + * Called with ->i_sem held. + */ +void ntfs_truncate(struct inode *vi) +{ + // TODO: Implement... + ntfs_warning(vi->i_sb, "Eeek: i_size may have changed! If you see " + "this right after a message from " + "ntfs_{prepare,commit}_{,nonresident_}write() then " + "just ignore it. Otherwise it is bad news."); + // TODO: reset i_size now! 
+ return; +} + +/** + * ntfs_setattr - called from notify_change() when an attribute is being changed + * @dentry: dentry whose attributes to change + * @attr: structure describing the attributes and the changes + * + * We have to trap VFS attempts to truncate the file described by @dentry as + * soon as possible, because we do not implement changes in i_size yet. So we + * abort all i_size changes here. + * + * Called with ->i_sem held. + * + * Basically this is a copy of generic notify_change() and inode_setattr() + * functionality, except we intercept and abort changes in i_size. + */ +int ntfs_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *vi; + int err; + unsigned int ia_valid = attr->ia_valid; + + vi = dentry->d_inode; + + err = inode_change_ok(vi, attr); + if (err) + return err; + + if ((ia_valid & ATTR_UID && attr->ia_uid != vi->i_uid) || + (ia_valid & ATTR_GID && attr->ia_gid != vi->i_gid)) { + err = DQUOT_TRANSFER(vi, attr) ? -EDQUOT : 0; + if (err) + return err; + } + + lock_kernel(); + + if (ia_valid & ATTR_SIZE) { + ntfs_error(vi->i_sb, "Changes in i_size are not supported " + "yet. Sorry."); + // TODO: Implement... 
+ // err = vmtruncate(vi, attr->ia_size); + err = -EOPNOTSUPP; + if (err) + goto trunc_err; + } + + if (ia_valid & ATTR_UID) + vi->i_uid = attr->ia_uid; + if (ia_valid & ATTR_GID) + vi->i_gid = attr->ia_gid; + if (ia_valid & ATTR_ATIME) + vi->i_atime = attr->ia_atime; + if (ia_valid & ATTR_MTIME) + vi->i_mtime = attr->ia_mtime; + if (ia_valid & ATTR_CTIME) + vi->i_ctime = attr->ia_ctime; + if (ia_valid & ATTR_MODE) { + vi->i_mode = attr->ia_mode; + if (!in_group_p(vi->i_gid) && + !capable(CAP_FSETID)) + vi->i_mode &= ~S_ISGID; + } + mark_inode_dirty(vi); + +trunc_err: + + unlock_kernel(); + + return err; +} + +#endif + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/inode.h b/reactos/drivers/fs/ntfs/linux-ntfs/inode.h new file mode 100644 index 00000000000..82b3b22556a --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/inode.h @@ -0,0 +1,251 @@ +/* + * inode.h - Defines for inode structures NTFS Linux kernel driver. Part of + * the Linux-NTFS project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_INODE_H +#define _LINUX_NTFS_INODE_H + +#include <linux/seq_file.h> + +#include "layout.h" +#include "volume.h" + +typedef struct _ntfs_inode ntfs_inode; + +/* + * The NTFS in-memory inode structure. It is just used as an extension to the + * fields already provided in the VFS inode. + */ +struct _ntfs_inode { + s64 initialized_size; /* Copy from the attribute record. */ + s64 allocated_size; /* Copy from the attribute record. */ + unsigned long state; /* NTFS specific flags describing this inode. + See ntfs_inode_state_bits below. */ + unsigned long mft_no; /* Number of the mft record / inode. */ + u16 seq_no; /* Sequence number of the mft record. */ + atomic_t count; /* Inode reference count for book keeping. */ + ntfs_volume *vol; /* Pointer to the ntfs volume of this inode. */ + /* + * If NInoAttr() is true, the below fields describe the attribute which + * this fake inode belongs to. The actual inode of this attribute is + * pointed to by base_ntfs_ino and nr_extents is always set to -1 (see + * below). For real inodes, we also set the type (AT_DATA for files and + * AT_INDEX_ALLOCATION for directories), with the name = NULL and + * name_len = 0 for files and name = I30 (global constant) and + * name_len = 4 for directories. + */ + ATTR_TYPES type; /* Attribute type of this fake inode. */ + uchar_t *name; /* Attribute name of this fake inode. */ + u32 name_len; /* Attribute name length of this fake inode. */ + run_list run_list; /* If state has the NI_NonResident bit set, + the run list of the unnamed data attribute + (if a file) or of the index allocation + attribute (directory) or of the attribute + described by the fake inode (if NInoAttr()). 
+ If run_list.rl is NULL, the run list has not + been read in yet or has been unmapped. If + NI_NonResident is clear, the attribute is + resident (file and fake inode) or there is + no $I30 index allocation attribute + (small directory). In the latter case + run_list.rl is always NULL.*/ + /* + * The following fields are only valid for real inodes and extent + * inodes. + */ + struct semaphore mrec_lock; /* Lock for serializing access to the + mft record belonging to this inode. */ + struct page *page; /* The page containing the mft record of the + inode. This should only be touched by the + (un)map_mft_record*() functions. */ + int page_ofs; /* Offset into the page at which the mft record + begins. This should only be touched by the + (un)map_mft_record*() functions. */ + /* + * Attribute list support (only for use by the attribute lookup + * functions). Setup during read_inode for all inodes with attribute + * lists. Only valid if NI_AttrList is set in state, and attr_list_rl is + * further only valid if NI_AttrListNonResident is set. + */ + u32 attr_list_size; /* Length of attribute list value in bytes. */ + u8 *attr_list; /* Attribute list value itself. */ + run_list attr_list_rl; /* Run list for the attribute list value. */ + union { + struct { /* It is a directory or $MFT. */ + struct inode *bmp_ino; /* Attribute inode for the + directory index $BITMAP. */ + u32 block_size; /* Size of an index block. */ + u32 vcn_size; /* Size of a vcn in this + directory index. */ + u8 block_size_bits; /* Log2 of the above. */ + u8 vcn_size_bits; /* Log2 of the above. */ + } index; + struct { /* It is a compressed file or fake inode. */ + s64 size; /* Copy of compressed_size from + $DATA. */ + u32 block_size; /* Size of a compression block + (cb). */ + u8 block_size_bits; /* Log2 of the size of a cb. */ + u8 block_clusters; /* Number of clusters per cb. */ + } compressed; + } itype; + struct semaphore extent_lock; /* Lock for accessing/modifying the + below . 
*/ + s32 nr_extents; /* For a base mft record, the number of attached extent + inodes (0 if none), for extent records and for fake + inodes describing an attribute this is -1. */ + union { /* This union is only used if nr_extents != 0. */ + ntfs_inode **extent_ntfs_inos; /* For nr_extents > 0, array of + the ntfs inodes of the extent + mft records belonging to + this base inode which have + been loaded. */ + ntfs_inode *base_ntfs_ino; /* For nr_extents == -1, the + ntfs inode of the base mft + record. For fake inodes, the + real (base) inode to which + the attribute belongs. */ + } ext; +}; + +/* + * Defined bits for the state field in the ntfs_inode structure. + * (f) = files only, (d) = directories only, (a) = attributes/fake inodes only + */ +typedef enum { + NI_Dirty, /* 1: Mft record needs to be written to disk. */ + NI_AttrList, /* 1: Mft record contains an attribute list. */ + NI_AttrListNonResident, /* 1: Attribute list is non-resident. Implies + NI_AttrList is set. */ + + NI_Attr, /* 1: Fake inode for attribute i/o. + 0: Real inode or extent inode. */ + + NI_MstProtected, /* 1: Attribute is protected by MST fixups. + 0: Attribute is not protected by fixups. */ + NI_NonResident, /* 1: Unnamed data attr is non-resident (f). + 1: Attribute is non-resident (a). */ + NI_IndexAllocPresent = NI_NonResident, /* 1: $I30 index alloc attr is + present (d). */ + NI_Compressed, /* 1: Unnamed data attr is compressed (f). + 1: Create compressed files by default (d). + 1: Attribute is compressed (a). */ + NI_Encrypted, /* 1: Unnamed data attr is encrypted (f). + 1: Create encrypted files by default (d). + 1: Attribute is encrypted (a). */ + NI_Sparse, /* 1: Unnamed data attr is sparse (f). + 1: Create sparse files by default (d). + 1: Attribute is sparse (a). 
*/ +} ntfs_inode_state_bits; + +/* + * NOTE: We should be adding dirty mft records to a list somewhere and they + * should be independent of the (ntfs/vfs) inode structure so that an inode can + * be removed but the record can be left dirty for syncing later. + */ + +/* + * Macro tricks to expand the NInoFoo(), NInoSetFoo(), and NInoClearFoo() + * functions. + */ +#define NINO_FNS(flag) \ +static inline int NIno##flag(ntfs_inode *ni) \ +{ \ + return test_bit(NI_##flag, &(ni)->state); \ +} \ +static inline void NInoSet##flag(ntfs_inode *ni) \ +{ \ + set_bit(NI_##flag, &(ni)->state); \ +} \ +static inline void NInoClear##flag(ntfs_inode *ni) \ +{ \ + clear_bit(NI_##flag, &(ni)->state); \ +} + +/* Emit the ntfs inode bitops functions. */ +NINO_FNS(Dirty) +NINO_FNS(AttrList) +NINO_FNS(AttrListNonResident) +NINO_FNS(Attr) +NINO_FNS(MstProtected) +NINO_FNS(NonResident) +NINO_FNS(IndexAllocPresent) +NINO_FNS(Compressed) +NINO_FNS(Encrypted) +NINO_FNS(Sparse) + +/* + * The full structure containing a ntfs_inode and a vfs struct inode. Used for + * all real and fake inodes but not for extent inodes which lack the vfs struct + * inode. + */ +typedef struct { + ntfs_inode ntfs_inode; + struct inode vfs_inode; /* The vfs inode structure. */ +} big_ntfs_inode; + +/** + * NTFS_I - return the ntfs inode given a vfs inode + * @inode: VFS inode + * + * NTFS_I() returns the ntfs inode associated with the VFS @inode. 
+ */ +static inline ntfs_inode *NTFS_I(struct inode *inode) +{ + return (ntfs_inode *)list_entry(inode, big_ntfs_inode, vfs_inode); +} + +static inline struct inode *VFS_I(ntfs_inode *ni) +{ + return &((big_ntfs_inode *)ni)->vfs_inode; +} + +extern struct inode *ntfs_iget(struct super_block *sb, unsigned long mft_no); +extern struct inode *ntfs_attr_iget(struct inode *base_vi, ATTR_TYPES type, + uchar_t *name, u32 name_len); + +extern struct inode *ntfs_alloc_big_inode(struct super_block *sb); +extern void ntfs_destroy_big_inode(struct inode *inode); +extern void ntfs_clear_big_inode(struct inode *vi); + +extern ntfs_inode *ntfs_new_extent_inode(struct super_block *sb, + unsigned long mft_no); +extern void ntfs_clear_extent_inode(ntfs_inode *ni); + +extern void ntfs_read_inode_mount(struct inode *vi); + +extern void ntfs_dirty_inode(struct inode *vi); + +extern void ntfs_put_inode(struct inode *vi); + +extern int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt); + +#ifdef NTFS_RW + +extern void ntfs_truncate(struct inode *vi); + +extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr); + +#endif + +#endif /* _LINUX_NTFS_FS_INODE_H */ + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/layout.h b/reactos/drivers/fs/ntfs/linux-ntfs/layout.h new file mode 100644 index 00000000000..14c41b651d1 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/layout.h @@ -0,0 +1,2258 @@ +/* + * layout.h - All NTFS associated on-disk structures. Part of the Linux-NTFS + * project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_LAYOUT_H +#define _LINUX_NTFS_LAYOUT_H + +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/list.h> +#include <asm/byteorder.h> + +#include "types.h" + +/* + * Constant endianness conversion defines. + */ +#define const_le16_to_cpu(x) __constant_le16_to_cpu(x) +#define const_le32_to_cpu(x) __constant_le32_to_cpu(x) +#define const_le64_to_cpu(x) __constant_le64_to_cpu(x) + +#define const_cpu_to_le16(x) __constant_cpu_to_le16(x) +#define const_cpu_to_le32(x) __constant_cpu_to_le32(x) +#define const_cpu_to_le64(x) __constant_cpu_to_le64(x) + +/* The NTFS oem_id "NTFS " */ +#define magicNTFS const_cpu_to_le64(0x202020205346544eULL) + +/* + * Location of bootsector on partition: + * The standard NTFS_BOOT_SECTOR is on sector 0 of the partition. + * On NT4 and above there is one backup copy of the boot sector to + * be found on the last sector of the partition (not normally accessible + * from within Windows as the bootsector contained number of sectors + * value is one less than the actual value!). + * On versions of NT 3.51 and earlier, the backup copy was located at + * number of sectors/2 (integer divide), i.e. in the middle of the volume. + */ + +/* + * BIOS parameter block (bpb) structure. + */ +typedef struct { + u16 bytes_per_sector; /* Size of a sector in bytes. */ + u8 sectors_per_cluster; /* Size of a cluster in sectors. 
*/ + u16 reserved_sectors; /* zero */ + u8 fats; /* zero */ + u16 root_entries; /* zero */ + u16 sectors; /* zero */ + u8 media_type; /* 0xf8 = hard disk */ + u16 sectors_per_fat; /* zero */ + u16 sectors_per_track; /* irrelevant */ + u16 heads; /* irrelevant */ + u32 hidden_sectors; /* zero */ + u32 large_sectors; /* zero */ +} __attribute__ ((__packed__)) BIOS_PARAMETER_BLOCK; + +/* + * NTFS boot sector structure. + */ +typedef struct { + u8 jump[3]; /* Irrelevant (jump to boot up code).*/ + u64 oem_id; /* Magic "NTFS ". */ + BIOS_PARAMETER_BLOCK bpb; /* See BIOS_PARAMETER_BLOCK. */ + u8 unused[4]; /* zero, NTFS diskedit.exe states that + this is actually: + __u8 physical_drive; // 0x80 + __u8 current_head; // zero + __u8 extended_boot_signature; + // 0x80 + __u8 unused; // zero + */ +/*0x28*/s64 number_of_sectors; /* Number of sectors in volume. Gives + maximum volume size of 2^63 sectors. + Assuming standard sector size of 512 + bytes, the maximum byte size is + approx. 4.7x10^21 bytes. (-; */ + s64 mft_lcn; /* Cluster location of mft data. */ + s64 mftmirr_lcn; /* Cluster location of copy of mft. */ + s8 clusters_per_mft_record; /* Mft record size in clusters. */ + u8 reserved0[3]; /* zero */ + s8 clusters_per_index_record; /* Index block size in clusters. */ + u8 reserved1[3]; /* zero */ + u64 volume_serial_number; /* Irrelevant (serial number). */ + u32 checksum; /* Boot sector checksum. */ +/*0x54*/u8 bootstrap[426]; /* Irrelevant (boot up code). */ + u16 end_of_sector_marker; /* End of bootsector magic. Always is + 0xaa55 in little endian. */ +/* sizeof() = 512 (0x200) bytes */ +} __attribute__ ((__packed__)) NTFS_BOOT_SECTOR; + +/* + * Magic identifiers present at the beginning of all ntfs record containing + * records (like mft records for example). + */ +typedef enum { + magic_BAAD = const_cpu_to_le32(0x44414142), /* BAAD == corrupt record */ + magic_CHKD = const_cpu_to_le32(0x424b4843), /* CHKD == chkdsk ??? 
*/ + magic_FILE = const_cpu_to_le32(0x454c4946), /* FILE == mft entry */ + magic_HOLE = const_cpu_to_le32(0x454c4f48), /* HOLE == ? (NTFS 3.0+?) */ + magic_INDX = const_cpu_to_le32(0x58444e49), /* INDX == index buffer */ +} NTFS_RECORD_TYPES; + +/* + * Generic magic comparison macros. Finally found a use for the ## preprocessor + * operator! (-8 + */ +#define is_magic(x, m) ( (u32)(x) == magic_##m ) +#define is_magicp(p, m) ( *(u32*)(p) == magic_##m ) + +/* + * Specialised magic comparison macros. + */ +#define is_baad_record(x) ( is_magic (x, BAAD) ) +#define is_baad_recordp(p) ( is_magicp(p, BAAD) ) +#define is_chkd_record(x) ( is_magic (x, CHKD) ) +#define is_chkd_recordp(p) ( is_magicp(p, CHKD) ) +#define is_file_record(x) ( is_magic (x, FILE) ) +#define is_file_recordp(p) ( is_magicp(p, FILE) ) +#define is_hole_record(x) ( is_magic (x, HOLE) ) +#define is_hole_recordp(p) ( is_magicp(p, HOLE) ) +#define is_indx_record(x) ( is_magic (x, INDX) ) +#define is_indx_recordp(p) ( is_magicp(p, INDX) ) + +#define is_mft_record(x) ( is_file_record(x) ) +#define is_mft_recordp(p) ( is_file_recordp(p) ) + +/* + * The Update Sequence Array (usa) is an array of the u16 values which belong + * to the end of each sector protected by the update sequence record in which + * this array is contained. Note that the first entry is the Update Sequence + * Number (usn), a cyclic counter of how many times the protected record has + * been written to disk. The values 0 and -1 (ie. 0xffff) are not used. All + * last u16's of each sector have to be equal to the usn (during reading) or + * are set to it (during writing). If they are not, an incomplete multi sector + * transfer has occurred when the data was written. + * The maximum size for the update sequence array is fixed to: + * maximum size = usa_ofs + (usa_count * 2) = 510 bytes + * The 510 bytes comes from the fact that the last u16 in the array has to + * (obviously) finish before the last u16 of the first 512-byte sector. 
+ * This formula can be used as a consistency check in that usa_ofs + + * (usa_count * 2) has to be less than or equal to 510. + */ +typedef struct { + NTFS_RECORD_TYPES magic; /* A four-byte magic identifying the + record type and/or status. */ + u16 usa_ofs; /* Offset to the Update Sequence Array (usa) + from the start of the ntfs record. */ + u16 usa_count; /* Number of u16 sized entries in the usa + including the Update Sequence Number (usn), + thus the number of fixups is the usa_count + minus 1. */ +} __attribute__ ((__packed__)) NTFS_RECORD; + +/* + * System files mft record numbers. All these files are always marked as used + * in the bitmap attribute of the mft; presumably in order to avoid accidental + * allocation for random other mft records. Also, the sequence number for each + * of the system files is always equal to their mft record number and it is + * never modified. + */ +typedef enum { + FILE_MFT = 0, /* Master file table (mft). Data attribute + contains the entries and bitmap attribute + records which ones are in use (bit==1). */ + FILE_MFTMirr = 1, /* Mft mirror: copy of first four mft records + in data attribute. If cluster size > 4kiB, + copy of first N mft records, with + N = cluster_size / mft_record_size. */ + FILE_LogFile = 2, /* Journalling log in data attribute. */ + FILE_Volume = 3, /* Volume name attribute and volume information + attribute (flags and ntfs version). Windows + refers to this file as volume DASD (Direct + Access Storage Device). */ + FILE_AttrDef = 4, /* Array of attribute definitions in data + attribute. */ + FILE_root = 5, /* Root directory. */ + FILE_Bitmap = 6, /* Allocation bitmap of all clusters (lcns) in + data attribute. */ + FILE_Boot = 7, /* Boot sector (always at cluster 0) in data + attribute. */ + FILE_BadClus = 8, /* Contains all bad clusters in the non-resident + data attribute. */ + FILE_Secure = 9, /* Shared security descriptors in data attribute + and two indexes into the descriptors. 
+ Appeared in Windows 2000. Before that, this + file was named $Quota but was unused. */ + FILE_UpCase = 10, /* Uppercase equivalents of all 65536 Unicode + characters in data attribute. */ + FILE_Extend = 11, /* Directory containing other system files (eg. + $ObjId, $Quota, $Reparse and $UsnJrnl). This + is new to NTFS3.0. */ + FILE_reserved12 = 12, /* Reserved for future use (records 12-15). */ + FILE_reserved13 = 13, + FILE_reserved14 = 14, + FILE_reserved15 = 15, + FILE_first_user = 16, /* First user file, used as test limit for + whether to allow opening a file or not. */ +} NTFS_SYSTEM_FILES; + +/* + * These are the so far known MFT_RECORD_* flags (16-bit) which contain + * information about the mft record in which they are present. + */ +typedef enum { + MFT_RECORD_IN_USE = const_cpu_to_le16(0x0001), + MFT_RECORD_IS_DIRECTORY = const_cpu_to_le16(0x0002), + MFT_REC_SPACE_FILLER = 0xffff /* Just to make flags 16-bit. */ +} __attribute__ ((__packed__)) MFT_RECORD_FLAGS; + +/* + * mft references (aka file references or file record segment references) are + * used whenever a structure needs to refer to a record in the mft. + * + * A reference consists of a 48-bit index into the mft and a 16-bit sequence + * number used to detect stale references. + * + * For error reporting purposes we treat the 48-bit index as a signed quantity. + * + * The sequence number is a circular counter (skipping 0) describing how many + * times the referenced mft record has been (re)used. This has to match the + * sequence number of the mft record being referenced, otherwise the reference + * is considered stale and removed (FIXME: only ntfsck or the driver itself?). + * + * If the sequence number is zero it is assumed that no sequence number + * consistency checking should be performed. + * + * FIXME: Since inodes are 32-bit as of now, the driver needs to always check + * for high_part being 0 and if not either BUG(), cause a panic() or handle + * the situation in some other way. 
This shouldn't be a problem as a volume has + * to become HUGE in order to need more than 32-bits worth of mft records. + * Assuming the standard mft record size of 1kb only the records (never mind + * the non-resident attributes, etc.) would require 4Tb of space on their own + * for the first 32 bits worth of records. This is only if some strange person + * doesn't decide to foul play and make the mft sparse which would be a really + * horrible thing to do as it would trash our current driver implementation. )-: + * Do I hear screams "we want 64-bit inodes!" ?!? (-; + * + * FIXME: The mft zone is defined as the first 12% of the volume. This space is + * reserved so that the mft can grow contiguously and hence doesn't become + * fragmented. Volume free space includes the empty part of the mft zone and + * when the volume's free 88% are used up, the mft zone is shrunk by a factor + * of 2, thus making more space available for more files/data. This process is + * repeated everytime there is no more free space except for the mft zone until + * there really is no more free space. + */ + +/* + * Typedef the MFT_REF as a 64-bit value for easier handling. + * Also define two unpacking macros to get to the reference (MREF) and + * sequence number (MSEQNO) respectively. + * The _LE versions are to be applied on little endian MFT_REFs. + * Note: The _LE versions will return a CPU endian formatted value! + */ +typedef enum { + MFT_REF_MASK_CPU = 0x0000ffffffffffffULL, + MFT_REF_MASK_LE = const_cpu_to_le64(0x0000ffffffffffffULL), +} MFT_REF_CONSTS; + +typedef u64 MFT_REF; + +#define MREF(x) ((unsigned long)((x) & MFT_REF_MASK_CPU)) +#define MSEQNO(x) ((u16)(((x) >> 48) & 0xffff)) +#define MREF_LE(x) ((unsigned long)(le64_to_cpu(x) & MFT_REF_MASK_CPU)) +#define MSEQNO_LE(x) ((u16)((le64_to_cpu(x) >> 48) & 0xffff)) + +#define IS_ERR_MREF(x) (((x) & 0x0000800000000000ULL) ? 
1 : 0) +#define ERR_MREF(x) ((u64)((s64)(x))) +#define MREF_ERR(x) ((int)((s64)(x))) + +/* + * The mft record header present at the beginning of every record in the mft. + * This is followed by a sequence of variable length attribute records which + * is terminated by an attribute of type AT_END which is a truncated attribute + * in that it only consists of the attribute type code AT_END and none of the + * other members of the attribute structure are present. + */ +typedef struct { +/*Ofs*/ +/* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */ + NTFS_RECORD_TYPES magic;/* Usually the magic is "FILE". */ + u16 usa_ofs; /* See NTFS_RECORD definition above. */ + u16 usa_count; /* See NTFS_RECORD definition above. */ + +/* 8*/ u64 lsn; /* $LogFile sequence number for this record. + Changed every time the record is modified. */ +/* 16*/ u16 sequence_number; /* Number of times this mft record has been + reused. (See description for MFT_REF + above.) NOTE: The increment (skipping zero) + is done when the file is deleted. NOTE: If + this is zero it is left zero. */ +/* 18*/ u16 link_count; /* Number of hard links, i.e. the number of + directory entries referencing this record. + NOTE: Only used in mft base records. + NOTE: When deleting a directory entry we + check the link_count and if it is 1 we + delete the file. Otherwise we delete the + FILE_NAME_ATTR being referenced by the + directory entry from the mft record and + decrement the link_count. + FIXME: Careful with Win32 + DOS names! */ +/* 20*/ u16 attrs_offset; /* Byte offset to the first attribute in this + mft record from the start of the mft record. + NOTE: Must be aligned to 8-byte boundary. */ +/* 22*/ MFT_RECORD_FLAGS flags; /* Bit array of MFT_RECORD_FLAGS. When a file + is deleted, the MFT_RECORD_IN_USE flag is + set to zero. */ +/* 24*/ u32 bytes_in_use; /* Number of bytes used in this mft record. + NOTE: Must be aligned to 8-byte boundary. 
*/ +/* 28*/ u32 bytes_allocated; /* Number of bytes allocated for this mft + record. This should be equal to the mft + record size. */ +/* 32*/ MFT_REF base_mft_record; /* This is zero for base mft records. + When it is not zero it is a mft reference + pointing to the base mft record to which + this record belongs (this is then used to + locate the attribute list attribute present + in the base record which describes this + extension record and hence might need + modification when the extension record + itself is modified, also locating the + attribute list also means finding the other + potential extents, belonging to the non-base + mft record). */ +/* 40*/ u16 next_attr_instance; /* The instance number that will be + assigned to the next attribute added to this + mft record. NOTE: Incremented each time + after it is used. NOTE: Every time the mft + record is reused this number is set to zero. + NOTE: The first instance number is always 0. + */ +/* sizeof() = 42 bytes */ +/* NTFS 3.1+ (Windows XP and above) introduce the following additions. */ +/* 42*/ //u16 reserved; /* Reserved/alignment. */ +/* 44*/ //u32 mft_record_number;/* Number of this mft record. */ +/* sizeof() = 48 bytes */ +/* + * When (re)using the mft record, we place the update sequence array at this + * offset, i.e. before we start with the attributes. This also makes sense, + * otherwise we could run into problems with the update sequence array + * containing in itself the last two bytes of a sector which would mean that + * multi sector transfer protection wouldn't work. As you can't protect data + * by overwriting it since you then can't get it back... + * When reading we obviously use the data from the ntfs record header. + */ +} __attribute__ ((__packed__)) MFT_RECORD; + +/* + * System defined attributes (32-bit). 
Each attribute type has a corresponding + * attribute name (Unicode string of maximum 64 character length) as described + * by the attribute definitions present in the data attribute of the $AttrDef + * system file. On NTFS 3.0 volumes the names are just as the types are named + * in the below enum exchanging AT_ for the dollar sign ($). If that isn't a + * revealing choice of symbol... (-; + */ +typedef enum { + AT_UNUSED = const_cpu_to_le32( 0), + AT_STANDARD_INFORMATION = const_cpu_to_le32( 0x10), + AT_ATTRIBUTE_LIST = const_cpu_to_le32( 0x20), + AT_FILE_NAME = const_cpu_to_le32( 0x30), + AT_OBJECT_ID = const_cpu_to_le32( 0x40), + AT_SECURITY_DESCRIPTOR = const_cpu_to_le32( 0x50), + AT_VOLUME_NAME = const_cpu_to_le32( 0x60), + AT_VOLUME_INFORMATION = const_cpu_to_le32( 0x70), + AT_DATA = const_cpu_to_le32( 0x80), + AT_INDEX_ROOT = const_cpu_to_le32( 0x90), + AT_INDEX_ALLOCATION = const_cpu_to_le32( 0xa0), + AT_BITMAP = const_cpu_to_le32( 0xb0), + AT_REPARSE_POINT = const_cpu_to_le32( 0xc0), + AT_EA_INFORMATION = const_cpu_to_le32( 0xd0), + AT_EA = const_cpu_to_le32( 0xe0), + AT_PROPERTY_SET = const_cpu_to_le32( 0xf0), + AT_LOGGED_UTILITY_STREAM = const_cpu_to_le32( 0x100), + AT_FIRST_USER_DEFINED_ATTRIBUTE = const_cpu_to_le32( 0x1000), + AT_END = const_cpu_to_le32(0xffffffff), +} ATTR_TYPES; + +/* + * The collation rules for sorting views/indexes/etc (32-bit). + * + * COLLATION_UNICODE_STRING - Collate Unicode strings by comparing their binary + * Unicode values, except that when a character can be uppercased, the + * upper case value collates before the lower case one. + * COLLATION_FILE_NAME - Collate file names as Unicode strings. The collation + * is done very much like COLLATION_UNICODE_STRING. In fact I have no idea + * what the difference is. 
Perhaps the difference is that file names + * would treat some special characters in an odd way (see + * unistr.c::ntfs_collate_names() and unistr.c::legal_ansi_char_array[] + * for what I mean) but COLLATION_UNICODE_STRING would not give any special + * treatment to any characters at all, but this is speculation. + * COLLATION_NTOFS_ULONG - Sorting is done according to ascending u32 key + * values. E.g. used for $SII index in FILE_Secure, which sorts by + * security_id (u32). + * COLLATION_NTOFS_SID - Sorting is done according to ascending SID values. + * E.g. used for $O index in FILE_Extend/$Quota. + * COLLATION_NTOFS_SECURITY_HASH - Sorting is done first by ascending hash + * values and second by ascending security_id values. E.g. used for $SDH + * index in FILE_Secure. + * COLLATION_NTOFS_ULONGS - Sorting is done according to a sequence of ascending + * u32 key values. E.g. used for $O index in FILE_Extend/$ObjId, which + * sorts by object_id (16-byte), by splitting up the object_id in four + * u32 values and using them as individual keys. E.g. take the following + * two object_ids, stored as follows on disk: + * 1st: a1 61 65 b7 65 7b d4 11 9e 3d 00 e0 81 10 42 59 + * 2nd: 38 14 37 d2 d2 f3 d4 11 a5 21 c8 6b 79 b1 97 45 + * To compare them, they are split into four u32 values each, like so: + * 1st: 0xb76561a1 0x11d47b65 0xe0003d9e 0x59421081 + * 2nd: 0xd2371438 0x11d4f3d2 0x6bc821a5 0x4597b179 + * Now, it is apparent why the 2nd object_id collates after the 1st: the + * first u32 value of the 1st object_id is less than the first u32 of + * the 2nd object_id. If the first u32 values of both object_ids were + * equal then the second u32 values would be compared, etc. + */ +typedef enum { + COLLATION_BINARY = const_cpu_to_le32(0), /* Collate by binary + compare where the first byte is most + significant. */ + COLLATION_FILE_NAME = const_cpu_to_le32(1), /* Collate file names + as Unicode strings.
*/ + COLLATION_UNICODE_STRING = const_cpu_to_le32(2), /* Collate Unicode + strings by comparing their binary + Unicode values, except that when a + character can be uppercased, the upper + case value collates before the lower + case one. */ + COLLATION_NTOFS_ULONG = const_cpu_to_le32(16), + COLLATION_NTOFS_SID = const_cpu_to_le32(17), + COLLATION_NTOFS_SECURITY_HASH = const_cpu_to_le32(18), + COLLATION_NTOFS_ULONGS = const_cpu_to_le32(19), +} COLLATION_RULES; + +/* + * The flags (32-bit) describing attribute properties in the attribute + * definition structure. FIXME: This information is from Regis's information + * and, according to him, it is not certain and probably incomplete. + * The INDEXABLE flag is fairly certainly correct as only the file name + * attribute has this flag set and this is the only attribute indexed in NT4. + */ +typedef enum { + INDEXABLE = const_cpu_to_le32(0x02), /* Attribute can be + indexed. */ + NEED_TO_REGENERATE = const_cpu_to_le32(0x40), /* Need to regenerate + during regeneration + phase. */ + CAN_BE_NON_RESIDENT = const_cpu_to_le32(0x80), /* Attribute can be + non-resident. */ +} ATTR_DEF_FLAGS; + +/* + * The data attribute of FILE_AttrDef contains a sequence of attribute + * definitions for the NTFS volume. With this, it is supposed to be safe for an + * older NTFS driver to mount a volume containing a newer NTFS version without + * damaging it (that's the theory. In practice it's: not damaging it too much). + * Entries are sorted by attribute type. The flags describe whether the + * attribute can be resident/non-resident and possibly other things, but the + * actual bits are unknown. + */ +typedef struct { +/*hex ofs*/ +/* 0*/ uchar_t name[0x40]; /* Unicode name of the attribute. Zero + terminated. */ +/* 80*/ ATTR_TYPES type; /* Type of the attribute. */ +/* 84*/ u32 display_rule; /* Default display rule. + FIXME: What does it mean? (AIA) */ +/* 88*/ COLLATION_RULES collation_rule; /* Default collation rule. 
*/ +/* 8c*/ ATTR_DEF_FLAGS flags; /* Flags describing the attribute. */ +/* 90*/ u64 min_size; /* Optional minimum attribute size. */ +/* 98*/ u64 max_size; /* Maximum size of attribute. */ +/* sizeof() = 0xa0 or 160 bytes */ +} __attribute__ ((__packed__)) ATTR_DEF; + +/* + * Attribute flags (16-bit). + */ +typedef enum { + ATTR_IS_COMPRESSED = const_cpu_to_le16(0x0001), + ATTR_COMPRESSION_MASK = const_cpu_to_le16(0x00ff), /* Compression + method mask. Also, first + illegal value. */ + ATTR_IS_ENCRYPTED = const_cpu_to_le16(0x4000), + ATTR_IS_SPARSE = const_cpu_to_le16(0x8000), +} __attribute__ ((__packed__)) ATTR_FLAGS; + +/* + * Attribute compression. + * + * Only the data attribute is ever compressed in the current ntfs driver in + * Windows. Further, compression is only applied when the data attribute is + * non-resident. Finally, to use compression, the maximum allowed cluster size + * on a volume is 4kib. + * + * The compression method is based on independently compressing blocks of X + * clusters, where X is determined from the compression_unit value found in the + * non-resident attribute record header (more precisely: X = 2^compression_unit + * clusters). On Windows NT/2k, X always is 16 clusters (compression_unit = 4). + * + * There are three different cases of how a compression block of X clusters + * can be stored: + * + * 1) The data in the block is all zero (a sparse block): + * This is stored as a sparse block in the run list, i.e. the run list + * entry has length = X and lcn = -1. The mapping pairs array actually + * uses a delta_lcn value length of 0, i.e. delta_lcn is not present at + * all, which is then interpreted by the driver as lcn = -1. + * NOTE: Even uncompressed files can be sparse on NTFS 3.0 volumes, then + * the same principles apply as above, except that the length is not + * restricted to being any particular value. 
+ * + * 2) The data in the block is not compressed: + * This happens when compression doesn't reduce the size of the block + * in clusters. I.e. if compression has a small effect so that the + * compressed data still occupies X clusters, then the uncompressed data + * is stored in the block. + * This case is recognised by the fact that the run list entry has + * length = X and lcn >= 0. The mapping pairs array stores this as + * normal with a run length of X and some specific delta_lcn, i.e. + * delta_lcn has to be present. + * + * 3) The data in the block is compressed: + * The common case. This case is recognised by the fact that the run + * list entry has length L < X and lcn >= 0. The mapping pairs array + * stores this as normal with a run length of L and some specific + * delta_lcn, i.e. delta_lcn has to be present. This run list entry is + * immediately followed by a sparse entry with length = X - L and + * lcn = -1. The latter entry is to make up the vcn counting to the + * full compression block size X. + * + * In fact, life is more complicated because adjacent entries of the same type + * can be coalesced. This means that one has to keep track of the number of + * clusters handled and work on a basis of X clusters at a time being one + * block. An example: if length L > X this means that this particular run list + * entry contains a block of length X and part of one or more blocks of length + * L - X. Another example: if length L < X, this does not necessarily mean that + * the block is compressed as it might be that the lcn changes inside the block + * and hence the following run list entry describes the continuation of the + * potentially compressed block. The block would be compressed if the + * following run list entry describes at least X - L sparse clusters, thus + * making up the compression block length as described in point 3 above.
(Of + * course, there can be several run list entries with small lengths so that the + * sparse entry does not follow the first data containing entry with + * length < X.) + * + * NOTE: At the end of the compressed attribute value, there most likely is not + * just the right amount of data to make up a compression block, thus this data + * is not even attempted to be compressed. It is just stored as is, unless + * the number of clusters it occupies is reduced when compressed in which case + * it is stored as a compressed compression block, complete with sparse + * clusters at the end. + */ + +/* + * Flags of resident attributes (8-bit). + */ +typedef enum { + RESIDENT_ATTR_IS_INDEXED = 0x01, /* Attribute is referenced in an index + (has implications for deleting and + modifying the attribute). */ +} __attribute__ ((__packed__)) RESIDENT_ATTR_FLAGS; + +/* + * Attribute record header. Always aligned to 8-byte boundary. + */ +typedef struct { +/*Ofs*/ +/* 0*/ ATTR_TYPES type; /* The (32-bit) type of the attribute. */ +/* 4*/ u32 length; /* Byte size of the resident part of the + attribute (aligned to 8-byte boundary). + Used to get to the next attribute. */ +/* 8*/ u8 non_resident; /* If 0, attribute is resident. + If 1, attribute is non-resident. */ +/* 9*/ u8 name_length; /* Unicode character size of name of attribute. + 0 if unnamed. */ +/* 10*/ u16 name_offset; /* If name_length != 0, the byte offset to the + beginning of the name from the attribute + record. Note that the name is stored as a + Unicode string. When creating, place offset + just at the end of the record header. Then, + follow with attribute value or mapping pairs + array, resident and non-resident attributes + respectively, aligning to an 8-byte + boundary. */ +/* 12*/ ATTR_FLAGS flags; /* Flags describing the attribute. */ +/* 14*/ u16 instance; /* The instance of this attribute record. 
This + number is unique within this mft record (see + MFT_RECORD/next_attr_instance notes in + mft.h for more details). */ +/* 16*/ union { + /* Resident attributes. */ + struct { +/* 16 */ u32 value_length; /* Byte size of attribute value. */ +/* 20 */ u16 value_offset; /* Byte offset of the attribute + value from the start of the + attribute record. When creating, + align to 8-byte boundary if we + have a name present as this might + not have a length of a multiple + of 8-bytes. */ +/* 22 */ RESIDENT_ATTR_FLAGS flags; /* See above. */ +/* 23 */ s8 reserved; /* Reserved/alignment to 8-byte + boundary. */ + } __attribute__ ((__packed__)) resident; + /* Non-resident attributes. */ + struct { +/* 16*/ VCN lowest_vcn; /* Lowest valid virtual cluster number + for this portion of the attribute value or + 0 if this is the only extent (usually the + case). - Only when an attribute list is used + does lowest_vcn != 0 ever occur. */ +/* 24*/ VCN highest_vcn; /* Highest valid vcn of this extent of + the attribute value. - Usually there is only one + portion, so this usually equals the attribute + value size in clusters minus 1. Can be -1 for + zero length files. Can be 0 for "single extent" + attributes. */ +/* 32*/ u16 mapping_pairs_offset; /* Byte offset from the + beginning of the structure to the mapping pairs + array which contains the mappings between the + vcns and the logical cluster numbers (lcns). + When creating, place this at the end of this + record header aligned to 8-byte boundary. */ +/* 34*/ u8 compression_unit; /* The compression unit expressed + as the log to the base 2 of the number of + clusters in a compression unit. 0 means not + compressed. (This effectively limits the + compression unit size to be a power of two + clusters.) WinNT4 only uses a value of 4. */ +/* 35*/ u8 reserved[5]; /* Align to 8-byte boundary.
*/ +/* The sizes below are only used when lowest_vcn is zero, as otherwise it would + be difficult to keep them up-to-date.*/ +/* 40*/ s64 allocated_size; /* Byte size of disk space + allocated to hold the attribute value. Always + is a multiple of the cluster size. When a file + is compressed, this field is a multiple of the + compression block size (2^compression_unit) and + it represents the logically allocated space + rather than the actual on disk usage. For this + use the compressed_size (see below). */ +/* 48*/ s64 data_size; /* Byte size of the attribute + value. Can be larger than allocated_size if + attribute value is compressed or sparse. */ +/* 56*/ s64 initialized_size; /* Byte size of initialized + portion of the attribute value. Usually equals + data_size. */ +/* sizeof(uncompressed attr) = 64*/ +/* 64*/ s64 compressed_size; /* Byte size of the attribute + value after compression. Only present when + compressed. Always is a multiple of the + cluster size. Represents the actual amount of + disk space being used on the disk. */ +/* sizeof(compressed attr) = 72*/ + } __attribute__ ((__packed__)) non_resident; + } __attribute__ ((__packed__)) data; +} __attribute__ ((__packed__)) ATTR_RECORD; + +typedef ATTR_RECORD ATTR_REC; + +/* + * File attribute flags (32-bit). + */ +typedef enum { + /* + * These flags are only present in the STANDARD_INFORMATION attribute + * (in the field file_attributes). + */ + FILE_ATTR_READONLY = const_cpu_to_le32(0x00000001), + FILE_ATTR_HIDDEN = const_cpu_to_le32(0x00000002), + FILE_ATTR_SYSTEM = const_cpu_to_le32(0x00000004), + /* Old DOS volid. Unused in NT. = cpu_to_le32(0x00000008), */ + + FILE_ATTR_DIRECTORY = const_cpu_to_le32(0x00000010), + /* FILE_ATTR_DIRECTORY is not considered valid in NT. It is reserved + for the DOS SUBDIRECTORY flag. 
+ FILE_ATTR_ARCHIVE = const_cpu_to_le32(0x00000020), + FILE_ATTR_DEVICE = const_cpu_to_le32(0x00000040), + FILE_ATTR_NORMAL = const_cpu_to_le32(0x00000080), + + FILE_ATTR_TEMPORARY = const_cpu_to_le32(0x00000100), + FILE_ATTR_SPARSE_FILE = const_cpu_to_le32(0x00000200), + FILE_ATTR_REPARSE_POINT = const_cpu_to_le32(0x00000400), + FILE_ATTR_COMPRESSED = const_cpu_to_le32(0x00000800), + + FILE_ATTR_OFFLINE = const_cpu_to_le32(0x00001000), + FILE_ATTR_NOT_CONTENT_INDEXED = const_cpu_to_le32(0x00002000), + FILE_ATTR_ENCRYPTED = const_cpu_to_le32(0x00004000), + + FILE_ATTR_VALID_FLAGS = const_cpu_to_le32(0x00007fb7), + /* FILE_ATTR_VALID_FLAGS masks out the old DOS VolId and the + FILE_ATTR_DEVICE and preserves everything else. This mask + is used to obtain all flags that are valid for reading. */ + FILE_ATTR_VALID_SET_FLAGS = const_cpu_to_le32(0x000031a7), + /* FILE_ATTR_VALID_SET_FLAGS masks out the old DOS VolId, the + F_A_DEVICE, F_A_DIRECTORY, F_A_SPARSE_FILE, F_A_REPARSE_POINT, + F_A_COMPRESSED and F_A_ENCRYPTED and preserves the rest. This mask + is used to obtain all flags that are valid for setting. */ + + /* + * These flags are only present in the FILE_NAME attribute (in the + * field file_attributes). + */ + FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT = const_cpu_to_le32(0x10000000), + /* This is a copy of the corresponding bit from the mft record, telling + us whether this is a directory or not, i.e. whether it has an + index root attribute or not. */ + FILE_ATTR_DUP_VIEW_INDEX_PRESENT = const_cpu_to_le32(0x20000000), + /* This is a copy of the corresponding bit from the mft record, telling + us whether this file has a view index present (eg. object id index, + quota index, one of the security indexes or the encrypting file + system related indexes). */ +} FILE_ATTR_FLAGS; + +/* + * NOTE on times in NTFS: All times are in MS standard time format, i.e.
they + * are the number of 100-nanosecond intervals since 1st January 1601, 00:00:00 + * universal coordinated time (UTC). (In Linux time starts 1st January 1970, + * 00:00:00 UTC and is stored as the number of 1-second intervals since then.) + */ + +/* + * Attribute: Standard information (0x10). + * + * NOTE: Always resident. + * NOTE: Present in all base file records on a volume. + * NOTE: There is conflicting information about the meaning of each of the time + * fields but the meaning as defined below has been verified to be + * correct by practical experimentation on Windows NT4 SP6a and is hence + * assumed to be the one and only correct interpretation. + */ +typedef struct { +/*Ofs*/ +/* 0*/ s64 creation_time; /* Time file was created. Updated when + a filename is changed(?). */ +/* 8*/ s64 last_data_change_time; /* Time the data attribute was last + modified. */ +/* 16*/ s64 last_mft_change_time; /* Time this mft record was last + modified. */ +/* 24*/ s64 last_access_time; /* Approximate time when the file was + last accessed (obviously this is not + updated on read-only volumes). In + Windows this is only updated when + accessed if some time delta has + passed since the last update. Also, + last access times updates can be + disabled altogether for speed. */ +/* 32*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file. */ +/* 36*/ union { + /* NTFS 1.2 */ + struct { + /* 36*/ u8 reserved12[12]; /* Reserved/alignment to 8-byte + boundary. */ + } __attribute__ ((__packed__)) v1; + /* sizeof() = 48 bytes */ + /* NTFS 3.x */ + struct { +/* + * If a volume has been upgraded from a previous NTFS version, then these + * fields are present only if the file has been accessed since the upgrade. + * Recognize the difference by comparing the length of the resident attribute + * value. If it is 48, then the following fields are missing. If it is 72 then + * the fields are present. 
Maybe just check like this: + * if (resident.ValueLength < sizeof(STANDARD_INFORMATION)) { + * Assume NTFS 1.2- format. + * If (volume version is 3.x) + * Upgrade attribute to NTFS 3.x format. + * else + * Use NTFS 1.2- format for access. + * } else + * Use NTFS 3.x format for access. + * Only problem is that it might be legal to set the length of the value to + * arbitrarily large values thus spoiling this check. - But chkdsk probably + * views that as a corruption, assuming that it behaves like this for all + * attributes. + */ + /* 36*/ u32 maximum_versions; /* Maximum allowed versions for + file. Zero if version numbering is disabled. */ + /* 40*/ u32 version_number; /* This file's version (if any). + Set to zero if maximum_versions is zero. */ + /* 44*/ u32 class_id; /* Class id from bidirectional + class id index (?). */ + /* 48*/ u32 owner_id; /* Owner_id of the user owning + the file. Translate via $Q index in FILE_Extend + /$Quota to the quota control entry for the user + owning the file. Zero if quotas are disabled. */ + /* 52*/ u32 security_id; /* Security_id for the file. + Translate via $SII index and $SDS data stream + in FILE_Secure to the security descriptor. */ + /* 56*/ u64 quota_charged; /* Byte size of the charge to + the quota for all streams of the file. Note: Is + zero if quotas are disabled. */ + /* 64*/ u64 usn; /* Last update sequence number + of the file. This is a direct index into the + change (aka usn) journal file. It is zero if + the usn journal is disabled. + NOTE: To disable the journal need to delete + the journal file itself and to then walk the + whole mft and set all Usn entries in all mft + records to zero! (This can take a while!) + The journal is FILE_Extend/$UsnJrnl. Win2k + will recreate the journal and initiate + logging if necessary when mounting the + partition. This, in contrast to disabling the + journal is a very fast process, so the user + won't even notice it. 
*/ + } __attribute__ ((__packed__)) v3; + /* sizeof() = 72 bytes (NTFS 3.x) */ + } __attribute__ ((__packed__)) ver; +} __attribute__ ((__packed__)) STANDARD_INFORMATION; + +/* + * Attribute: Attribute list (0x20). + * + * - Can be either resident or non-resident. + * - Value consists of a sequence of variable length, 8-byte aligned, + * ATTR_LIST_ENTRY records. + * - The list is not terminated by anything at all! The only way to know when + * the end is reached is to keep track of the current offset and compare it to + * the attribute value size. + * - The attribute list attribute contains one entry for each attribute of + * the file in which the list is located, except for the list attribute + * itself. The list is sorted: first by attribute type, second by attribute + * name (if present), third by instance number. The extents of one + * non-resident attribute (if present) immediately follow after the initial + * extent. They are ordered by lowest_vcn and have their instance set to zero. + * It is not allowed to have two attributes with all sorting keys equal. + * - Further restrictions: + * - If not resident, the vcn to lcn mapping array has to fit inside the + * base mft record. + * - The attribute list attribute value has a maximum size of 256kb. This + * is imposed by the Windows cache manager. + * - Attribute lists are only used when the attributes of mft record do not + * fit inside the mft record despite all attributes (that can be made + * non-resident) having been made non-resident. This can happen e.g. when: + * - File has a large number of hard links (lots of file name + * attributes present). + * - The mapping pairs array of some non-resident attribute becomes so + * large due to fragmentation that it overflows the mft record. + * - The security descriptor is very complex (not applicable to + * NTFS 3.0 volumes). + * - There are many named streams. + */ +typedef struct { +/*Ofs*/ +/* 0*/ ATTR_TYPES type; /* Type of referenced attribute.
*/ +/* 4*/ u16 length; /* Byte size of this entry (8-byte aligned). */ +/* 6*/ u8 name_length; /* Size in Unicode chars of the name of the + attribute or 0 if unnamed. */ +/* 7*/ u8 name_offset; /* Byte offset to beginning of attribute name + (always set this to where the name would + start even if unnamed). */ +/* 8*/ VCN lowest_vcn; /* Lowest virtual cluster number of this portion + of the attribute value. This is usually 0. It + is non-zero for the case where one attribute + does not fit into one mft record and thus + several mft records are allocated to hold + this attribute. In the latter case, each mft + record holds one extent of the attribute and + there is one attribute list entry for each + extent. NOTE: This is DEFINITELY a signed + value! The windows driver uses cmp, followed + by jg when comparing this, thus it treats it + as signed. */ +/* 16*/ MFT_REF mft_reference; /* The reference of the mft record holding + the ATTR_RECORD for this portion of the + attribute value. */ +/* 24*/ u16 instance; /* If lowest_vcn = 0, the instance of the + attribute being referenced; otherwise 0. */ +/* 26*/ uchar_t name[0]; /* Use when creating only. When reading use + name_offset to determine the location of the + name. */ +/* sizeof() = 26 + (attribute_name_length * 2) bytes */ +} __attribute__ ((__packed__)) ATTR_LIST_ENTRY; + +/* + * The maximum allowed length for a file name. + */ +#define MAXIMUM_FILE_NAME_LENGTH 255 + +/* + * Possible namespaces for filenames in ntfs (8-bit). + */ +typedef enum { + FILE_NAME_POSIX = 0x00, + /* This is the largest namespace. It is case sensitive and + allows all Unicode characters except for: '\0' and '/'. + Beware that in WinNT/2k files which eg have the same name + except for their case will not be distinguished by the + standard utilities and thus a "del filename" will delete + both "filename" and "fileName" without warning. */ + FILE_NAME_WIN32 = 0x01, + /* The standard WinNT/2k NTFS long filenames. Case insensitive. 
+ All Unicode chars except: '\0', '"', '*', '/', ':', '<', + '>', '?', '\' and '|'. Further, names cannot end with a '.' + or a space. */ + FILE_NAME_DOS = 0x02, + /* The standard DOS filenames (8.3 format). Uppercase only. + All 8-bit characters greater than space, except: '"', '*', '+', + ',', '/', ':', ';', '<', '=', '>', '?' and '\'. */ + FILE_NAME_WIN32_AND_DOS = 0x03, + /* 3 means that both the Win32 and the DOS filenames are + identical and hence have been saved in this single filename + record. */ +} __attribute__ ((__packed__)) FILE_NAME_TYPE_FLAGS; + +/* + * Attribute: Filename (0x30). + * + * NOTE: Always resident. + * NOTE: All fields, except the parent_directory, are only updated when the + * filename is changed. Until then, they just become out of sync with + * reality and the more up to date values are present in the standard + * information attribute. + * NOTE: There is conflicting information about the meaning of each of the time + * fields but the meaning as defined below has been verified to be + * correct by practical experimentation on Windows NT4 SP6a and is hence + * assumed to be the one and only correct interpretation. + */ +typedef struct { +/*hex ofs*/ +/* 0*/ MFT_REF parent_directory; /* Directory this filename is + referenced from. */ +/* 8*/ s64 creation_time; /* Time file was created. */ +/* 10*/ s64 last_data_change_time; /* Time the data attribute was last + modified. */ +/* 18*/ s64 last_mft_change_time; /* Time this mft record was last + modified. */ +/* 20*/ s64 last_access_time; /* Last time this mft record was + accessed. */ +/* 28*/ s64 allocated_size; /* Byte size of allocated space for the + data attribute. NOTE: Is a multiple + of the cluster size. */ +/* 30*/ s64 data_size; /* Byte size of actual data in data + attribute. */ +/* 38*/ FILE_ATTR_FLAGS file_attributes; /* Flags describing the file.
*/ +/* 3c*/ union { + /* 3c*/ struct { + /* 3c*/ u16 packed_ea_size; /* Size of the buffer needed to + pack the extended attributes + (EAs), if such are present.*/ + /* 3e*/ u16 reserved; /* Reserved for alignment. */ + } __attribute__ ((__packed__)) ea; + /* 3c*/ struct { + /* 3c*/ u32 reparse_point_tag; /* Type of reparse point, + present only in reparse + points and only if there are + no EAs. */ + } __attribute__ ((__packed__)) rp; + } __attribute__ ((__packed__)) type; +/* 40*/ u8 file_name_length; /* Length of file name in + (Unicode) characters. */ +/* 41*/ FILE_NAME_TYPE_FLAGS file_name_type; /* Namespace of the file name.*/ +/* 42*/ uchar_t file_name[0]; /* File name in Unicode. */ +} __attribute__ ((__packed__)) FILE_NAME_ATTR; + +/* + * GUID structures store globally unique identifiers (GUID). A GUID is a + * 128-bit value consisting of one group of eight hexadecimal digits, followed + * by three groups of four hexadecimal digits each, followed by one group of + * twelve hexadecimal digits. GUIDs are Microsoft's implementation of the + * distributed computing environment (DCE) universally unique identifier (UUID). + * Example of a GUID: + * 1F010768-5A73-BC91-0010-A52216A7227B + */ +typedef struct { + u32 data1; /* The first eight hexadecimal digits of the GUID. */ + u16 data2; /* The first group of four hexadecimal digits. */ + u16 data3; /* The second group of four hexadecimal digits. */ + u8 data4[8]; /* The first two bytes are the third group of four + hexadecimal digits. The remaining six bytes are the + final 12 hexadecimal digits. */ +} __attribute__ ((__packed__)) GUID; + +/* + * FILE_Extend/$ObjId contains an index named $O. This index contains all + * object_ids present on the volume as the index keys and the corresponding + * mft_record numbers as the index entry data parts. The data part (defined + * below) also contains three other object_ids: + * birth_volume_id - object_id of FILE_Volume on which the file was first + * created. Optional (i.e.
can be zero). + * birth_object_id - object_id of file when it was first created. Usually + * equals the object_id. Optional (i.e. can be zero). + * domain_id - Reserved (always zero). + */ +typedef struct { + MFT_REF mft_reference; /* Mft record containing the object_id in + the index entry key. */ + union { + struct { + GUID birth_volume_id; + GUID birth_object_id; + GUID domain_id; + } __attribute__ ((__packed__)) origin; + u8 extended_info[48]; + } __attribute__ ((__packed__)) opt; +} __attribute__ ((__packed__)) OBJ_ID_INDEX_DATA; + +/* + * Attribute: Object id (NTFS 3.0+) (0x40). + * + * NOTE: Always resident. + */ +typedef struct { + GUID object_id; /* Unique id assigned to the + file.*/ + /* The following fields are optional. The attribute value size is 16 + bytes, i.e. sizeof(GUID), if these are not present at all. Note, + the entries can be present but one or more (or all) can be zero + meaning that that particular value(s) is(are) not defined. */ + union { + struct { + GUID birth_volume_id; /* Unique id of volume on which + the file was first created.*/ + GUID birth_object_id; /* Unique id of file when it was + first created. */ + GUID domain_id; /* Reserved, zero. */ + } __attribute__ ((__packed__)) origin; + u8 extended_info[48]; + } __attribute__ ((__packed__)) opt; +} __attribute__ ((__packed__)) OBJECT_ID_ATTR; + +/* + * The pre-defined IDENTIFIER_AUTHORITIES used as SID_IDENTIFIER_AUTHORITY in + * the SID structure (see below). + */ +//typedef enum { /* SID string prefix. 
*/ +// SECURITY_NULL_SID_AUTHORITY = {0, 0, 0, 0, 0, 0}, /* S-1-0 */ +// SECURITY_WORLD_SID_AUTHORITY = {0, 0, 0, 0, 0, 1}, /* S-1-1 */ +// SECURITY_LOCAL_SID_AUTHORITY = {0, 0, 0, 0, 0, 2}, /* S-1-2 */ +// SECURITY_CREATOR_SID_AUTHORITY = {0, 0, 0, 0, 0, 3}, /* S-1-3 */ +// SECURITY_NON_UNIQUE_AUTHORITY = {0, 0, 0, 0, 0, 4}, /* S-1-4 */ +// SECURITY_NT_SID_AUTHORITY = {0, 0, 0, 0, 0, 5}, /* S-1-5 */ +//} IDENTIFIER_AUTHORITIES; + +/* + * These relative identifiers (RIDs) are used with the above identifier + * authorities to make up universal well-known SIDs. + * + * Note: The relative identifier (RID) refers to the portion of a SID, which + * identifies a user or group in relation to the authority that issued the SID. + * For example, the universal well-known SID Creator Owner ID (S-1-3-0) is + * made up of the identifier authority SECURITY_CREATOR_SID_AUTHORITY (3) and + * the relative identifier SECURITY_CREATOR_OWNER_RID (0). + */ +typedef enum { /* Identifier authority. */ + SECURITY_NULL_RID = 0, /* S-1-0 */ + SECURITY_WORLD_RID = 0, /* S-1-1 */ + SECURITY_LOCAL_RID = 0, /* S-1-2 */ + + SECURITY_CREATOR_OWNER_RID = 0, /* S-1-3 */ + SECURITY_CREATOR_GROUP_RID = 1, /* S-1-3 */ + + SECURITY_CREATOR_OWNER_SERVER_RID = 2, /* S-1-3 */ + SECURITY_CREATOR_GROUP_SERVER_RID = 3, /* S-1-3 */ + + SECURITY_DIALUP_RID = 1, + SECURITY_NETWORK_RID = 2, + SECURITY_BATCH_RID = 3, + SECURITY_INTERACTIVE_RID = 4, + SECURITY_SERVICE_RID = 6, + SECURITY_ANONYMOUS_LOGON_RID = 7, + SECURITY_PROXY_RID = 8, + SECURITY_ENTERPRISE_CONTROLLERS_RID=9, + SECURITY_SERVER_LOGON_RID = 9, + SECURITY_PRINCIPAL_SELF_RID = 0xa, + SECURITY_AUTHENTICATED_USER_RID = 0xb, + SECURITY_RESTRICTED_CODE_RID = 0xc, + SECURITY_TERMINAL_SERVER_RID = 0xd, + + SECURITY_LOGON_IDS_RID = 5, + SECURITY_LOGON_IDS_RID_COUNT = 3, + + SECURITY_LOCAL_SYSTEM_RID = 0x12, + + SECURITY_NT_NON_UNIQUE = 0x15, + + SECURITY_BUILTIN_DOMAIN_RID = 0x20, + + /* + * Well-known domain relative sub-authority values (RIDs). 
+ */ + + /* Users. */ + DOMAIN_USER_RID_ADMIN = 0x1f4, + DOMAIN_USER_RID_GUEST = 0x1f5, + DOMAIN_USER_RID_KRBTGT = 0x1f6, + + /* Groups. */ + DOMAIN_GROUP_RID_ADMINS = 0x200, + DOMAIN_GROUP_RID_USERS = 0x201, + DOMAIN_GROUP_RID_GUESTS = 0x202, + DOMAIN_GROUP_RID_COMPUTERS = 0x203, + DOMAIN_GROUP_RID_CONTROLLERS = 0x204, + DOMAIN_GROUP_RID_CERT_ADMINS = 0x205, + DOMAIN_GROUP_RID_SCHEMA_ADMINS = 0x206, + DOMAIN_GROUP_RID_ENTERPRISE_ADMINS= 0x207, + DOMAIN_GROUP_RID_POLICY_ADMINS = 0x208, + + /* Aliases. */ + DOMAIN_ALIAS_RID_ADMINS = 0x220, + DOMAIN_ALIAS_RID_USERS = 0x221, + DOMAIN_ALIAS_RID_GUESTS = 0x222, + DOMAIN_ALIAS_RID_POWER_USERS = 0x223, + + DOMAIN_ALIAS_RID_ACCOUNT_OPS = 0x224, + DOMAIN_ALIAS_RID_SYSTEM_OPS = 0x225, + DOMAIN_ALIAS_RID_PRINT_OPS = 0x226, + DOMAIN_ALIAS_RID_BACKUP_OPS = 0x227, + + DOMAIN_ALIAS_RID_REPLICATOR = 0x228, + DOMAIN_ALIAS_RID_RAS_SERVERS = 0x229, + DOMAIN_ALIAS_RID_PREW2KCOMPACCESS = 0x22a, +} RELATIVE_IDENTIFIERS; + +/* + * The universal well-known SIDs: + * + * NULL_SID S-1-0-0 + * WORLD_SID S-1-1-0 + * LOCAL_SID S-1-2-0 + * CREATOR_OWNER_SID S-1-3-0 + * CREATOR_GROUP_SID S-1-3-1 + * CREATOR_OWNER_SERVER_SID S-1-3-2 + * CREATOR_GROUP_SERVER_SID S-1-3-3 + * + * (Non-unique IDs) S-1-4 + * + * NT well-known SIDs: + * + * NT_AUTHORITY_SID S-1-5 + * DIALUP_SID S-1-5-1 + * + * NETWORK_SID S-1-5-2 + * BATCH_SID S-1-5-3 + * INTERACTIVE_SID S-1-5-4 + * SERVICE_SID S-1-5-6 + * ANONYMOUS_LOGON_SID S-1-5-7 (aka null logon session) + * PROXY_SID S-1-5-8 + * SERVER_LOGON_SID S-1-5-9 (aka domain controller account) + * SELF_SID S-1-5-10 (self RID) + * AUTHENTICATED_USER_SID S-1-5-11 + * RESTRICTED_CODE_SID S-1-5-12 (running restricted code) + * TERMINAL_SERVER_SID S-1-5-13 (running on terminal server) + * + * (Logon IDs) S-1-5-5-X-Y + * + * (NT non-unique IDs) S-1-5-0x15-... + * + * (Built-in domain) S-1-5-0x20 + */ + +/* + * The SID_IDENTIFIER_AUTHORITY is a 48-bit value used in the SID structure.
+ */ +typedef union { + struct { + u16 high; /* High 16-bits (stored first, big endian). */ + u32 low; /* Low 32-bits (big endian). */ + } __attribute__ ((__packed__)) parts; + u8 value[6]; /* Value as individual bytes. */ +} __attribute__ ((__packed__)) SID_IDENTIFIER_AUTHORITY; + +/* + * The SID structure is a variable-length structure used to uniquely identify + * users or groups. SID stands for security identifier. + * + * The standard textual representation of the SID is of the form: + * S-R-I-S-S... + * Where: + * - The first "S" is the literal character 'S' identifying the following + * digits as a SID. + * - R is the revision level of the SID expressed as a sequence of digits + * either in decimal or hexadecimal (if the latter, prefixed by "0x"). + * - I is the 48-bit identifier_authority, expressed as digits as R above. + * - S... is one or more sub_authority values, expressed as digits as above. + * + * Example SID; the domain-relative SID of the local Administrators group on + * Windows NT/2k: + * S-1-5-32-544 + * This translates to a SID with: + * revision = 1, + * sub_authority_count = 2, + * identifier_authority = {0,0,0,0,0,5}, // SECURITY_NT_AUTHORITY + * sub_authority[0] = 32, // SECURITY_BUILTIN_DOMAIN_RID + * sub_authority[1] = 544 // DOMAIN_ALIAS_RID_ADMINS + */ +typedef struct { + u8 revision; + u8 sub_authority_count; + SID_IDENTIFIER_AUTHORITY identifier_authority; + u32 sub_authority[1]; /* At least one sub_authority. */ +} __attribute__ ((__packed__)) SID; + +/* + * Current constants for SIDs. + */ +typedef enum { + SID_REVISION = 1, /* Current revision level. */ + SID_MAX_SUB_AUTHORITIES = 15, /* Maximum number of those. */ + SID_RECOMMENDED_SUB_AUTHORITIES = 1, /* Will change to around 6 in + a future revision. */ +} SID_CONSTANTS; + +/* + * The predefined ACE types (8-bit, see below).
+ */ +typedef enum { + ACCESS_MIN_MS_ACE_TYPE = 0, + ACCESS_ALLOWED_ACE_TYPE = 0, + ACCESS_DENIED_ACE_TYPE = 1, + SYSTEM_AUDIT_ACE_TYPE = 2, + SYSTEM_ALARM_ACE_TYPE = 3, /* Not implemented as of Win2k. */ + ACCESS_MAX_MS_V2_ACE_TYPE = 3, + + ACCESS_ALLOWED_COMPOUND_ACE_TYPE= 4, + ACCESS_MAX_MS_V3_ACE_TYPE = 4, + + /* The following are Win2k only. */ + ACCESS_MIN_MS_OBJECT_ACE_TYPE = 5, + ACCESS_ALLOWED_OBJECT_ACE_TYPE = 5, + ACCESS_DENIED_OBJECT_ACE_TYPE = 6, + SYSTEM_AUDIT_OBJECT_ACE_TYPE = 7, + SYSTEM_ALARM_OBJECT_ACE_TYPE = 8, + ACCESS_MAX_MS_OBJECT_ACE_TYPE = 8, + + ACCESS_MAX_MS_V4_ACE_TYPE = 8, + + /* This one is for WinNT&2k. */ + ACCESS_MAX_MS_ACE_TYPE = 8, +} __attribute__ ((__packed__)) ACE_TYPES; + +/* + * The ACE flags (8-bit) for audit and inheritance (see below). + * + * SUCCESSFUL_ACCESS_ACE_FLAG is only used with system audit and alarm ACE + * types to indicate that a message is generated (in Windows!) for successful + * accesses. + * + * FAILED_ACCESS_ACE_FLAG is only used with system audit and alarm ACE types + * to indicate that a message is generated (in Windows!) for failed accesses. + */ +typedef enum { + /* The inheritance flags. */ + OBJECT_INHERIT_ACE = 0x01, + CONTAINER_INHERIT_ACE = 0x02, + NO_PROPAGATE_INHERIT_ACE = 0x04, + INHERIT_ONLY_ACE = 0x08, + INHERITED_ACE = 0x10, /* Win2k only. */ + VALID_INHERIT_FLAGS = 0x1f, + + /* The audit flags. */ + SUCCESSFUL_ACCESS_ACE_FLAG = 0x40, + FAILED_ACCESS_ACE_FLAG = 0x80, +} __attribute__ ((__packed__)) ACE_FLAGS; + +/* + * An ACE is an access-control entry in an access-control list (ACL). + * An ACE defines access to an object for a specific user or group or defines + * the types of access that generate system-administration messages or alarms + * for a specific user or group. The user or group is identified by a security + * identifier (SID). + * + * Each ACE starts with an ACE_HEADER structure (aligned on 4-byte boundary), + * which specifies the type and size of the ACE. 
The format of the subsequent + * data depends on the ACE type. + */ +typedef struct { +/*Ofs*/ +/* 0*/ ACE_TYPES type; /* Type of the ACE. */ +/* 1*/ ACE_FLAGS flags; /* Flags describing the ACE. */ +/* 2*/ u16 size; /* Size in bytes of the ACE. */ +} __attribute__ ((__packed__)) ACE_HEADER; + +/* + * The access mask (32-bit). Defines the access rights. + */ +typedef enum { + /* + * The specific rights (bits 0 to 15). Depend on the type of the + * object being secured by the ACE. + */ + + /* Specific rights for files and directories are as follows: */ + + /* Right to read data from the file. (FILE) */ + FILE_READ_DATA = const_cpu_to_le32(0x00000001), + /* Right to list contents of a directory. (DIRECTORY) */ + FILE_LIST_DIRECTORY = const_cpu_to_le32(0x00000001), + + /* Right to write data to the file. (FILE) */ + FILE_WRITE_DATA = const_cpu_to_le32(0x00000002), + /* Right to create a file in the directory. (DIRECTORY) */ + FILE_ADD_FILE = const_cpu_to_le32(0x00000002), + + /* Right to append data to the file. (FILE) */ + FILE_APPEND_DATA = const_cpu_to_le32(0x00000004), + /* Right to create a subdirectory. (DIRECTORY) */ + FILE_ADD_SUBDIRECTORY = const_cpu_to_le32(0x00000004), + + /* Right to read extended attributes. (FILE/DIRECTORY) */ + FILE_READ_EA = const_cpu_to_le32(0x00000008), + + /* Right to write extended attributes. (FILE/DIRECTORY) */ + FILE_WRITE_EA = const_cpu_to_le32(0x00000010), + + /* Right to execute a file. (FILE) */ + FILE_EXECUTE = const_cpu_to_le32(0x00000020), + /* Right to traverse the directory. (DIRECTORY) */ + FILE_TRAVERSE = const_cpu_to_le32(0x00000020), + + /* + * Right to delete a directory and all the files it contains (its + * children), even if the files are read-only. (DIRECTORY) + */ + FILE_DELETE_CHILD = const_cpu_to_le32(0x00000040), + + /* Right to read file attributes. (FILE/DIRECTORY) */ + FILE_READ_ATTRIBUTES = const_cpu_to_le32(0x00000080), + + /* Right to change file attributes. 
(FILE/DIRECTORY) */ + FILE_WRITE_ATTRIBUTES = const_cpu_to_le32(0x00000100), + + /* + * The standard rights (bits 16 to 23). Are independent of the type of + * object being secured. + */ + + /* Right to delete the object. */ + DELETE = const_cpu_to_le32(0x00010000), + + /* + * Right to read the information in the object's security descriptor, + * not including the information in the SACL. I.e. right to read the + * security descriptor and owner. + */ + READ_CONTROL = const_cpu_to_le32(0x00020000), + + /* Right to modify the DACL in the object's security descriptor. */ + WRITE_DAC = const_cpu_to_le32(0x00040000), + + /* Right to change the owner in the object's security descriptor. */ + WRITE_OWNER = const_cpu_to_le32(0x00080000), + + /* + * Right to use the object for synchronization. Enables a process to + * wait until the object is in the signalled state. Some object types + * do not support this access right. + */ + SYNCHRONIZE = const_cpu_to_le32(0x00100000), + + /* + * The following STANDARD_RIGHTS_* are combinations of the above for + * convenience and are defined by the Win32 API. + */ + + /* These are currently defined to READ_CONTROL. */ + STANDARD_RIGHTS_READ = const_cpu_to_le32(0x00020000), + STANDARD_RIGHTS_WRITE = const_cpu_to_le32(0x00020000), + STANDARD_RIGHTS_EXECUTE = const_cpu_to_le32(0x00020000), + + /* Combines DELETE, READ_CONTROL, WRITE_DAC, and WRITE_OWNER access. */ + STANDARD_RIGHTS_REQUIRED = const_cpu_to_le32(0x000f0000), + + /* + * Combines DELETE, READ_CONTROL, WRITE_DAC, WRITE_OWNER, and + * SYNCHRONIZE access. + */ + STANDARD_RIGHTS_ALL = const_cpu_to_le32(0x001f0000), + + /* + * The access system ACL and maximum allowed access types (bits 24 to + * 25, bits 26 to 27 are reserved). + */ + ACCESS_SYSTEM_SECURITY = const_cpu_to_le32(0x01000000), + MAXIMUM_ALLOWED = const_cpu_to_le32(0x02000000), + + /* + * The generic rights (bits 28 to 31). These map onto the standard and + * specific rights. 
+ */ + + /* Read, write, and execute access. */ + GENERIC_ALL = const_cpu_to_le32(0x10000000), + + /* Execute access. */ + GENERIC_EXECUTE = const_cpu_to_le32(0x20000000), + + /* + * Write access. For files, this maps onto: + * FILE_APPEND_DATA | FILE_WRITE_ATTRIBUTES | FILE_WRITE_DATA | + * FILE_WRITE_EA | STANDARD_RIGHTS_WRITE | SYNCHRONIZE + * For directories, the mapping has the same numerical value. See + * above for the descriptions of the rights granted. + */ + GENERIC_WRITE = const_cpu_to_le32(0x40000000), + + /* + * Read access. For files, this maps onto: + * FILE_READ_ATTRIBUTES | FILE_READ_DATA | FILE_READ_EA | + * STANDARD_RIGHTS_READ | SYNCHRONIZE + * For directories, the mapping has the same numerical value. See + * above for the descriptions of the rights granted. + */ + GENERIC_READ = const_cpu_to_le32(0x80000000), +} ACCESS_MASK; + +/* + * The generic mapping array. Used to denote the mapping of each generic + * access right to a specific access mask. + * + * FIXME: What exactly is this and what is it for? (AIA) + */ +typedef struct { + ACCESS_MASK generic_read; + ACCESS_MASK generic_write; + ACCESS_MASK generic_execute; + ACCESS_MASK generic_all; +} __attribute__ ((__packed__)) GENERIC_MAPPING; + +/* + * The predefined ACE type structures are as defined below. + */ + +/* + * ACCESS_ALLOWED_ACE, ACCESS_DENIED_ACE, SYSTEM_AUDIT_ACE, SYSTEM_ALARM_ACE + */ +typedef struct { +/* 0 ACE_HEADER; -- Unfolded here as gcc doesn't like unnamed structs. */ + ACE_TYPES type; /* Type of the ACE. */ + ACE_FLAGS flags; /* Flags describing the ACE. */ + u16 size; /* Size in bytes of the ACE. */ +/* 4*/ ACCESS_MASK mask; /* Access mask associated with the ACE. */ + +/* 8*/ SID sid; /* The SID associated with the ACE. */ +} __attribute__ ((__packed__)) ACCESS_ALLOWED_ACE, ACCESS_DENIED_ACE, + SYSTEM_AUDIT_ACE, SYSTEM_ALARM_ACE; + +/* + * The object ACE flags (32-bit).
+ */ +typedef enum { + ACE_OBJECT_TYPE_PRESENT = const_cpu_to_le32(1), + ACE_INHERITED_OBJECT_TYPE_PRESENT = const_cpu_to_le32(2), +} OBJECT_ACE_FLAGS; + +typedef struct { +/* 0 ACE_HEADER; -- Unfolded here as gcc doesn't like unnamed structs. */ + ACE_TYPES type; /* Type of the ACE. */ + ACE_FLAGS flags; /* Flags describing the ACE. */ + u16 size; /* Size in bytes of the ACE. */ +/* 4*/ ACCESS_MASK mask; /* Access mask associated with the ACE. */ + +/* 8*/ OBJECT_ACE_FLAGS object_flags; /* Flags describing the object ACE. */ +/* 12*/ GUID object_type; +/* 28*/ GUID inherited_object_type; + +/* 44*/ SID sid; /* The SID associated with the ACE. */ +} __attribute__ ((__packed__)) ACCESS_ALLOWED_OBJECT_ACE, + ACCESS_DENIED_OBJECT_ACE, + SYSTEM_AUDIT_OBJECT_ACE, + SYSTEM_ALARM_OBJECT_ACE; + +/* + * An ACL is an access-control list (ACL). + * An ACL starts with an ACL header structure, which specifies the size of + * the ACL and the number of ACEs it contains. The ACL header is followed by + * zero or more access control entries (ACEs). The ACL as well as each ACE + * are aligned on 4-byte boundaries. + */ +typedef struct { + u8 revision; /* Revision of this ACL. */ + u8 alignment1; + u16 size; /* Allocated space in bytes for ACL. Includes this + header, the ACEs and the remaining free space. */ + u16 ace_count; /* Number of ACEs in the ACL. */ + u16 alignment2; +/* sizeof() = 8 bytes */ +} __attribute__ ((__packed__)) ACL; + +/* + * Current constants for ACLs. + */ +typedef enum { + /* Current revision. */ + ACL_REVISION = 2, + ACL_REVISION_DS = 4, + + /* History of revisions. */ + ACL_REVISION1 = 1, + MIN_ACL_REVISION = 2, + ACL_REVISION2 = 2, + ACL_REVISION3 = 3, + ACL_REVISION4 = 4, + MAX_ACL_REVISION = 4, +} ACL_CONSTANTS; + +/* + * The security descriptor control flags (16-bit). 
+ * + * SE_OWNER_DEFAULTED - This boolean flag, when set, indicates that the + * SID pointed to by the Owner field was provided by a + * defaulting mechanism rather than explicitly provided by the + * original provider of the security descriptor. This may + * affect the treatment of the SID with respect to inheritance + * of an owner. + * + * SE_GROUP_DEFAULTED - This boolean flag, when set, indicates that the + * SID in the Group field was provided by a defaulting mechanism + * rather than explicitly provided by the original provider of + * the security descriptor. This may affect the treatment of + * the SID with respect to inheritance of a primary group. + * + * SE_DACL_PRESENT - This boolean flag, when set, indicates that the + * security descriptor contains a discretionary ACL. If this + * flag is set and the Dacl field of the SECURITY_DESCRIPTOR is + * null, then a null ACL is explicitly being specified. + * + * SE_DACL_DEFAULTED - This boolean flag, when set, indicates that the + * ACL pointed to by the Dacl field was provided by a defaulting + * mechanism rather than explicitly provided by the original + * provider of the security descriptor. This may affect the + * treatment of the ACL with respect to inheritance of an ACL. + * This flag is ignored if the DaclPresent flag is not set. + * + * SE_SACL_PRESENT - This boolean flag, when set, indicates that the + * security descriptor contains a system ACL pointed to by the + * Sacl field. If this flag is set and the Sacl field of the + * SECURITY_DESCRIPTOR is null, then an empty (but present) + * ACL is being specified. + * + * SE_SACL_DEFAULTED - This boolean flag, when set, indicates that the + * ACL pointed to by the Sacl field was provided by a defaulting + * mechanism rather than explicitly provided by the original + * provider of the security descriptor. This may affect the + * treatment of the ACL with respect to inheritance of an ACL. + * This flag is ignored if the SaclPresent flag is not set.
+ * + * SE_SELF_RELATIVE - This boolean flag, when set, indicates that the + * security descriptor is in self-relative form. In this form, + * all fields of the security descriptor are contiguous in memory + * and all pointer fields are expressed as offsets from the + * beginning of the security descriptor. + */ +typedef enum { + SE_OWNER_DEFAULTED = const_cpu_to_le16(0x0001), + SE_GROUP_DEFAULTED = const_cpu_to_le16(0x0002), + SE_DACL_PRESENT = const_cpu_to_le16(0x0004), + SE_DACL_DEFAULTED = const_cpu_to_le16(0x0008), + SE_SACL_PRESENT = const_cpu_to_le16(0x0010), + SE_SACL_DEFAULTED = const_cpu_to_le16(0x0020), + SE_DACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0100), + SE_SACL_AUTO_INHERIT_REQ = const_cpu_to_le16(0x0200), + SE_DACL_AUTO_INHERITED = const_cpu_to_le16(0x0400), + SE_SACL_AUTO_INHERITED = const_cpu_to_le16(0x0800), + SE_DACL_PROTECTED = const_cpu_to_le16(0x1000), + SE_SACL_PROTECTED = const_cpu_to_le16(0x2000), + SE_RM_CONTROL_VALID = const_cpu_to_le16(0x4000), + SE_SELF_RELATIVE = const_cpu_to_le16(0x8000), +} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_CONTROL; + +/* + * Self-relative security descriptor. Contains the owner and group SIDs as well + * as the sacl and dacl ACLs inside the security descriptor itself. + */ +typedef struct { + u8 revision; /* Revision level of the security descriptor. */ + u8 alignment; + SECURITY_DESCRIPTOR_CONTROL control; /* Flags qualifying the type of + the descriptor as well as the following fields. */ + u32 owner; /* Byte offset to a SID representing an object's + owner. If this is NULL, no owner SID is present in + the descriptor. */ + u32 group; /* Byte offset to a SID representing an object's + primary group. If this is NULL, no primary group + SID is present in the descriptor. */ + u32 sacl; /* Byte offset to a system ACL. Only valid, if + SE_SACL_PRESENT is set in the control field. If + SE_SACL_PRESENT is set but sacl is NULL, a NULL ACL + is specified. 
*/ + u32 dacl; /* Byte offset to a discretionary ACL. Only valid, if + SE_DACL_PRESENT is set in the control field. If + SE_DACL_PRESENT is set but dacl is NULL, a NULL ACL + (unconditionally granting access) is specified. */ +/* sizeof() = 0x14 bytes */ +} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_RELATIVE; + +/* + * Absolute security descriptor. Does not contain the owner and group SIDs, nor + * the sacl and dacl ACLs inside the security descriptor. Instead, it contains + * pointers to these structures in memory. Obviously, absolute security + * descriptors are only useful for in memory representations of security + * descriptors. On disk, a self-relative security descriptor is used. + */ +typedef struct { + u8 revision; /* Revision level of the security descriptor. */ + u8 alignment; + SECURITY_DESCRIPTOR_CONTROL control; /* Flags qualifying the type of + the descriptor as well as the following fields. */ + SID *owner; /* Points to a SID representing an object's owner. If + this is NULL, no owner SID is present in the + descriptor. */ + SID *group; /* Points to a SID representing an object's primary + group. If this is NULL, no primary group SID is + present in the descriptor. */ + ACL *sacl; /* Points to a system ACL. Only valid, if + SE_SACL_PRESENT is set in the control field. If + SE_SACL_PRESENT is set but sacl is NULL, a NULL ACL + is specified. */ + ACL *dacl; /* Points to a discretionary ACL. Only valid, if + SE_DACL_PRESENT is set in the control field. If + SE_DACL_PRESENT is set but dacl is NULL, a NULL ACL + (unconditionally granting access) is specified. */ +} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR; + +/* + * Current constants for security descriptors. + */ +typedef enum { + /* Current revision. */ + SECURITY_DESCRIPTOR_REVISION = 1, + SECURITY_DESCRIPTOR_REVISION1 = 1, + + /* The sizes of both the absolute and relative security descriptors is + the same as pointers, at least on ia32 architecture are 32-bit. 
*/ + SECURITY_DESCRIPTOR_MIN_LENGTH = sizeof(SECURITY_DESCRIPTOR), +} SECURITY_DESCRIPTOR_CONSTANTS; + +/* + * Attribute: Security descriptor (0x50). A standard self-relative security + * descriptor. + * + * NOTE: Can be resident or non-resident. + * NOTE: Not used in NTFS 3.0+, as security descriptors are stored centrally + * in FILE_Secure and the correct descriptor is found using the security_id + * from the standard information attribute. + */ +typedef SECURITY_DESCRIPTOR_RELATIVE SECURITY_DESCRIPTOR_ATTR; + +/* + * On NTFS 3.0+, all security descriptors are stored in FILE_Secure. Only one + * referenced instance of each unique security descriptor is stored. + * + * FILE_Secure contains no unnamed data attribute, i.e. it has zero length. It + * does, however, contain two indexes ($SDH and $SII) as well as a named data + * stream ($SDS). + * + * Every unique security descriptor is assigned a unique security identifier + * (security_id, not to be confused with a SID). The security_id is unique for + * the NTFS volume and is used as an index into the $SII index, which maps + * security_ids to the security descriptor's storage location within the $SDS + * data attribute. The $SII index is sorted by ascending security_id. + * + * A simple hash is computed from each security descriptor. This hash is used + * as an index into the $SDH index, which maps security descriptor hashes to + * the security descriptor's storage location within the $SDS data attribute. + * The $SDH index is sorted by security descriptor hash and is stored in a B+ + * tree. When searching $SDH (with the intent of determining whether or not a + * new security descriptor is already present in the $SDS data stream), if a + * matching hash is found, but the security descriptors do not match, the + * search in the $SDH index is continued, searching for a next matching hash. 
+ * + * When a precise match is found, the security_id corresponding to the security + * descriptor in the $SDS attribute is read from the found $SDH index entry and + * is stored in the $STANDARD_INFORMATION attribute of the file/directory to + * which the security descriptor is being applied. The $STANDARD_INFORMATION + * attribute is present in all base mft records (i.e. in all files and + * directories). + * + * If a match is not found, the security descriptor is assigned a new unique + * security_id and is added to the $SDS data attribute. Then, entries + * referencing this security descriptor in the $SDS data attribute are + * added to the $SDH and $SII indexes. + * + * Note: Entries are never deleted from FILE_Secure, even if nothing + * references an entry any more. + */ + +/* + * This header precedes each security descriptor in the $SDS data stream. + * This is also the index entry data part of both the $SII and $SDH indexes. + */ +typedef struct { + u32 hash; /* Hash of the security descriptor. */ + u32 security_id; /* The security_id assigned to the descriptor. */ + u64 offset; /* Byte offset of this entry in the $SDS stream. */ + u32 length; /* Size in bytes of this entry in $SDS stream. */ +} __attribute__ ((__packed__)) SECURITY_DESCRIPTOR_HEADER; + +/* + * The $SDS data stream contains the security descriptors, aligned on 16-byte + * boundaries, sorted by security_id in a B+ tree. Security descriptors cannot + * cross 256kib boundaries (this restriction is imposed by the Windows cache + * manager). Each security descriptor is contained in a SDS_ENTRY structure. + * Also, each security descriptor is stored twice in the $SDS stream with a + * fixed offset of 0x40000 bytes (256kib, the Windows cache manager's max size) + * between them; i.e. if a SDS_ENTRY specifies an offset of 0x51d0, then + * the first copy of the security descriptor will be at offset 0x51d0 in the + * $SDS data stream and the second copy will be at offset 0x451d0.
+ */ +typedef struct { +/*Ofs*/ +/* 0 SECURITY_DESCRIPTOR_HEADER; -- Unfolded here as gcc doesn't like + unnamed structs. */ + u32 hash; /* Hash of the security descriptor. */ + u32 security_id; /* The security_id assigned to the descriptor. */ + u64 offset; /* Byte offset of this entry in the $SDS stream. */ + u32 length; /* Size in bytes of this entry in $SDS stream. */ +/* 20*/ SECURITY_DESCRIPTOR_RELATIVE sid; /* The self-relative security + descriptor. */ +} __attribute__ ((__packed__)) SDS_ENTRY; + +/* + * The index entry key used in the $SII index. The collation type is + * COLLATION_NTOFS_ULONG. + */ +typedef struct { + u32 security_id; /* The security_id assigned to the descriptor. */ +} __attribute__ ((__packed__)) SII_INDEX_KEY; + +/* + * The index entry key used in the $SDH index. The keys are sorted first by + * hash and then by security_id. The collation rule is + * COLLATION_NTOFS_SECURITY_HASH. + */ +typedef struct { + u32 hash; /* Hash of the security descriptor. */ + u32 security_id; /* The security_id assigned to the descriptor. */ +} __attribute__ ((__packed__)) SDH_INDEX_KEY; + +/* + * Attribute: Volume name (0x60). + * + * NOTE: Always resident. + * NOTE: Present only in FILE_Volume. + */ +typedef struct { + uchar_t name[0]; /* The name of the volume in Unicode. */ +} __attribute__ ((__packed__)) VOLUME_NAME; + +/* + * Possible flags for the volume (16-bit). + */ +typedef enum { + VOLUME_IS_DIRTY = const_cpu_to_le16(0x0001), + VOLUME_RESIZE_LOG_FILE = const_cpu_to_le16(0x0002), + VOLUME_UPGRADE_ON_MOUNT = const_cpu_to_le16(0x0004), + VOLUME_MOUNTED_ON_NT4 = const_cpu_to_le16(0x0008), + VOLUME_DELETE_USN_UNDERWAY = const_cpu_to_le16(0x0010), + VOLUME_REPAIR_OBJECT_ID = const_cpu_to_le16(0x0020), + VOLUME_MODIFIED_BY_CHKDSK = const_cpu_to_le16(0x8000), + VOLUME_FLAGS_MASK = const_cpu_to_le16(0x803f), +} __attribute__ ((__packed__)) VOLUME_FLAGS; + +/* + * Attribute: Volume information (0x70). + * + * NOTE: Always resident. 
+ * NOTE: Present only in FILE_Volume. + * NOTE: Windows 2000 uses NTFS 3.0 while Windows NT4 service pack 6a uses + * NTFS 1.2. I haven't personally seen other values yet. + */ +typedef struct { + u64 reserved; /* Not used (yet?). */ + u8 major_ver; /* Major version of the ntfs format. */ + u8 minor_ver; /* Minor version of the ntfs format. */ + VOLUME_FLAGS flags; /* Bit array of VOLUME_* flags. */ +} __attribute__ ((__packed__)) VOLUME_INFORMATION; + +/* + * Attribute: Data attribute (0x80). + * + * NOTE: Can be resident or non-resident. + * + * Data contents of a file (i.e. the unnamed stream) or of a named stream. + */ +typedef struct { + u8 data[0]; /* The file's data contents. */ +} __attribute__ ((__packed__)) DATA_ATTR; + +/* + * Index header flags (8-bit). + */ +typedef enum { + /* When index header is in an index root attribute: */ + SMALL_INDEX = 0, /* The index is small enough to fit inside the + index root attribute and there is no index + allocation attribute present. */ + LARGE_INDEX = 1, /* The index is too large to fit in the index + root attribute and/or an index allocation + attribute is present. */ + /* + * When index header is in an index block, i.e. is part of index + * allocation attribute: + */ + LEAF_NODE = 0, /* This is a leaf node, i.e. there are no more + nodes branching off it. */ + INDEX_NODE = 1, /* This node indexes other nodes, i.e. is not a + leaf node. */ + NODE_MASK = 1, /* Mask for accessing the *_NODE bits. */ +} __attribute__ ((__packed__)) INDEX_HEADER_FLAGS; + +/* + * This is the header for indexes, describing the INDEX_ENTRY records, which + * follow the INDEX_HEADER. Together the index header and the index entries + * make up a complete index. + * + * IMPORTANT NOTE: The offset, length and size structure members are counted + * relative to the start of the index header structure and not relative to the + * start of the index root or index allocation structures themselves. 
+ */ +typedef struct { + u32 entries_offset; /* Byte offset to first INDEX_ENTRY + aligned to 8-byte boundary. */ + u32 index_length; /* Data size of the index in bytes, + i.e. bytes used from allocated + size, aligned to 8-byte boundary. */ + u32 allocated_size; /* Byte size of this index (block), + multiple of 8 bytes. */ + /* NOTE: For the index root attribute, the above two numbers are always + equal, as the attribute is resident and it is resized as needed. In + the case of the index allocation attribute the attribute is not + resident and hence the allocated_size is a fixed value and must + equal the index_block_size specified by the INDEX_ROOT attribute + corresponding to the INDEX_ALLOCATION attribute this INDEX_BLOCK + belongs to. */ + INDEX_HEADER_FLAGS flags; /* Bit field of INDEX_HEADER_FLAGS. */ + u8 reserved[3]; /* Reserved/align to 8-byte boundary. */ +} __attribute__ ((__packed__)) INDEX_HEADER; + +/* + * Attribute: Index root (0x90). + * + * NOTE: Always resident. + * + * This is followed by a sequence of index entries (INDEX_ENTRY structures) + * as described by the index header. + * + * When a directory is small enough to fit inside the index root then this + * is the only attribute describing the directory. When the directory is too + * large to fit in the index root, on the other hand, two additional attributes + * are present: an index allocation attribute, containing sub-nodes of the B+ + * directory tree (see below), and a bitmap attribute, describing which virtual + * cluster numbers (vcns) in the index allocation attribute are in use by an + * index block. + * + * NOTE: The root directory (FILE_root) contains an entry for itself. Other + * directories do not contain entries for themselves, though. + */ +typedef struct { + ATTR_TYPES type; /* Type of the indexed attribute. Is + $FILE_NAME for directories, zero + for view indexes. No other values + allowed.
*/ + COLLATION_RULES collation_rule; /* Collation rule used to sort the + index entries. If type is $FILE_NAME, + this must be COLLATION_FILE_NAME. */ + u32 index_block_size; /* Size of each index block in bytes (in + the index allocation attribute). */ + u8 clusters_per_index_block; /* Cluster size of each index block (in + the index allocation attribute), when + an index block is >= than a cluster, + otherwise this will be the log of + the size (like how the encoding of + the mft record size and the index + record size found in the boot sector + work). Has to be a power of 2. */ + u8 reserved[3]; /* Reserved/align to 8-byte boundary. */ + INDEX_HEADER index; /* Index header describing the + following index entries. */ +} __attribute__ ((__packed__)) INDEX_ROOT; + +/* + * Attribute: Index allocation (0xa0). + * + * NOTE: Always non-resident (doesn't make sense to be resident anyway!). + * + * This is an array of index blocks. Each index block starts with an + * INDEX_BLOCK structure containing an index header, followed by a sequence of + * index entries (INDEX_ENTRY structures), as described by the INDEX_HEADER. + */ +typedef struct { +/* 0 NTFS_RECORD; -- Unfolded here as gcc doesn't like unnamed structs. */ + NTFS_RECORD_TYPES magic;/* Magic is "INDX". */ + u16 usa_ofs; /* See NTFS_RECORD definition. */ + u16 usa_count; /* See NTFS_RECORD definition. */ + +/* 8*/ s64 lsn; /* $LogFile sequence number of the last + modification of this index block. */ +/* 16*/ VCN index_block_vcn; /* Virtual cluster number of the index block. + If the cluster_size on the volume is <= the + index_block_size of the directory, + index_block_vcn counts in units of clusters, + and in units of sectors otherwise. */ +/* 24*/ INDEX_HEADER index; /* Describes the following index entries. */ +/* sizeof()= 40 (0x28) bytes */ +/* + * When creating the index block, we place the update sequence array at this + * offset, i.e. before we start with the index entries. 
This also makes sense, + * otherwise we could run into problems with the update sequence array + * containing in itself the last two bytes of a sector which would mean that + * multi sector transfer protection wouldn't work. As you can't protect data + * by overwriting it since you then can't get it back... + * When reading use the data from the ntfs record header. + */ +} __attribute__ ((__packed__)) INDEX_BLOCK; + +typedef INDEX_BLOCK INDEX_ALLOCATION; + +/* + * The system file FILE_Extend/$Reparse contains an index named $R listing + * all reparse points on the volume. The index entry keys are as defined + * below. Note, that there is no index data associated with the index entries. + * + * The index entries are sorted by the index key file_id. The collation rule is + * COLLATION_NTOFS_ULONGS. FIXME: Verify whether the reparse_tag is not the + * primary key / is not a key at all. (AIA) + */ +typedef struct { + u32 reparse_tag; /* Reparse point type (inc. flags). */ + MFT_REF file_id; /* Mft record of the file containing the + reparse point attribute. */ +} __attribute__ ((__packed__)) REPARSE_INDEX_KEY; + +/* + * Quota flags (32-bit). + */ +typedef enum { + /* The user quota flags. Names explain meaning. */ + QUOTA_FLAG_DEFAULT_LIMITS = const_cpu_to_le32(0x00000001), + QUOTA_FLAG_LIMIT_REACHED = const_cpu_to_le32(0x00000002), + QUOTA_FLAG_ID_DELETED = const_cpu_to_le32(0x00000004), + + QUOTA_FLAG_USER_MASK = const_cpu_to_le32(0x00000007), + /* Bit mask for user quota flags. */ + + /* These flags are only present in the quota defaults index entry, + i.e. in the entry where owner_id = QUOTA_DEFAULTS_ID. 
*/ + QUOTA_FLAG_TRACKING_ENABLED = const_cpu_to_le32(0x00000010), + QUOTA_FLAG_ENFORCEMENT_ENABLED = const_cpu_to_le32(0x00000020), + QUOTA_FLAG_TRACKING_REQUESTED = const_cpu_to_le32(0x00000040), + QUOTA_FLAG_LOG_THRESHOLD = const_cpu_to_le32(0x00000080), + QUOTA_FLAG_LOG_LIMIT = const_cpu_to_le32(0x00000100), + QUOTA_FLAG_OUT_OF_DATE = const_cpu_to_le32(0x00000200), + QUOTA_FLAG_CORRUPT = const_cpu_to_le32(0x00000400), + QUOTA_FLAG_PENDING_DELETES = const_cpu_to_le32(0x00000800), +} QUOTA_FLAGS; + +/* + * The system file FILE_Extend/$Quota contains two indexes $O and $Q. Quotas + * are on a per volume and per user basis. + * + * The $Q index contains one entry for each existing user_id on the volume. The + * index key is the user_id of the user/group owning this quota control entry, + * i.e. the key is the owner_id. The user_id of the owner of a file, i.e. the + * owner_id, is found in the standard information attribute. The collation rule + * for $Q is COLLATION_NTOFS_ULONG. + * + * The $O index contains one entry for each user/group who has been assigned + * a quota on that volume. The index key holds the SID of the user_id the + * entry belongs to, i.e. the owner_id. The collation rule for $O is + * COLLATION_NTOFS_SID. + * + * The $O index entry data is the user_id of the user corresponding to the SID. + * This user_id is used as an index into $Q to find the quota control entry + * associated with the SID. + * + * The $Q index entry data is the quota control entry and is defined below. + */ +typedef struct { + u32 version; /* Currently equals 2. */ + QUOTA_FLAGS flags; /* Flags describing this quota entry. */ + u64 bytes_used; /* How many bytes of the quota are in use. */ + s64 change_time; /* Last time this quota entry was changed. */ + s64 threshold; /* Soft quota (-1 if not limited). */ + s64 limit; /* Hard quota (-1 if not limited). */ + s64 exceeded_time; /* How long the soft quota has been exceeded. 
*/ + SID sid; /* The SID of the user/object associated with + this quota entry. Equals zero for the quota + defaults entry. */ +} __attribute__ ((__packed__)) QUOTA_CONTROL_ENTRY; + +/* + * Predefined owner_id values (32-bit). + */ +typedef enum { + QUOTA_INVALID_ID = const_cpu_to_le32(0x00000000), + QUOTA_DEFAULTS_ID = const_cpu_to_le32(0x00000001), + QUOTA_FIRST_USER_ID = const_cpu_to_le32(0x00000100), +} PREDEFINED_OWNER_IDS; + +/* + * Index entry flags (16-bit). + */ +typedef enum { + INDEX_ENTRY_NODE = const_cpu_to_le16(1), /* This entry contains a sub-node, + i.e. a reference to an index + block in form of a virtual + cluster number (see below). */ + INDEX_ENTRY_END = const_cpu_to_le16(2), /* This signifies the last entry in + an index block. The index entry + does not represent a file but it + can point to a sub-node. */ + INDEX_ENTRY_SPACE_FILLER = 0xffff, /* Just to force 16-bit width. */ +} __attribute__ ((__packed__)) INDEX_ENTRY_FLAGS; + +/* + * This is the index entry header (see below). + */ +typedef struct { +/* 0*/ union { + struct { /* Only valid when INDEX_ENTRY_END is not set. */ + MFT_REF indexed_file; /* The mft reference of the file + described by this index + entry. Used for directory + indexes. */ + } __attribute__ ((__packed__)) dir; + struct { /* Used for views/indexes to find the entry's data. */ + u16 data_offset; /* Data byte offset from this + INDEX_ENTRY. Follows the + index key. */ + u16 data_length; /* Data length in bytes. */ + u32 reservedV; /* Reserved (zero). */ + } __attribute__ ((__packed__)) vi; + } __attribute__ ((__packed__)) data; +/* 8*/ u16 length; /* Byte size of this index entry, multiple of + 8-bytes. */ +/* 10*/ u16 key_length; /* Byte size of the key value, which is in the + index entry. It follows field reserved. Not + multiple of 8-bytes. */ +/* 12*/ INDEX_ENTRY_FLAGS flags; /* Bit field of INDEX_ENTRY_* flags. */ +/* 14*/ u16 reserved; /* Reserved/align to 8-byte boundary.
*/ +/* sizeof() = 16 bytes */ +} __attribute__ ((__packed__)) INDEX_ENTRY_HEADER; + +/* + * This is an index entry. A sequence of such entries follows each INDEX_HEADER + * structure. Together they make up a complete index. The index follows either + * an index root attribute or an index allocation attribute. + * + * NOTE: Before NTFS 3.0 only filename attributes were indexed. + */ +typedef struct { +/*Ofs*/ +/* 0 INDEX_ENTRY_HEADER; -- Unfolded here as gcc dislikes unnamed structs. */ + union { + struct { /* Only valid when INDEX_ENTRY_END is not set. */ + MFT_REF indexed_file; /* The mft reference of the file + described by this index + entry. Used for directory + indexes. */ + } __attribute__ ((__packed__)) dir; + struct { /* Used for views/indexes to find the entry's data. */ + u16 data_offset; /* Data byte offset from this + INDEX_ENTRY. Follows the + index key. */ + u16 data_length; /* Data length in bytes. */ + u32 reservedV; /* Reserved (zero). */ + } __attribute__ ((__packed__)) vi; + } __attribute__ ((__packed__)) data; + u16 length; /* Byte size of this index entry, multiple of + 8-bytes. */ + u16 key_length; /* Byte size of the key value, which is in the + index entry. It follows field reserved. Not + multiple of 8-bytes. */ + INDEX_ENTRY_FLAGS flags; /* Bit field of INDEX_ENTRY_* flags. */ + u16 reserved; /* Reserved/align to 8-byte boundary. */ + +/* 16*/ union { /* The key of the indexed attribute. NOTE: Only present + if INDEX_ENTRY_END bit in flags is not set. NOTE: On + NTFS versions before 3.0 the only valid key is the + FILE_NAME_ATTR. On NTFS 3.0+ the following + additional index keys are defined: */ + FILE_NAME_ATTR file_name;/* $I30 index in directories. */ + SII_INDEX_KEY sii; /* $SII index in $Secure. */ + SDH_INDEX_KEY sdh; /* $SDH index in $Secure. */ + GUID object_id; /* $O index in FILE_Extend/$ObjId: The + object_id of the mft record found in + the data part of the index. 
*/ + REPARSE_INDEX_KEY reparse; /* $R index in + FILE_Extend/$Reparse. */ + SID sid; /* $O index in FILE_Extend/$Quota: + SID of the owner of the user_id. */ + u32 owner_id; /* $Q index in FILE_Extend/$Quota: + user_id of the owner of the quota + control entry in the data part of + the index. */ + } __attribute__ ((__packed__)) key; + /* The (optional) index data is inserted here when creating. */ + // VCN vcn; /* If INDEX_ENTRY_NODE bit in flags is set, the last + // eight bytes of this index entry contain the virtual + // cluster number of the index block that holds the + // entries immediately preceding the current entry (the + // vcn references the corresponding cluster in the data + // of the non-resident index allocation attribute). If + // the key_length is zero, then the vcn immediately + // follows the INDEX_ENTRY_HEADER. Regardless of + // key_length, the address of the 8-byte boundary + // aligned vcn of INDEX_ENTRY{_HEADER} *ie is given by + // (char*)ie + le16_to_cpu(ie->length) - sizeof(VCN), + // where sizeof(VCN) can be hardcoded as 8 if wanted. */ +} __attribute__ ((__packed__)) INDEX_ENTRY; + +/* + * Attribute: Bitmap (0xb0). + * + * Contains an array of bits (aka a bitfield). + * + * When used in conjunction with the index allocation attribute, each bit + * corresponds to one index block within the index allocation attribute. Thus + * the number of bits in the bitmap * index block size / cluster size is the + * number of clusters in the index allocation attribute. + */ +typedef struct { + u8 bitmap[0]; /* Array of bits. */ +} __attribute__ ((__packed__)) BITMAP_ATTR; + +/* + * The reparse point tag defines the type of the reparse point. It also + * includes several flags, which further describe the reparse point. + * + * The reparse point tag is an unsigned 32-bit value divided in three parts: + * + * 1. The least significant 16 bits (i.e. bits 0 to 15) specify the type of + * the reparse point. + * 2. The 13 bits after this (i.e.
bits 16 to 28) are reserved for future use. + * 3. The most significant three bits are flags describing the reparse point. + * They are defined as follows: + * bit 29: Name surrogate bit. If set, the filename is an alias for + * another object in the system. + * bit 30: High-latency bit. If set, accessing the first byte of data will + * be slow. (E.g. the data is stored on a tape drive.) + * bit 31: Microsoft bit. If set, the tag is owned by Microsoft. User + * defined tags have to use zero here. + */ +typedef enum { + IO_REPARSE_TAG_IS_ALIAS = const_cpu_to_le32(0x20000000), + IO_REPARSE_TAG_IS_HIGH_LATENCY = const_cpu_to_le32(0x40000000), + IO_REPARSE_TAG_IS_MICROSOFT = const_cpu_to_le32(0x80000000), + + IO_REPARSE_TAG_RESERVED_ZERO = const_cpu_to_le32(0x00000000), + IO_REPARSE_TAG_RESERVED_ONE = const_cpu_to_le32(0x00000001), + IO_REPARSE_TAG_RESERVED_RANGE = const_cpu_to_le32(0x00000001), + + IO_REPARSE_TAG_NSS = const_cpu_to_le32(0x68000005), + IO_REPARSE_TAG_NSS_RECOVER = const_cpu_to_le32(0x68000006), + IO_REPARSE_TAG_SIS = const_cpu_to_le32(0x68000007), + IO_REPARSE_TAG_DFS = const_cpu_to_le32(0x68000008), + + IO_REPARSE_TAG_MOUNT_POINT = const_cpu_to_le32(0x88000003), + + IO_REPARSE_TAG_HSM = const_cpu_to_le32(0xa8000004), + + IO_REPARSE_TAG_SYMBOLIC_LINK = const_cpu_to_le32(0xe8000000), + + IO_REPARSE_TAG_VALID_VALUES = const_cpu_to_le32(0xe000ffff), +} PREDEFINED_REPARSE_TAGS; + +/* + * Attribute: Reparse point (0xc0). + * + * NOTE: Can be resident or non-resident. + */ +typedef struct { + u32 reparse_tag; /* Reparse point type (inc. flags). */ + u16 reparse_data_length; /* Byte size of reparse data. */ + u16 reserved; /* Align to 8-byte boundary. */ + u8 reparse_data[0]; /* Meaning depends on reparse_tag. */ +} __attribute__ ((__packed__)) REPARSE_POINT; + +/* + * Attribute: Extended attribute (EA) information (0xd0). + * + * NOTE: Always resident. (Is this true???) + */ +typedef struct { + u16 ea_length; /* Byte size of the packed extended + attributes. 
*/ + u16 need_ea_count; /* The number of extended attributes which have + the NEED_EA bit set. */ + u32 ea_query_length; /* Byte size of the buffer required to query + the extended attributes when calling + ZwQueryEaFile() in Windows NT/2k. I.e. the + byte size of the unpacked extended + attributes. */ +} __attribute__ ((__packed__)) EA_INFORMATION; + +/* + * Extended attribute flags (8-bit). + */ +typedef enum { + NEED_EA = 0x80, +} __attribute__ ((__packed__)) EA_FLAGS; + +/* + * Attribute: Extended attribute (EA) (0xe0). + * + * NOTE: Always non-resident. (Is this true?) + * + * Like the attribute list and the index buffer list, the EA attribute value is + * a sequence of EA_ATTR variable length records. + * + * FIXME: It appears weird that the EA name is not unicode. Is it true? + */ +typedef struct { + u32 next_entry_offset; /* Offset to the next EA_ATTR. */ + EA_FLAGS flags; /* Flags describing the EA. */ + u8 ea_name_length; /* Length of the name of the EA in bytes. */ + u16 ea_value_length; /* Byte size of the EA's value. */ + u8 ea_name[0]; /* Name of the EA. */ + u8 ea_value[0]; /* The value of the EA. Immediately follows + the name. */ +} __attribute__ ((__packed__)) EA_ATTR; + +/* + * Attribute: Property set (0xf0). + * + * Intended to support Native Structure Storage (NSS) - a feature removed from + * NTFS 3.0 during beta testing. + */ +typedef struct { + /* Irrelevant as feature unused. */ +} __attribute__ ((__packed__)) PROPERTY_SET; + +/* + * Attribute: Logged utility stream (0x100). + * + * NOTE: Can be resident or non-resident. + * + * Operations on this attribute are logged to the journal ($LogFile) like + * normal metadata changes. + * + * Used by the Encrypting File System (EFS). All encrypted files have this + * attribute with the name $EFS. + */ +typedef struct { + /* Can be anything the creator chooses. */ + /* EFS uses it as follows: */ + // FIXME: Type this info, verifying it along the way. 
(AIA) +} __attribute__ ((__packed__)) LOGGED_UTILITY_STREAM, EFS_ATTR; + +#endif /* _LINUX_NTFS_LAYOUT_H */ + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/malloc.h b/reactos/drivers/fs/ntfs/linux-ntfs/malloc.h new file mode 100644 index 00000000000..98170fb3773 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/malloc.h @@ -0,0 +1,64 @@ +/* + * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project. + * + * Copyright (c) 2001,2002 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_MALLOC_H +#define _LINUX_NTFS_MALLOC_H + +#include +#include + +/** + * ntfs_malloc_nofs - allocate memory in multiples of pages + * @size: number of bytes to allocate + * + * Allocates @size bytes of memory, rounded up to multiples of PAGE_SIZE and + * returns a pointer to the allocated memory. + * + * If there was insufficient memory to complete the request, return NULL. + */ +static inline void *ntfs_malloc_nofs(unsigned long size) +{ + if (likely(size <= PAGE_SIZE)) { + if (likely(size)) { + /* kmalloc() has per-CPU caches so is faster for now.
*/ + return kmalloc(PAGE_SIZE, GFP_NOFS); + /* return (void *)__get_free_page(GFP_NOFS | + __GFP_HIGHMEM); */ + } + BUG(); + } + if (likely(size >> PAGE_SHIFT < num_physpages)) + return __vmalloc(size, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL); + return NULL; +} + +/** + * ntfs_free - release memory, counterpart to ntfs_malloc_nofs() + * @addr: address of the memory to release + * + * Addresses outside the range [VMALLOC_START, VMALLOC_END) are released with + * kfree(); addresses inside that range are released with vfree(). + */ +static inline void ntfs_free(void *addr) +{ + if (likely(((unsigned long)addr < VMALLOC_START) || + ((unsigned long)addr >= VMALLOC_END ))) { + /* A void function must not return an expression (ISO C). */ + kfree(addr); + /* free_page((unsigned long)addr); */ + return; + } + vfree(addr); +} + +#endif /* _LINUX_NTFS_MALLOC_H */ + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/mft.c b/reactos/drivers/fs/ntfs/linux-ntfs/mft.c new file mode 100644 index 00000000000..ed793fbc5b3 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/mft.c @@ -0,0 +1,432 @@ +/** + * mft.c - NTFS kernel mft record operations. Part of the Linux-NTFS project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +#include "ntfs.h" + +/** + * __format_mft_record - initialize an empty mft record + * @m: mapped, pinned and locked for writing mft record + * @size: size of the mft record + * @rec_no: mft record number / inode number (not referenced by the + * function body) + * + * Private function to initialize an empty mft record. Use one of the two + * provided format_mft_record() functions instead. + * + * The record is zeroed first; every multi-byte field is then stored through + * the cpu_to_le*() helpers. + */ +static void __format_mft_record(MFT_RECORD *m, const int size, + const unsigned long rec_no) +{ + ATTR_RECORD *a; + + memset(m, 0, size); + m->magic = magic_FILE; + /* Aligned to 2-byte boundary. */ + m->usa_ofs = cpu_to_le16((sizeof(MFT_RECORD) + 1) & ~1); + m->usa_count = cpu_to_le16(size / NTFS_BLOCK_SIZE + 1); /* One per NTFS_BLOCK_SIZE bytes of the record, plus one. */ + /* Set the update sequence number to 1. */ + *(u16*)((char*)m + ((sizeof(MFT_RECORD) + 1) & ~1)) = cpu_to_le16(1); + m->lsn = cpu_to_le64(0LL); + m->sequence_number = cpu_to_le16(1); + m->link_count = cpu_to_le16(0); + /* Aligned to 8-byte boundary. */ + m->attrs_offset = cpu_to_le16((le16_to_cpu(m->usa_ofs) + + (le16_to_cpu(m->usa_count) << 1) + 7) & ~7); + m->flags = cpu_to_le16(0); + /* + * Using attrs_offset plus eight bytes (for the termination attribute), + * aligned to 8-byte boundary.
+ */ + m->bytes_in_use = cpu_to_le32((le16_to_cpu(m->attrs_offset) + 8 + 7) & + ~7); + m->bytes_allocated = cpu_to_le32(size); + m->base_mft_record = cpu_to_le64((MFT_REF)0); + m->next_attr_instance = cpu_to_le16(0); + a = (ATTR_RECORD*)((char*)m + le16_to_cpu(m->attrs_offset)); + a->type = AT_END; /* The only attribute written is the end marker. */ + a->length = cpu_to_le32(0); +} + +/** + * format_mft_record - initialize an empty mft record + * @ni: ntfs inode of mft record + * @mft_rec: mapped, pinned and locked mft record (optional) + * + * Initialize an empty mft record. This is used when extending the MFT. + * + * If @mft_rec is NULL, we call map_mft_record() to obtain the + * record and we unmap it again when finished. + * + * We return 0 on success or -errno on error. The only error returned is the + * PTR_ERR() value of a failed map_mft_record() call. The record is not + * marked dirty here (see the FIXME in the body). + */ +int format_mft_record(ntfs_inode *ni, MFT_RECORD *mft_rec) +{ + MFT_RECORD *m; + + if (mft_rec) + m = mft_rec; + else { + m = map_mft_record(ni); + if (IS_ERR(m)) + return PTR_ERR(m); + } + __format_mft_record(m, ni->vol->mft_record_size, ni->mft_no); + if (!mft_rec) { /* We mapped the record ourselves, so unmap it again. */ + // FIXME: Need to set the mft record dirty! + unmap_mft_record(ni); + } + return 0; +} + +/** + * ntfs_readpage - external declaration, function is in fs/ntfs/aops.c + */ +extern int ntfs_readpage(struct file *, struct page *); + +/** + * ntfs_mft_aops - address space operations for access to $MFT + * + * Address space operations for access to $MFT. This allows us to simply use + * ntfs_map_page() in map_mft_record_page(). + */ +struct address_space_operations ntfs_mft_aops = { + .readpage = ntfs_readpage, /* Fill page with data. */ + .sync_page = block_sync_page, /* Currently, just unplugs the + disk request queue. */ +}; + +/** + * map_mft_record_page - map the page in which a specific mft record resides + * @ni: ntfs inode whose mft record page to map + * + * This maps the page in which the mft record of the ntfs inode @ni is situated + * and returns a pointer to the mft record within the mapped page.
+ * + * Return value needs to be checked with IS_ERR() and if that is true PTR_ERR() + * contains the negative error code returned. + */ +static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni) +{ + ntfs_volume *vol = ni->vol; + struct inode *mft_vi = vol->mft_ino; + struct page *page; + unsigned long index, ofs, end_index; + + BUG_ON(ni->page); /* The inode must not already have a page mapped. */ + /* + * The index into the page cache and the offset within the page cache + * page of the wanted mft record. FIXME: We need to check for + * overflowing the unsigned long, but I don't think we would ever get + * here if the volume was that big... + */ + index = ni->mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT; + ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK; + + /* The maximum valid index into the page cache for $MFT's data. */ + end_index = mft_vi->i_size >> PAGE_CACHE_SHIFT; + + /* + * If the wanted index is out of bounds the mft record doesn't exist. + * On the final, partial page (index == end_index) the whole record + * must still end within i_size, otherwise it is rejected too. + */ + if (unlikely(index >= end_index)) { + if (index > end_index || (mft_vi->i_size & ~PAGE_CACHE_MASK) < + ofs + vol->mft_record_size) { + page = ERR_PTR(-ENOENT); + goto err_out; + } + } + /* Read, map, and pin the page. */ + page = ntfs_map_page(mft_vi->i_mapping, index); + if (likely(!IS_ERR(page))) { + ni->page = page; + ni->page_ofs = ofs; + return page_address(page) + ofs; /* Address of the record inside the page. */ + } +err_out: /* Reset the cached page state and hand the ERR_PTR() value back. */ + ni->page = NULL; + ni->page_ofs = 0; + ntfs_error(vol->sb, "Failed with error code %lu.", -PTR_ERR(page)); + return (void*)page; +} + +/** + * map_mft_record - map, pin and lock an mft record + * @ni: ntfs inode whose MFT record to map + * + * First, take the mrec_lock semaphore. We might now be sleeping, while waiting + * for the semaphore if it was already locked by someone else. + * + * The page of the record is mapped using map_mft_record_page() before being + * returned to the caller.
+ * + * This in turn uses ntfs_map_page() to get the page containing the wanted mft + * record (it in turn calls read_cache_page() which reads it in from disk if + * necessary, increments the use count on the page so that it cannot disappear + * under us and returns a reference to the page cache page). + * + * If read_cache_page() invokes ntfs_readpage() to load the page from disk, it + * sets PG_locked and clears PG_uptodate on the page. Once I/O has completed + * and the post-read mst fixups on each mft record in the page have been + * performed, the page gets PG_uptodate set and PG_locked cleared (this is done + * in our asynchronous I/O completion handler end_buffer_read_mft_async()). + * ntfs_map_page() waits for PG_locked to become clear and checks if + * PG_uptodate is set and returns an error code if not. This provides + * sufficient protection against races when reading/using the page. + * + * However there is the write mapping to think about. Doing the above described + * checking here will be fine, because when initiating the write we will set + * PG_locked and clear PG_uptodate making sure nobody is touching the page + * contents. Doing the locking this way means that the commit to disk code in + * the page cache code paths is automatically sufficiently locked with us as + * we will not touch a page that has been locked or is not uptodate. The only + * locking problem then is them locking the page while we are accessing it. + * + * So that code will end up having to own the mrec_lock of all mft + * records/inodes present in the page before I/O can proceed. In that case we + * wouldn't need to bother with PG_locked and PG_uptodate as nobody will be + * accessing anything without owning the mrec_lock semaphore. But we do need + * to use them because of the read_cache_page() invocation and the code becomes + * so much simpler this way that it is well worth it. + * + * The mft record is now ours and we return a pointer to it. 
You need to check + * the returned pointer with IS_ERR() and if that is true, PTR_ERR() will return + * the error code. + * + * NOTE: Caller is responsible for setting the mft record dirty before calling + * unmap_mft_record(). This is obviously only necessary if the caller really + * modified the mft record... + * Q: Do we want to recycle one of the VFS inode state bits instead? + * A: No, the inode ones mean we want to change the mft record, not we want to + * write it out. + */ +MFT_RECORD *map_mft_record(ntfs_inode *ni) +{ + MFT_RECORD *m; + + ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no); + + /* Make sure the ntfs inode doesn't go away. */ + atomic_inc(&ni->count); + + /* Serialize access to this mft record. */ + down(&ni->mrec_lock); + + m = map_mft_record_page(ni); + if (likely(!IS_ERR(m))) + return m; /* Success: mrec_lock and the reference on @ni stay held. */ + + up(&ni->mrec_lock); /* Mapping failed: undo the lock and the reference taken above. */ + atomic_dec(&ni->count); + ntfs_error(ni->vol->sb, "Failed with error code %lu.", -PTR_ERR(m)); + return m; +} + +/** + * unmap_mft_record_page - unmap the page in which a specific mft record resides + * @ni: ntfs inode whose mft record page to unmap + * + * This unmaps the page in which the mft record of the ntfs inode @ni is + * situated and returns. This is a NOOP if highmem is not configured. + * + * The unmap happens via ntfs_unmap_page() which in turn decrements the use + * count on the page thus releasing it from the pinned state. + * + * We do not actually unmap the page from memory of course, as that will be + * done by the page cache code itself when memory pressure increases or + * whatever. + */ +static inline void unmap_mft_record_page(ntfs_inode *ni) +{ + BUG_ON(!ni->page); + + // TODO: If dirty, blah...
+ ntfs_unmap_page(ni->page); + ni->page = NULL; + ni->page_ofs = 0; + return; +} + +/** + * unmap_mft_record - release a mapped mft record + * @ni: ntfs inode whose MFT record to unmap + * + * We release the page mapping and the mrec_lock semaphore which unmaps the mft + * record and releases it for others to get hold of. We also release the ntfs + * inode by decrementing the ntfs inode reference count. + * + * It is a bug (BUG_ON) to call this when no page is mapped for @ni. + * + * NOTE: If caller has modified the mft record, it is imperative to set the mft + * record dirty BEFORE calling unmap_mft_record(). + */ +void unmap_mft_record(ntfs_inode *ni) +{ + struct page *page = ni->page; + + BUG_ON(!page); /* The record must currently be mapped. */ + + ntfs_debug("Entering for mft_no 0x%lx.", ni->mft_no); + + unmap_mft_record_page(ni); /* Release order: page, then lock, then reference. */ + up(&ni->mrec_lock); + atomic_dec(&ni->count); + /* + * If pure ntfs_inode, i.e. no vfs inode attached, we leave it to + * ntfs_clear_extent_inode() in the extent inode case, and to the + * caller in the non-extent, yet pure ntfs inode case, to do the actual + * tear down of all structures and freeing of all allocated memory. + */ + return; +} + +/** + * map_extent_mft_record - load an extent inode and attach it to its base + * @base_ni: base ntfs inode + * @mref: mft reference of the extent inode to load (in little endian) + * @ntfs_ino: on successful return, pointer to the ntfs_inode structure + * + * Load the extent mft record @mref and attach it to its base inode @base_ni. + * Return the mapped extent mft record if IS_ERR(result) is false. Otherwise + * PTR_ERR(result) gives the negative error code. + * + * On successful return, @ntfs_ino contains a pointer to the ntfs_inode + * structure of the mapped extent inode.
+ */ +MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, + ntfs_inode **ntfs_ino) +{ + MFT_RECORD *m; + ntfs_inode *ni = NULL; + ntfs_inode **extent_nis = NULL; + int i; + unsigned long mft_no = MREF_LE(mref); + u16 seq_no = MSEQNO_LE(mref); + BOOL destroy_ni = FALSE; /* Set when @ni must be freed on the unm_err_out path. */ + + ntfs_debug("Mapping extent mft record 0x%lx (base mft record 0x%lx).", + mft_no, base_ni->mft_no); + /* Make sure the base ntfs inode doesn't go away. */ + atomic_inc(&base_ni->count); + /* + * Check if this extent inode has already been added to the base inode, + * in which case just return it. If not found, add it to the base + * inode before returning it. + */ + down(&base_ni->extent_lock); + if (base_ni->nr_extents > 0) { + extent_nis = base_ni->ext.extent_ntfs_inos; + for (i = 0; i < base_ni->nr_extents; i++) { + if (mft_no != extent_nis[i]->mft_no) + continue; + ni = extent_nis[i]; + /* Make sure the ntfs inode doesn't go away. */ + atomic_inc(&ni->count); + break; + } + } + if (likely(ni != NULL)) { + up(&base_ni->extent_lock); + atomic_dec(&base_ni->count); + /* We found the record; just have to map and return it. */ + m = map_mft_record(ni); + /* Drop the search loop's reference; on success map_mft_record() holds its own. */ + atomic_dec(&ni->count); + if (likely(!IS_ERR(m))) { + /* Verify the sequence number. */ + if (likely(le16_to_cpu(m->sequence_number) == seq_no)) { + ntfs_debug("Done 1."); + *ntfs_ino = ni; + return m; + } + unmap_mft_record(ni); + ntfs_error(base_ni->vol->sb, "Found stale extent mft " + "reference! Corrupt file system. " + "Run chkdsk."); + return ERR_PTR(-EIO); + } +map_err_out: /* Also the target of a goto from the new-inode path below. */ + ntfs_error(base_ni->vol->sb, "Failed to map extent " + "mft record, error code %ld.", -PTR_ERR(m)); + return m; + } + /* Record wasn't there. Get a new ntfs inode and initialize it. 
*/ + ni = ntfs_new_extent_inode(base_ni->vol->sb, mft_no); + if (unlikely(!ni)) { + up(&base_ni->extent_lock); + atomic_dec(&base_ni->count); + return ERR_PTR(-ENOMEM); + } + ni->vol = base_ni->vol; + ni->seq_no = seq_no; + ni->nr_extents = -1; + ni->ext.base_ntfs_ino = base_ni; + /* Now map the record. */ + m = map_mft_record(ni); + if (unlikely(IS_ERR(m))) { + up(&base_ni->extent_lock); + atomic_dec(&base_ni->count); + ntfs_clear_extent_inode(ni); /* Never attached to the base inode, so free it here. */ + goto map_err_out; + } + /* Verify the sequence number. */ + if (unlikely(le16_to_cpu(m->sequence_number) != seq_no)) { + ntfs_error(base_ni->vol->sb, "Found stale extent mft " + "reference! Corrupt file system. Run chkdsk."); + destroy_ni = TRUE; + m = ERR_PTR(-EIO); + goto unm_err_out; + } + /* Attach extent inode to base inode; the array grows four pointers at a time. */ + if (!(base_ni->nr_extents & 3)) { + ntfs_inode **tmp; + int new_size = (base_ni->nr_extents + 4) * sizeof(ntfs_inode *); + + tmp = (ntfs_inode **)kmalloc(new_size, GFP_NOFS); + if (unlikely(!tmp)) { + ntfs_error(base_ni->vol->sb, "Failed to allocate " + "internal buffer."); + destroy_ni = TRUE; + m = ERR_PTR(-ENOMEM); + goto unm_err_out; + } + if (base_ni->ext.extent_ntfs_inos) { + memcpy(tmp, base_ni->ext.extent_ntfs_inos, new_size - + 4 * sizeof(ntfs_inode *)); /* Copy only the old entries. */ + kfree(base_ni->ext.extent_ntfs_inos); + } + base_ni->ext.extent_ntfs_inos = tmp; + } + base_ni->ext.extent_ntfs_inos[base_ni->nr_extents++] = ni; + up(&base_ni->extent_lock); + atomic_dec(&base_ni->count); + ntfs_debug("Done 2."); + *ntfs_ino = ni; + return m; +unm_err_out: + unmap_mft_record(ni); + up(&base_ni->extent_lock); + atomic_dec(&base_ni->count); + /* + * If the extent inode was not attached to the base inode we need to + * release it or we will leak memory. 
+ */ + if (destroy_ni) + ntfs_clear_extent_inode(ni); + return m; +} + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/mft.h b/reactos/drivers/fs/ntfs/linux-ntfs/mft.h new file mode 100644 index 00000000000..a275edbe1a8 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/mft.h @@ -0,0 +1,61 @@ +/* + * mft.h - Defines for mft record handling in NTFS Linux kernel driver. + * Part of the Linux-NTFS project. + * + * Copyright (c) 2001,2002 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_MFT_H +#define _LINUX_NTFS_MFT_H + +#include + +#include "inode.h" + +extern int format_mft_record(ntfs_inode *ni, MFT_RECORD *m); +//extern int format_mft_record2(struct super_block *vfs_sb, +// const unsigned long inum, MFT_RECORD *m); + +extern MFT_RECORD *map_mft_record(ntfs_inode *ni); +extern void unmap_mft_record(ntfs_inode *ni); + +extern MFT_RECORD *map_extent_mft_record(ntfs_inode *base_ni, MFT_REF mref, + ntfs_inode **ntfs_ino); + +static inline void unmap_extent_mft_record(ntfs_inode *ni) +{ + unmap_mft_record(ni); /* Extent records are unmapped exactly like any other mft record. */ + return; +} + +/** + * flush_dcache_mft_record_page - flush_dcache_page() for mft records + * @ni: ntfs inode structure of mft record + * + * Call flush_dcache_page() for the page in which an mft record resides (@ni->page). + * + * This must be called every time an mft record is modified, just after the + * modification. + */ +static inline void flush_dcache_mft_record_page(ntfs_inode *ni) +{ + flush_dcache_page(ni->page); +} + +#endif /* _LINUX_NTFS_MFT_H */ + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/mst.c b/reactos/drivers/fs/ntfs/linux-ntfs/mst.c new file mode 100644 index 00000000000..00f63c1f352 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/mst.c @@ -0,0 +1,202 @@ +/* + * mst.c - NTFS multi sector transfer protection handling code. Part of the + * Linux-NTFS project. + * + * Copyright (c) 2001 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "ntfs.h" + +/** + * post_read_mst_fixup - deprotect multi sector transfer protected data + * @b: pointer to the data to deprotect + * @size: size in bytes of @b + * + * Perform the necessary post read multi sector transfer fixup and detect the + * presence of incomplete multi sector transfers. In that case, overwrite the + * magic of the ntfs record header being processed with "BAAD" (in memory only!) + * and abort processing. + * + * Return 0 on success and -EINVAL on error ("BAAD" magic will be present). + * + * NOTE: We consider the absence / invalidity of an update sequence array to + * mean that the structure is not protected at all and hence doesn't need to + * be fixed up. Thus, we return success and not failure in this case. This is + * in contrast to pre_write_mst_fixup(), see below. + */ +int post_read_mst_fixup(NTFS_RECORD *b, const u32 size) +{ + u16 usa_ofs, usa_count, usn; + u16 *usa_pos, *data_pos; + + /* Setup the variables. */ + usa_ofs = le16_to_cpu(b->usa_ofs); + /* Decrement usa_count to get number of fixups. */ + usa_count = le16_to_cpu(b->usa_count) - 1; + /* Size and alignment checks; a record failing them is treated as unprotected. */ + if ( size & (NTFS_BLOCK_SIZE - 1) || + usa_ofs & 1 || + usa_ofs + (usa_count * 2) > size || + (size >> NTFS_BLOCK_SIZE_BITS) != usa_count) + return 0; + /* Position of usn in update sequence array. 
*/ + usa_pos = (u16*)b + usa_ofs/sizeof(u16); + /* + * The update sequence number which has to be equal to each of the + * u16 values before they are fixed up. Note no need to care for + * endianness since we are comparing and moving data for on disk + * structures which means the data is consistent. If it is + * consistently the wrong endianness it doesn't make any difference. + */ + usn = *usa_pos; + /* + * Position in protected data of first u16 that needs fixing up (the last u16 of the first NTFS_BLOCK_SIZE bytes). + */ + data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1; + /* + * Check for incomplete multi sector transfer(s). + */ + while (usa_count--) { + if (*data_pos != usn) { + /* + * Incomplete multi sector transfer detected! )-: + * Set the magic to "BAAD" and return failure. + * Note that magic_BAAD is already converted to le32. + */ + b->magic = magic_BAAD; + return -EINVAL; + } + data_pos += NTFS_BLOCK_SIZE/sizeof(u16); + } + /* Re-setup the variables consumed by the check loop above. */ + usa_count = le16_to_cpu(b->usa_count) - 1; + data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1; + /* Fixup all sectors. */ + while (usa_count--) { + /* + * Increment position in usa and restore original data from + * the usa into the data buffer. + */ + *data_pos = *(++usa_pos); + /* Increment position in data as well. */ + data_pos += NTFS_BLOCK_SIZE/sizeof(u16); + } + return 0; +} + +/** + * pre_write_mst_fixup - apply multi sector transfer protection + * @b: pointer to the data to protect + * @size: size in bytes of @b + * + * Perform the necessary pre write multi sector transfer fixup on the data + * pointed to by @b of @size. + * + * Return 0 if fixup applied (success) or -EINVAL if no fixup was performed + * (assumed not needed). This is in contrast to post_read_mst_fixup() above. + * + * NOTE: We consider the absence / invalidity of an update sequence array to + * mean that the structure is not subject to protection and hence doesn't need + * to be fixed up. 
This means that you have to create a valid update sequence + * array header in the ntfs record before calling this function, otherwise it + * will fail (the header needs to contain the position of the update sequence + * array together with the number of elements in the array). You also need to + * initialise the update sequence number before calling this function + * otherwise a random word will be used (whatever was in the record at that + * position at that time). + */ +int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size) +{ + u16 usa_ofs, usa_count, usn; + u16 *usa_pos, *data_pos; + + /* Sanity check + only fixup if it makes sense (never for BAAD or hole records). */ + if (!b || is_baad_record(b->magic) || is_hole_record(b->magic)) + return -EINVAL; + /* Setup the variables. */ + usa_ofs = le16_to_cpu(b->usa_ofs); + /* Decrement usa_count to get number of fixups. */ + usa_count = le16_to_cpu(b->usa_count) - 1; + /* Size and alignment checks; here a failure is reported as -EINVAL. */ + if ( size & (NTFS_BLOCK_SIZE - 1) || + usa_ofs & 1 || + usa_ofs + (usa_count * 2) > size || + (size >> NTFS_BLOCK_SIZE_BITS) != usa_count) + return -EINVAL; + /* Position of usn in update sequence array. */ + usa_pos = (u16*)((u8*)b + usa_ofs); + /* + * Cyclically increment the update sequence number + * (skipping 0 and -1, i.e. 0xffff). + */ + usn = le16_to_cpup(usa_pos) + 1; + if (usn == 0xffff || !usn) + usn = 1; + usn = cpu_to_le16(usn); /* From here on usn is in on-disk (little endian) form. */ + *usa_pos = usn; + /* Position in data of first u16 that needs fixing up. */ + data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1; + /* Fixup all sectors. */ + while (usa_count--) { + /* + * Increment the position in the usa and save the + * original data from the data buffer into the usa. + */ + *(++usa_pos) = *data_pos; + /* Apply fixup to data. */ + *data_pos = usn; + /* Increment position in data as well. 
*/ + data_pos += NTFS_BLOCK_SIZE/sizeof(u16); + } + return 0; +} + +/** + * post_write_mst_fixup - fast deprotect multi sector transfer protected data + * @b: pointer to the data to deprotect + * + * Perform the necessary post write multi sector transfer fixup, not checking + * for any errors, because we assume we have just used pre_write_mst_fixup(), + * thus the data will be fine or we would never have gotten here. The fixup count is read from @b->usa_count; no size is passed or checked. + */ +void post_write_mst_fixup(NTFS_RECORD *b) +{ + u16 *usa_pos, *data_pos; + + u16 usa_ofs = le16_to_cpu(b->usa_ofs); + u16 usa_count = le16_to_cpu(b->usa_count) - 1; + + /* Position of usn in update sequence array. */ + usa_pos = (u16*)b + usa_ofs/sizeof(u16); + + /* Position in protected data of first u16 that needs fixing up. */ + data_pos = (u16*)b + NTFS_BLOCK_SIZE/sizeof(u16) - 1; + + /* Fixup all sectors. */ + while (usa_count--) { + /* + * Increment position in usa and restore original data from + * the usa into the data buffer. + */ + *data_pos = *(++usa_pos); + + /* Increment position in data as well. */ + data_pos += NTFS_BLOCK_SIZE/sizeof(u16); + } +} + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/namei.c b/reactos/drivers/fs/ntfs/linux-ntfs/namei.c new file mode 100644 index 00000000000..a8c6e8a4e3b --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/namei.c @@ -0,0 +1,297 @@ +/* + * namei.c - NTFS kernel directory inode operations. Part of the Linux-NTFS + * project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include + +#include "ntfs.h" +#include "dir.h" + +/** + * ntfs_lookup - find the inode represented by a dentry in a directory inode + * @dir_ino: directory inode in which to look for the inode + * @dent: dentry representing the inode to look for + * @nd: lookup nameidata (not used by this function) + * + * In short, ntfs_lookup() looks for the inode represented by the dentry @dent + * in the directory inode @dir_ino and if found attaches the inode to the + * dentry @dent. + * + * In more detail, the dentry @dent specifies which inode to look for by + * supplying the name of the inode in @dent->d_name.name. ntfs_lookup() + * converts the name to Unicode and walks the contents of the directory inode + * @dir_ino looking for the converted Unicode name. If the name is found in the + * directory, the corresponding inode is loaded by calling ntfs_iget() on its + * inode number and the inode is associated with the dentry @dent via a call to + * d_add(). + * + * If the name is not found in the directory, a NULL inode is inserted into the + * dentry @dent. The dentry is then termed a negative dentry. + * + * Only if an actual error occurs, do we return an error via ERR_PTR(). + * + * In order to handle the case insensitivity issues of NTFS with regard to the + * dcache and the dcache requiring only one dentry per directory, we deal with + * dentry aliases that only differ in case in ->ntfs_lookup() while maintaining + * a case sensitive dcache. 
This means that we get the full benefit of dcache + * speed when the file/directory is looked up with the same case as returned by + * ->ntfs_readdir() but that a lookup for any other case (or for the short file + * name) will not find anything in dcache and will enter ->ntfs_lookup() + * instead, where we search the directory for a fully matching file name + * (including case) and if that is not found, we search for a file name that + * matches with different case and if that has non-POSIX semantics we return + * that. We actually do only one search (case sensitive) and keep tabs on + * whether we have found a case insensitive match in the process. + * + * To simplify matters for us, we do not treat the short vs long filenames as + * two hard links but instead if the lookup matches a short filename, we + * return the dentry for the corresponding long filename instead. + * + * There are three cases we need to distinguish here: + * + * 1) @dent perfectly matches (i.e. including case) a directory entry with a + * file name in the WIN32 or POSIX namespaces. In this case + * ntfs_lookup_inode_by_name() will return with name set to NULL and we + * just d_add() @dent. + * 2) @dent matches (not including case) a directory entry with a file name in + * the WIN32 namespace. In this case ntfs_lookup_inode_by_name() will return + * with name set to point to a kmalloc()ed ntfs_name structure containing + * the properly cased little endian Unicode name. We convert the name to the + * current NLS code page, search if a dentry with this name already exists + * and if so return that instead of @dent. The VFS will then destroy the old + * @dent and use the one we returned. If a dentry is not found, we allocate + * a new one, d_add() it, and return it as above. + * 3) @dent matches either perfectly or not (i.e. we don't care about case) a + * directory entry with a file name in the DOS namespace. 
In this case + * ntfs_lookup_inode_by_name() will return with name set to point to a + * kmalloc()ed ntfs_name structure containing the mft reference (cpu endian) + * of the inode. We use the mft reference to read the inode and to find the + * file name in the WIN32 namespace corresponding to the matched short file + * name. We then convert the name to the current NLS code page, and proceed + * searching for a dentry with this name, etc, as in case 2), above. + */ +static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, struct nameidata *nd) +{ + ntfs_volume *vol = NTFS_SB(dir_ino->i_sb); + struct inode *dent_inode; + uchar_t *uname; + ntfs_name *name = NULL; + MFT_REF mref; + unsigned long dent_ino; + int uname_len; + + ntfs_debug("Looking up %s in directory inode 0x%lx.", + dent->d_name.name, dir_ino->i_ino); + /* Convert the name of the dentry to Unicode. */ + uname_len = ntfs_nlstoucs(vol, dent->d_name.name, dent->d_name.len, + &uname); + if (uname_len < 0) { + ntfs_error(vol->sb, "Failed to convert name to Unicode."); + return ERR_PTR(uname_len); + } + mref = ntfs_lookup_inode_by_name(NTFS_I(dir_ino), uname, uname_len, + &name); + kmem_cache_free(ntfs_name_cache, uname); + if (!IS_ERR_MREF(mref)) { + dent_ino = MREF(mref); + ntfs_debug("Found inode 0x%lx. Calling ntfs_iget.", dent_ino); + dent_inode = ntfs_iget(vol->sb, dent_ino); + if (likely(!IS_ERR(dent_inode))) { + /* Consistency check: sequence numbers must agree (inode FILE_MFT is exempt). */ + if (MSEQNO(mref) == NTFS_I(dent_inode)->seq_no || + dent_ino == FILE_MFT) { + /* Perfect WIN32/POSIX match. -- Case 1. */ + if (!name) { + d_add(dent, dent_inode); + ntfs_debug("Done."); + return NULL; + } + /* + * We are too indented. Handle imperfect + * matches and short file names further below. + */ + goto handle_name; + } + ntfs_error(vol->sb, "Found stale reference to inode " + "0x%lx (reference sequence number = " + "0x%x, inode sequence number = 0x%x), " + "returning -EIO. 
Run chkdsk.", + dent_ino, MSEQNO(mref), + NTFS_I(dent_inode)->seq_no); + iput(dent_inode); + dent_inode = ERR_PTR(-EIO); + } else + ntfs_error(vol->sb, "ntfs_iget(0x%lx) failed with " + "error code %li.", dent_ino, + PTR_ERR(dent_inode)); + if (name) + kfree(name); + /* Return the error code (dent_inode holds an ERR_PTR() value here). */ + return (struct dentry *)dent_inode; + } + /* It is guaranteed that name is no longer allocated at this point. */ + if (MREF_ERR(mref) == -ENOENT) { + ntfs_debug("Entry was not found, adding negative dentry."); + /* The dcache will handle negative entries. */ + d_add(dent, NULL); + ntfs_debug("Done."); + return NULL; + } + ntfs_error(vol->sb, "ntfs_lookup_inode_by_name() failed with error " + "code %i.", -MREF_ERR(mref)); + return ERR_PTR(MREF_ERR(mref)); + + // TODO: Consider moving this lot to a separate function! (AIA) +handle_name: + { + struct dentry *real_dent; + MFT_RECORD *m; + attr_search_context *ctx; + ntfs_inode *ni = NTFS_I(dent_inode); + int err; + struct qstr nls_name; + + nls_name.name = NULL; + if (name->type != FILE_NAME_DOS) { /* Case 2. */ + nls_name.len = (unsigned)ntfs_ucstonls(vol, + (uchar_t*)&name->name, name->len, + (unsigned char**)&nls_name.name, + name->len * 3 + 1); + kfree(name); + } else /* if (name->type == FILE_NAME_DOS) */ { /* Case 3. */ + FILE_NAME_ATTR *fn; + + kfree(name); + + /* Find the WIN32 name corresponding to the matched DOS name. */ + ni = NTFS_I(dent_inode); + m = map_mft_record(ni); + if (IS_ERR(m)) { + err = PTR_ERR(m); + m = NULL; + ctx = NULL; + goto err_out; + } + ctx = get_attr_search_ctx(ni, m); + if (!ctx) { + err = -ENOMEM; + goto err_out; + } + do { + ATTR_RECORD *a; + u32 val_len; + + if (!lookup_attr(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, + ctx)) { + ntfs_error(vol->sb, "Inode corrupt: No WIN32 " + "namespace counterpart to DOS " + "file name. Run chkdsk."); + err = -EIO; + goto err_out; + } + /* Consistency checks. 
*/ + a = ctx->attr; + if (a->non_resident || a->flags) + goto eio_err_out; + val_len = le32_to_cpu(a->data.resident.value_length); + if (le16_to_cpu(a->data.resident.value_offset) + + val_len > le32_to_cpu(a->length)) + goto eio_err_out; + fn = (FILE_NAME_ATTR*)((u8*)ctx->attr + le16_to_cpu( + ctx->attr->data.resident.value_offset)); + if ((u32)(fn->file_name_length * sizeof(uchar_t) + + sizeof(FILE_NAME_ATTR)) > val_len) + goto eio_err_out; + } while (fn->file_name_type != FILE_NAME_WIN32); + + /* Convert the found WIN32 name to current NLS code page. */ + nls_name.len = (unsigned)ntfs_ucstonls(vol, + (uchar_t*)&fn->file_name, fn->file_name_length, + (unsigned char**)&nls_name.name, + fn->file_name_length * 3 + 1); + + put_attr_search_ctx(ctx); + unmap_mft_record(ni); + } + m = NULL; /* Nothing is mapped any more (or never was); keep err_out from releasing m/ctx. */ + ctx = NULL; + + /* Check if a conversion error occurred. */ + if ((signed)nls_name.len < 0) { + err = (signed)nls_name.len; + goto err_out; + } + nls_name.hash = full_name_hash(nls_name.name, nls_name.len); + + /* + * Note: No need for dent->d_lock lock as i_sem is held on the + * parent inode. + */ + + /* Does a dentry matching the nls_name exist already? */ + real_dent = d_lookup(dent->d_parent, &nls_name); + /* If not, create it now. */ + if (!real_dent) { + real_dent = d_alloc(dent->d_parent, &nls_name); + kfree(nls_name.name); + if (!real_dent) { + err = -ENOMEM; + goto err_out; + } + d_add(real_dent, dent_inode); + return real_dent; + } + kfree(nls_name.name); + /* Matching dentry exists, check if it is negative. */ + if (real_dent->d_inode) { + BUG_ON(real_dent->d_inode != dent_inode); + /* + * Already have the inode and the dentry attached, decrement + * the reference count to balance the ntfs_iget() we did + * earlier on. + */ + iput(dent_inode); + return real_dent; + } + /* Negative dentry: instantiate it. */ + d_instantiate(real_dent, dent_inode); + return real_dent; + +eio_err_out: + ntfs_error(vol->sb, "Illegal file name attribute. 
Run chkdsk."); + err = -EIO; +err_out: + if (ctx) + put_attr_search_ctx(ctx); + if (m) + unmap_mft_record(ni); + iput(dent_inode); /* Balance the ntfs_iget() done above. */ + return ERR_PTR(err); + } +} + +/* + * Inode operations for directories. + */ +struct inode_operations ntfs_dir_inode_ops = { + .lookup = ntfs_lookup, /* VFS: Lookup directory. */ +}; + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/ntfs.h b/reactos/drivers/fs/ntfs/linux-ntfs/ntfs.h new file mode 100644 index 00000000000..fb28bde1ec9 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/ntfs.h @@ -0,0 +1,217 @@ +/* + * ntfs.h - Defines for NTFS Linux kernel driver. Part of the Linux-NTFS + * project. + * + * Copyright (c) 2001,2002 Anton Altaparmakov. + * Copyright (C) 2002 Richard Russon. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_H +#define _LINUX_NTFS_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "types.h" +#include "debug.h" +#include "malloc.h" +#include "endian.h" +#include "volume.h" +#include "inode.h" +#include "layout.h" +#include "attrib.h" +#include "mft.h" + +typedef enum { + NTFS_BLOCK_SIZE = 512, + NTFS_BLOCK_SIZE_BITS = 9, + NTFS_SB_MAGIC = 0x5346544e, /* 'NTFS' */ + NTFS_MAX_NAME_LEN = 255, +} NTFS_CONSTANTS; + +/* Global variables. */ + +/* Slab caches (from super.c). */ +extern kmem_cache_t *ntfs_name_cache; +extern kmem_cache_t *ntfs_inode_cache; +extern kmem_cache_t *ntfs_big_inode_cache; +extern kmem_cache_t *ntfs_attr_ctx_cache; + +/* The various operations structs defined throughout the driver files. */ +extern struct super_operations ntfs_sops; +extern struct super_operations ntfs_mount_sops; + +extern struct address_space_operations ntfs_aops; +extern struct address_space_operations ntfs_mft_aops; + +extern struct file_operations ntfs_file_ops; +extern struct inode_operations ntfs_file_inode_ops; + +extern struct file_operations ntfs_dir_ops; +extern struct inode_operations ntfs_dir_inode_ops; + +extern struct file_operations ntfs_empty_file_ops; +extern struct inode_operations ntfs_empty_inode_ops; + +/* Generic macro to convert pointers to values for comparison purposes. */ +#ifndef p2n +#define p2n(p) ((ptrdiff_t)((ptrdiff_t*)(p))) +#endif + +/** + * NTFS_SB - return the ntfs volume given a vfs super block + * @sb: VFS super block + * + * NTFS_SB() returns the ntfs volume associated with the VFS super block @sb. 
+ */ +static inline ntfs_volume *NTFS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +/** + * ntfs_unmap_page - release a page that was mapped using ntfs_map_page() + * @page: the page to release + * + * Unpin, unmap and release a page that was obtained from ntfs_map_page(). + */ +static inline void ntfs_unmap_page(struct page *page) +{ + kunmap(page); /* Undo the kmap() done by ntfs_map_page(). */ + page_cache_release(page); /* Drop the page reference, unpinning the page. */ +} + +/** + * ntfs_map_page - map a page into accessible memory, reading it if necessary + * @mapping: address space for which to obtain the page + * @index: index into the page cache for @mapping of the page to map + * + * Read a page from the page cache of the address space @mapping at position + * @index, where @index is in units of PAGE_CACHE_SIZE, and not in bytes. + * + * If the page is not in memory it is loaded from disk first using the readpage + * method defined in the address space operations of @mapping and the page is + * added to the page cache of @mapping in the process. + * + * If the page is in high memory it is mapped into memory directly addressable + * by the kernel. + * + * Finally the page count is incremented, thus pinning the page into place. + * + * The above means that page_address(page) can be used on all pages obtained + * with ntfs_map_page() to get the kernel virtual address of the page. + * + * When finished with the page, the caller has to call ntfs_unmap_page() to + * unpin, unmap and release the page. + * + * Note this does not grant exclusive access. If such is desired, the caller + * must provide it independently of the ntfs_{un}map_page() calls by using + * a {rw_}semaphore or other means of serialization. A spin lock cannot be + * used as ntfs_map_page() can block. + * + * The unlocked and uptodate page is returned on success or an encoded error + * on failure. Caller has to test for error using the IS_ERR() macro on the + * return value. 
If that evaluates to TRUE, the negative error code can be + * obtained using PTR_ERR() on the return value of ntfs_map_page(). + */ +static inline struct page *ntfs_map_page(struct address_space *mapping, + unsigned long index) +{ + struct page *page = read_cache_page(mapping, index, + (filler_t*)mapping->a_ops->readpage, NULL); + + if (!IS_ERR(page)) { + wait_on_page_locked(page); /* Wait for the page to be unlocked before testing its state. */ + kmap(page); + if (PageUptodate(page) && !PageError(page)) + return page; + ntfs_unmap_page(page); /* Page is not usable: undo the kmap() and drop the reference. */ + return ERR_PTR(-EIO); + } + return page; /* Pass the read_cache_page() error straight through. */ +} + +/* Declarations of functions and global variables. */ + +/* From fs/ntfs/compress.c */ +extern int ntfs_read_compressed_block(struct page *page); + +/* From fs/ntfs/super.c */ +#define default_upcase_len 0x10000 +extern wchar_t *default_upcase; +extern unsigned long ntfs_nr_upcase_users; +extern unsigned long ntfs_nr_mounts; +extern struct semaphore ntfs_lock; + +typedef struct { + int val; + char *str; +} option_t; +extern const option_t on_errors_arr[]; + +/* From fs/ntfs/compress.c */ +extern int allocate_compression_buffers(void); +extern void free_compression_buffers(void); + +/* From fs/ntfs/mst.c */ +extern int post_read_mst_fixup(NTFS_RECORD *b, const u32 size); +extern int pre_write_mst_fixup(NTFS_RECORD *b, const u32 size); +extern void post_write_mst_fixup(NTFS_RECORD *b); + +/* From fs/ntfs/time.c */ +extern inline s64 utc2ntfs(const time_t time); +extern inline s64 get_current_ntfs_time(void); +extern inline time_t ntfs2utc(const s64 time); + +/* From fs/ntfs/unistr.c */ +extern BOOL ntfs_are_names_equal(const uchar_t *s1, size_t s1_len, + const uchar_t *s2, size_t s2_len, + const IGNORE_CASE_BOOL ic, + const uchar_t *upcase, const u32 upcase_size); +extern int ntfs_collate_names(const uchar_t *name1, const u32 name1_len, + const uchar_t *name2, const u32 name2_len, + const int err_val, const IGNORE_CASE_BOOL ic, + const uchar_t *upcase, const u32 upcase_len); +extern int ntfs_ucsncmp(const uchar_t *s1, const uchar_t *s2, size_t 
n); +extern int ntfs_ucsncasecmp(const uchar_t *s1, const uchar_t *s2, size_t n, + const uchar_t *upcase, const u32 upcase_size); +extern void ntfs_upcase_name(uchar_t *name, u32 name_len, + const uchar_t *upcase, const u32 upcase_len); +extern void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr, + const uchar_t *upcase, const u32 upcase_len); +extern int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1, + FILE_NAME_ATTR *file_name_attr2, + const int err_val, const IGNORE_CASE_BOOL ic, + const uchar_t *upcase, const u32 upcase_len); +extern int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins, + const int ins_len, uchar_t **outs); +extern int ntfs_ucstonls(const ntfs_volume *vol, const uchar_t *ins, + const int ins_len, unsigned char **outs, int outs_len); + +/* From fs/ntfs/upcase.c */ +extern uchar_t *generate_default_upcase(void); + +#endif /* _LINUX_NTFS_H */ + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/super.c b/reactos/drivers/fs/ntfs/linux-ntfs/super.c new file mode 100644 index 00000000000..76b77649242 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/super.c @@ -0,0 +1,1806 @@ +/* + * super.c - NTFS kernel super block handling. Part of the Linux-NTFS project. + * + * Copyright (c) 2001-2003 Anton Altaparmakov + * Copyright (c) 2001,2002 Richard Russon + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include /* For bdev_hardsect_size(). */ +#include +#include +#include + +#include "ntfs.h" +#include "sysctl.h" + +/* Number of mounted file systems which have compression enabled. */ +static unsigned long ntfs_nr_compression_users; + +/* Error constants/strings used in inode.c::ntfs_show_options(). */ +typedef enum { + /* One of these must be present, default is ON_ERRORS_CONTINUE. */ + ON_ERRORS_PANIC = 0x01, + ON_ERRORS_REMOUNT_RO = 0x02, + ON_ERRORS_CONTINUE = 0x04, + /* Optional, can be combined with any of the above. */ + ON_ERRORS_RECOVER = 0x10, +} ON_ERRORS_ACTIONS; + +const option_t on_errors_arr[] = { + { ON_ERRORS_PANIC, "panic" }, + { ON_ERRORS_REMOUNT_RO, "remount-ro", }, + { ON_ERRORS_CONTINUE, "continue", }, + { ON_ERRORS_RECOVER, "recover" }, + { 0, NULL } +}; + +/** + * simple_getbool - parse the value of a boolean mount option + * @s: option value string, or NULL if the option was given without a value + * @setval: destination for the parsed boolean + * + * A NULL @s counts as TRUE. The strings "1", "yes" and "true" give TRUE and + * "0", "no" and "false" give FALSE. Return 1 on success. Return 0 if @s is + * none of these strings, in which case *@setval is not written. + * + * Copied from old ntfs driver (which copied from vfat driver). + */ +static int simple_getbool(char *s, BOOL *setval) +{ + if (s) { + if (!strcmp(s, "1") || !strcmp(s, "yes") || !strcmp(s, "true")) + *setval = TRUE; + else if (!strcmp(s, "0") || !strcmp(s, "no") || + !strcmp(s, "false")) + *setval = FALSE; + else + return 0; + } else + *setval = TRUE; + return 1; +} + +/** + * parse_options - parse the (re)mount options + * @vol: ntfs volume + * @opt: string containing the (re)mount options + * + * Parse the recognized options in @opt for the ntfs volume described by @vol. 
+ */ +static BOOL parse_options(ntfs_volume *vol, char *opt) +{ + char *p, *v, *ov; + static char *utf8 = "utf8"; + int errors = 0, sloppy = 0; + uid_t uid = (uid_t)-1; + gid_t gid = (gid_t)-1; + mode_t fmask = (mode_t)-1, dmask = (mode_t)-1; + int mft_zone_multiplier = -1, on_errors = -1; + int show_sys_files = -1, case_sensitive = -1; + struct nls_table *nls_map = NULL, *old_nls; + + /* I am lazy... (-8 Each helper macro below expands to one if-branch of the option matching chain in the loop. */ +#define NTFS_GETOPT_WITH_DEFAULT(option, variable, default_value) \ + if (!strcmp(p, option)) { \ + if (!v || !*v) \ + variable = default_value; \ + else { \ + variable = simple_strtoul(ov = v, &v, 0); \ + if (*v) \ + goto needs_val; \ + } \ + } +#define NTFS_GETOPT(option, variable) \ + if (!strcmp(p, option)) { \ + if (!v || !*v) \ + goto needs_arg; \ + variable = simple_strtoul(ov = v, &v, 0); \ + if (*v) \ + goto needs_val; \ + } +#define NTFS_GETOPT_BOOL(option, variable) \ + if (!strcmp(p, option)) { \ + BOOL val; \ + if (!simple_getbool(v, &val)) \ + goto needs_bool; \ + variable = val; \ + } +#define NTFS_GETOPT_OPTIONS_ARRAY(option, variable, opt_array) \ + if (!strcmp(p, option)) { \ + int _i; \ + if (!v || !*v) \ + goto needs_arg; \ + ov = v; \ + if (variable == -1) \ + variable = 0; \ + for (_i = 0; opt_array[_i].str && *opt_array[_i].str; _i++) \ + if (!strcmp(opt_array[_i].str, v)) { \ + variable |= opt_array[_i].val; \ + break; \ + } \ + if (!opt_array[_i].str || !*opt_array[_i].str) \ + goto needs_val; \ + } + if (!opt || !*opt) + goto no_mount_options; + ntfs_debug("Entering with mount options string: %s", opt); + while ((p = strsep(&opt, ","))) { + if ((v = strchr(p, '='))) + *v++ = '\0'; + NTFS_GETOPT("uid", uid) + else NTFS_GETOPT("gid", gid) + else NTFS_GETOPT("umask", fmask = dmask) + else NTFS_GETOPT("fmask", fmask) + else NTFS_GETOPT("dmask", dmask) + else NTFS_GETOPT("mft_zone_multiplier", mft_zone_multiplier) + else NTFS_GETOPT_WITH_DEFAULT("sloppy", sloppy, TRUE) + else NTFS_GETOPT_BOOL("show_sys_files", show_sys_files) + else 
NTFS_GETOPT_BOOL("case_sensitive", case_sensitive) + else NTFS_GETOPT_OPTIONS_ARRAY("errors", on_errors, + on_errors_arr) + else if (!strcmp(p, "posix") || !strcmp(p, "show_inodes")) + ntfs_warning(vol->sb, "Ignoring obsolete option %s.", + p); + else if (!strcmp(p, "nls") || !strcmp(p, "iocharset")) { + if (!strcmp(p, "iocharset")) + ntfs_warning(vol->sb, "Option iocharset is " + "deprecated. Please use " + "option nls= in " + "the future."); + if (!v || !*v) + goto needs_arg; +use_utf8: + old_nls = nls_map; + nls_map = load_nls(v); + if (!nls_map) { + if (!old_nls) { + ntfs_error(vol->sb, "NLS character set " + "%s not found.", v); + return FALSE; + } + ntfs_error(vol->sb, "NLS character set %s not " + "found. Using previous one %s.", + v, old_nls->charset); + nls_map = old_nls; + } else /* nls_map */ { + if (old_nls) + unload_nls(old_nls); + } + } else if (!strcmp(p, "utf8")) { + BOOL val = FALSE; + ntfs_warning(vol->sb, "Option utf8 is no longer " + "supported, using option nls=utf8. Please " + "use option nls=utf8 in the future and " + "make sure utf8 is compiled either as a " + "module or into the kernel."); + if (!v || !*v) + val = TRUE; + else if (!simple_getbool(v, &val)) + goto needs_bool; + if (val) { + v = utf8; + goto use_utf8; + } + } else { + ntfs_error(vol->sb, "Unrecognized mount option %s.", p); + if (errors < INT_MAX) + errors++; + } +#undef NTFS_GETOPT_OPTIONS_ARRAY +#undef NTFS_GETOPT_BOOL +#undef NTFS_GETOPT +#undef NTFS_GETOPT_WITH_DEFAULT + } +no_mount_options: + if (errors && !sloppy) + return FALSE; + if (sloppy) + ntfs_warning(vol->sb, "Sloppy option given. Ignoring " + "unrecognized mount option(s) and continuing."); + /* Keep this first! 
An errors= option that ended up with no action bit set is rejected here. */ + if (on_errors != -1) { + if (!on_errors) { + ntfs_error(vol->sb, "Invalid errors option argument " + "or bug in options parser."); + return FALSE; + } + } + if (nls_map) { + if (vol->nls_map && vol->nls_map != nls_map) { + ntfs_error(vol->sb, "Cannot change NLS character set " + "on remount."); + return FALSE; + } /* else (!vol->nls_map || vol->nls_map == nls_map) */ + ntfs_debug("Using NLS character set %s.", nls_map->charset); + vol->nls_map = nls_map; + } else /* (!nls_map) */ { + if (!vol->nls_map) { + vol->nls_map = load_nls_default(); + if (!vol->nls_map) { + ntfs_error(vol->sb, "Failed to load default " + "NLS character set."); + return FALSE; + } + ntfs_debug("Using default NLS character set (%s).", + vol->nls_map->charset); + } + } + if (mft_zone_multiplier != -1) { + if (vol->mft_zone_multiplier && vol->mft_zone_multiplier != + mft_zone_multiplier) { + ntfs_error(vol->sb, "Cannot change mft_zone_multiplier " + "on remount."); + return FALSE; + } + if (mft_zone_multiplier < 1 || mft_zone_multiplier > 4) { + ntfs_error(vol->sb, "Invalid mft_zone_multiplier. " + "Using default value, i.e. 
1."); + mft_zone_multiplier = 1; + } + vol->mft_zone_multiplier = mft_zone_multiplier; + } + if (!vol->mft_zone_multiplier) + vol->mft_zone_multiplier = 1; + if (on_errors != -1) + vol->on_errors = on_errors; + if (!vol->on_errors || vol->on_errors == ON_ERRORS_RECOVER) + vol->on_errors |= ON_ERRORS_CONTINUE; /* No action chosen, or only recover: add continue. */ + if (uid != (uid_t)-1) + vol->uid = uid; + if (gid != (gid_t)-1) + vol->gid = gid; + if (fmask != (mode_t)-1) + vol->fmask = fmask; + if (dmask != (mode_t)-1) + vol->dmask = dmask; + if (show_sys_files != -1) { + if (show_sys_files) + NVolSetShowSystemFiles(vol); + else + NVolClearShowSystemFiles(vol); + } + if (case_sensitive != -1) { + if (case_sensitive) + NVolSetCaseSensitive(vol); + else + NVolClearCaseSensitive(vol); + } + return TRUE; +needs_arg: + ntfs_error(vol->sb, "The %s option requires an argument.", p); + return FALSE; +needs_bool: + ntfs_error(vol->sb, "The %s option requires a boolean argument.", p); + return FALSE; +needs_val: + ntfs_error(vol->sb, "Invalid %s option argument: %s", p, ov); + return FALSE; /* NOTE(review): a table already loaded into the local nls_map is not passed to unload_nls() on the FALSE returns taken before it is stored in vol->nls_map - looks like a leak, confirm. */ +} + +/** + * ntfs_remount - change the mount options of a mounted ntfs filesystem + * @sb: superblock of mounted ntfs filesystem + * @flags: remount flags + * @opt: remount options string + * + * Change the mount options of an already mounted ntfs filesystem. + * + * NOTE: The VFS sets the @sb->s_flags remount flags to @flags after + * ntfs_remount() returns successfully (i.e. returns 0). Otherwise, + * @sb->s_flags are not changed. + */ +static int ntfs_remount(struct super_block *sb, int *flags, char *opt) +{ + ntfs_volume *vol = NTFS_SB(sb); + + ntfs_debug("Entering with remount options string: %s", opt); + +#ifndef NTFS_RW + /* For read-only compiled driver, enforce all read-only flags. */ + *flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; +#else + /* + * For the read-write compiled driver, if we are remounting read-write, + * make sure there aren't any volume errors. 
+ */ + if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + if (NVolErrors(vol)) { + ntfs_error(sb, "Volume has errors and is read-only." + "Cannot remount read-write."); + return -EROFS; + } + } +#endif + + // FIXME/TODO: If left like this we will have problems with rw->ro and + // ro->rw, as well as with sync->async and vice versa remounts. + // Note: The VFS already checks that there are no pending deletes and + // no open files for writing. So we only need to worry about dirty + // inode pages and dirty system files (which include dirty inodes). + // Either handle by flushing the whole volume NOW or by having the + // write routines work on MS_RDONLY fs and guarantee we don't mark + // anything as dirty if MS_RDONLY is set. That way the dirty data + // would get flushed but no new dirty data would appear. This is + // probably best but we need to be careful not to mark anything dirty + // or the MS_RDONLY will be leaking writes. + + // TODO: Deal with *flags. + + if (!parse_options(vol, opt)) + return -EINVAL; + + return 0; +} + +/** + * is_boot_sector_ntfs - check whether a boot sector is a valid NTFS boot sector + * @sb: Super block of the device to which @b belongs. + * @b: Boot sector of device @sb to check. + * @silent: If TRUE, all output will be silenced. + * + * is_boot_sector_ntfs() checks whether the boot sector @b is a valid NTFS boot + * sector. Returns TRUE if it is valid and FALSE if not. + * + * @sb is only needed for warning/error output, i.e. it can be NULL when silent + * is TRUE. + */ +static BOOL is_boot_sector_ntfs(const struct super_block *sb, + const NTFS_BOOT_SECTOR *b, const BOOL silent) +{ + /* + * Check that checksum == sum of u32 values from b to the checksum + * field. If checksum is zero, no checking is done. 
+ */ + if ((void*)b < (void*)&b->checksum && b->checksum) { + u32 i, *u; + for (i = 0, u = (u32*)b; u < (u32*)(&b->checksum); ++u) + i += le32_to_cpup(u); + if (le32_to_cpu(b->checksum) != i) + goto not_ntfs; + } + /* Check OEMidentifier is "NTFS " */ + if (b->oem_id != magicNTFS) + goto not_ntfs; + /* Check bytes per sector value is between 256 and 4096. */ + if (le16_to_cpu(b->bpb.bytes_per_sector) < 0x100 || + le16_to_cpu(b->bpb.bytes_per_sector) > 0x1000) + goto not_ntfs; + /* Check sectors per cluster value is valid, i.e. a power of two from 1 to 128. */ + switch (b->bpb.sectors_per_cluster) { + case 1: case 2: case 4: case 8: case 16: case 32: case 64: case 128: + break; + default: + goto not_ntfs; + } + /* Check the cluster size is not above 65536 bytes. */ + if ((u32)le16_to_cpu(b->bpb.bytes_per_sector) * + b->bpb.sectors_per_cluster > 0x10000) + goto not_ntfs; + /* Check reserved/unused fields are really zero. */ + if (le16_to_cpu(b->bpb.reserved_sectors) || + le16_to_cpu(b->bpb.root_entries) || + le16_to_cpu(b->bpb.sectors) || + le16_to_cpu(b->bpb.sectors_per_fat) || + le32_to_cpu(b->bpb.large_sectors) || b->bpb.fats) + goto not_ntfs; + /* Check clusters per file mft record value is valid: byte values 0xe1 to 0xf7 are accepted as they are, anything else must be a power of two from 1 to 64. */ + if ((u8)b->clusters_per_mft_record < 0xe1 || + (u8)b->clusters_per_mft_record > 0xf7) + switch (b->clusters_per_mft_record) { + case 1: case 2: case 4: case 8: case 16: case 32: case 64: + break; + default: + goto not_ntfs; + } + /* Check clusters per index block value is valid: same rule as for the mft record value above. */ + if ((u8)b->clusters_per_index_record < 0xe1 || + (u8)b->clusters_per_index_record > 0xf7) + switch (b->clusters_per_index_record) { + case 1: case 2: case 4: case 8: case 16: case 32: case 64: + break; + default: + goto not_ntfs; + } + /* + * Check for valid end of sector marker. We will work without it, but + * many BIOSes will refuse to boot from a bootsector if the magic is + * incorrect, so we emit a warning. 
+ */ + if (!silent && b->end_of_sector_marker != cpu_to_le16(0xaa55)) + ntfs_warning(sb, "Invalid end of sector marker."); + return TRUE; +not_ntfs: + return FALSE; +} + +/** + * read_ntfs_boot_sector - read the NTFS boot sector of a device + * @sb: super block of device to read the boot sector from + * @silent: if true, suppress all output + * + * Reads the boot sector from the device and validates it. If that fails, tries + * to read the backup boot sector, first from the end of the device a-la NT4 and + * later and then from the middle of the device a-la NT3.51 and before. + * + * If a valid boot sector is found but it is not the primary boot sector, we + * repair the primary boot sector silently (unless the device is read-only or + * the primary boot sector is not accessible). + * + * NOTE: To call this function, @sb must have the fields s_dev, the ntfs super + * block (u.ntfs_sb), nr_blocks and the device flags (s_flags) initialized + * to their respective values. + * + * Return the unlocked buffer head containing the boot sector or NULL on error. + */ +static struct buffer_head *read_ntfs_boot_sector(struct super_block *sb, + const int silent) +{ + const char *read_err_str = "Unable to read %s boot sector."; + struct buffer_head *bh_primary, *bh_backup; + long nr_blocks = NTFS_SB(sb)->nr_blocks; + + /* Try to read primary boot sector. */ + if ((bh_primary = sb_bread(sb, 0))) { + if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*) + bh_primary->b_data, silent)) + return bh_primary; + if (!silent) + ntfs_error(sb, "Primary boot sector is invalid."); + } else if (!silent) + ntfs_error(sb, read_err_str, "primary"); + if (!(NTFS_SB(sb)->on_errors & ON_ERRORS_RECOVER)) { + if (bh_primary) + brelse(bh_primary); + if (!silent) + ntfs_error(sb, "Mount option errors=recover not used. " + "Aborting without trying to recover."); + return NULL; + } + /* Try to read NT4+ backup boot sector. 
*/ + if ((bh_backup = sb_bread(sb, nr_blocks - 1))) { + if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*) + bh_backup->b_data, silent)) + goto hotfix_primary_boot_sector; + brelse(bh_backup); + } else if (!silent) + ntfs_error(sb, read_err_str, "backup"); + /* Try to read NT3.51- backup boot sector. */ + if ((bh_backup = sb_bread(sb, nr_blocks >> 1))) { + if (is_boot_sector_ntfs(sb, (NTFS_BOOT_SECTOR*) + bh_backup->b_data, silent)) + goto hotfix_primary_boot_sector; + if (!silent) + ntfs_error(sb, "Could not find a valid backup boot " + "sector."); + brelse(bh_backup); + } else if (!silent) + ntfs_error(sb, read_err_str, "backup"); + /* We failed: neither backup copy was usable. Release the primary buffer, if any, and return. */ + if (bh_primary) + brelse(bh_primary); + return NULL; +hotfix_primary_boot_sector: + if (bh_primary) { + /* + * If we managed to read sector zero and the volume is not + * read-only, copy the found, valid backup boot sector to the + * primary boot sector. + */ + if (!(sb->s_flags & MS_RDONLY)) { + ntfs_warning(sb, "Hot-fix: Recovering invalid primary " + "boot sector from backup copy."); + memcpy(bh_primary->b_data, bh_backup->b_data, + sb->s_blocksize); + mark_buffer_dirty(bh_primary); + sync_dirty_buffer(bh_primary); + if (buffer_uptodate(bh_primary)) { + brelse(bh_backup); + return bh_primary; + } + ntfs_error(sb, "Hot-fix: Device write error while " + "recovering primary boot sector."); + } else { + ntfs_warning(sb, "Hot-fix: Recovery of primary boot " + "sector failed: Read-only mount."); + } + brelse(bh_primary); + } + ntfs_warning(sb, "Using backup boot sector."); + return bh_backup; +} + +/** + * parse_ntfs_boot_sector - parse the boot sector and store the data in @vol + * @vol: volume structure to initialise with data from boot sector + * @b: boot sector to parse + * + * Parse the ntfs boot sector @b and store all important information therein in + * the ntfs super block @vol. Return TRUE on success and FALSE on error. 
+ */ +static BOOL parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b) +{ + unsigned int sectors_per_cluster_bits, nr_hidden_sects; + int clusters_per_mft_record, clusters_per_index_record; + s64 ll; + + vol->sector_size = le16_to_cpu(b->bpb.bytes_per_sector); + vol->sector_size_bits = ffs(vol->sector_size) - 1; + ntfs_debug("vol->sector_size = %i (0x%x)", vol->sector_size, + vol->sector_size); + ntfs_debug("vol->sector_size_bits = %i (0x%x)", vol->sector_size_bits, + vol->sector_size_bits); + if (vol->sector_size != vol->sb->s_blocksize) + ntfs_warning(vol->sb, "The boot sector indicates a sector size " + "different from the device sector size."); + ntfs_debug("sectors_per_cluster = 0x%x", b->bpb.sectors_per_cluster); + sectors_per_cluster_bits = ffs(b->bpb.sectors_per_cluster) - 1; + ntfs_debug("sectors_per_cluster_bits = 0x%x", + sectors_per_cluster_bits); + nr_hidden_sects = le32_to_cpu(b->bpb.hidden_sectors); + ntfs_debug("number of hidden sectors = 0x%x", nr_hidden_sects); + vol->cluster_size = vol->sector_size << sectors_per_cluster_bits; + vol->cluster_size_mask = vol->cluster_size - 1; + vol->cluster_size_bits = ffs(vol->cluster_size) - 1; + ntfs_debug("vol->cluster_size = %i (0x%x)", vol->cluster_size, + vol->cluster_size); + ntfs_debug("vol->cluster_size_mask = 0x%x", vol->cluster_size_mask); + ntfs_debug("vol->cluster_size_bits = %i (0x%x)", + vol->cluster_size_bits, vol->cluster_size_bits); + if (vol->sector_size > vol->cluster_size) { + ntfs_error(vol->sb, "Sector sizes above the cluster size are " + "not supported. Sorry."); + return FALSE; + } + if (vol->sb->s_blocksize > vol->cluster_size) { + ntfs_error(vol->sb, "Cluster sizes smaller than the device " + "sector size are not supported. 
Sorry."); + return FALSE; + } + /* A positive value is a cluster count; otherwise it is -log2 of the size in bytes. */ + clusters_per_mft_record = b->clusters_per_mft_record; + ntfs_debug("clusters_per_mft_record = %i (0x%x)", + clusters_per_mft_record, clusters_per_mft_record); + if (clusters_per_mft_record > 0) + vol->mft_record_size = vol->cluster_size << + (ffs(clusters_per_mft_record) - 1); + else + /* + * When mft_record_size < cluster_size, clusters_per_mft_record + * = -log2(mft_record_size) bytes. mft_record_size normally is + * 1024 bytes, which is encoded as 0xF6 (-10 in decimal). + */ + vol->mft_record_size = 1 << -clusters_per_mft_record; + vol->mft_record_size_mask = vol->mft_record_size - 1; + vol->mft_record_size_bits = ffs(vol->mft_record_size) - 1; + ntfs_debug("vol->mft_record_size = %i (0x%x)", vol->mft_record_size, + vol->mft_record_size); + ntfs_debug("vol->mft_record_size_mask = 0x%x", + vol->mft_record_size_mask); + ntfs_debug("vol->mft_record_size_bits = %i (0x%x)", + vol->mft_record_size_bits, vol->mft_record_size_bits); + clusters_per_index_record = b->clusters_per_index_record; + ntfs_debug("clusters_per_index_record = %i (0x%x)", + clusters_per_index_record, clusters_per_index_record); + if (clusters_per_index_record > 0) + vol->index_record_size = vol->cluster_size << + (ffs(clusters_per_index_record) - 1); + else + /* + * When index_record_size < cluster_size, + * clusters_per_index_record = -log2(index_record_size) bytes. + * index_record_size normally equals 4096 bytes, which is + * encoded as 0xF4 (-12 in decimal). 
+ */ + vol->index_record_size = 1 << -clusters_per_index_record; + vol->index_record_size_mask = vol->index_record_size - 1; + vol->index_record_size_bits = ffs(vol->index_record_size) - 1; + ntfs_debug("vol->index_record_size = %i (0x%x)", + vol->index_record_size, vol->index_record_size); + ntfs_debug("vol->index_record_size_mask = 0x%x", + vol->index_record_size_mask); + ntfs_debug("vol->index_record_size_bits = %i (0x%x)", + vol->index_record_size_bits, + vol->index_record_size_bits); + /* + * Get the size of the volume in clusters and check for 64-bit-ness. + * Windows currently only uses 32 bits to save the clusters so we do + * the same as it is much faster on 32-bit CPUs. + */ + ll = sle64_to_cpu(b->number_of_sectors) >> sectors_per_cluster_bits; + if ((u64)ll >= 1ULL << 32) { + ntfs_error(vol->sb, "Cannot handle 64-bit clusters. Sorry."); + return FALSE; + } + vol->nr_clusters = ll; + ntfs_debug("vol->nr_clusters = 0x%Lx", (long long)vol->nr_clusters); + /* + * On an architecture where unsigned long is 32-bits, we restrict the + * volume size to 2TiB (2^41). On a 64-bit architecture, the compiler + * will hopefully optimize the whole check away. + */ + if (sizeof(unsigned long) < 8) { + if ((ll << vol->cluster_size_bits) >= (1ULL << 41)) { + /* Clusters >> (40 - cluster_size_bits) gives the size in TiB. */ + ntfs_error(vol->sb, "Volume size (%LuTiB) is too large " + "for this architecture. Maximum " + "supported is 2TiB. Sorry.", + (unsigned long long)ll >> + (40 - vol->cluster_size_bits)); + return FALSE; + } + } + ll = sle64_to_cpu(b->mft_lcn); + if (ll >= vol->nr_clusters) { + ntfs_error(vol->sb, "MFT LCN is beyond end of volume. Weird."); + return FALSE; + } + vol->mft_lcn = ll; + ntfs_debug("vol->mft_lcn = 0x%Lx", (long long)vol->mft_lcn); + ll = sle64_to_cpu(b->mftmirr_lcn); + if (ll >= vol->nr_clusters) { + ntfs_error(vol->sb, "MFTMirr LCN is beyond end of volume. 
" + "Weird."); + return FALSE; + } + vol->mftmirr_lcn = ll; + ntfs_debug("vol->mftmirr_lcn = 0x%Lx", (long long)vol->mftmirr_lcn); + vol->serial_no = le64_to_cpu(b->volume_serial_number); + ntfs_debug("vol->serial_no = 0x%Lx", + (unsigned long long)vol->serial_no); + /* + * Determine MFT zone size. This is not strictly the right place to do + * this, but I am too lazy to create a function especially for it... + */ + vol->mft_zone_end = vol->nr_clusters; + switch (vol->mft_zone_multiplier) { /* % of volume size in clusters */ + case 4: + vol->mft_zone_end = vol->mft_zone_end >> 1; /* 50% */ + break; + case 3: + vol->mft_zone_end = (vol->mft_zone_end + + (vol->mft_zone_end >> 1)) >> 2; /* 37.5% */ + break; + case 2: + vol->mft_zone_end = vol->mft_zone_end >> 2; /* 25% */ + break; + default: + vol->mft_zone_multiplier = 1; + /* Fall through into case 1. */ + case 1: + vol->mft_zone_end = vol->mft_zone_end >> 3; /* 12.5% */ + break; + } + ntfs_debug("vol->mft_zone_multiplier = 0x%x", + vol->mft_zone_multiplier); + vol->mft_zone_start = vol->mft_lcn; + vol->mft_zone_end += vol->mft_lcn; + ntfs_debug("vol->mft_zone_start = 0x%Lx", + (long long)vol->mft_zone_start); + ntfs_debug("vol->mft_zone_end = 0x%Lx", (long long)vol->mft_zone_end); + /* + * And another misplaced defaults setting. + * NOTE(review): parse_options() falls back to ON_ERRORS_CONTINUE when no + * action is set, while this falls back to ON_ERRORS_PANIC - confirm + * which default is intended. + */ + if (!vol->on_errors) + vol->on_errors = ON_ERRORS_PANIC; + return TRUE; +} + +/** + * load_and_init_upcase - load the upcase table for an ntfs volume + * @vol: ntfs super block describing device whose upcase to load + * + * Return TRUE on success or FALSE on error. + */ +static BOOL load_and_init_upcase(ntfs_volume *vol) +{ + struct super_block *sb = vol->sb; + struct inode *ino; + struct page *page; + unsigned long index, max_index; + unsigned int size; + int i, max; + + ntfs_debug("Entering."); + /* Read upcase table and setup vol->upcase and vol->upcase_len. 
*/ + ino = ntfs_iget(sb, FILE_UpCase); + if (IS_ERR(ino) || is_bad_inode(ino)) { + if (!IS_ERR(ino)) + iput(ino); + goto upcase_failed; + } + /* + * The upcase size must not be above 64k Unicode characters, must not + * be zero and must be a multiple of sizeof(uchar_t). + */ + if (!ino->i_size || ino->i_size & (sizeof(uchar_t) - 1) || + ino->i_size > 64ULL * 1024 * sizeof(uchar_t)) + goto iput_upcase_failed; + vol->upcase = (uchar_t*)ntfs_malloc_nofs(ino->i_size); + if (!vol->upcase) + goto iput_upcase_failed; + index = 0; + max_index = ino->i_size >> PAGE_CACHE_SHIFT; + size = PAGE_CACHE_SIZE; + /* + * Copy all full pages first. The label inside the loop lets the code + * after it re-enter the body exactly once for a trailing partial page. + */ + while (index < max_index) { + /* Read the upcase table and copy it into the linear buffer. */ +read_partial_upcase_page: + page = ntfs_map_page(ino->i_mapping, index); + if (IS_ERR(page)) + goto iput_upcase_failed; + memcpy((char*)vol->upcase + (index++ << PAGE_CACHE_SHIFT), + page_address(page), size); + ntfs_unmap_page(page); + } + if (size == PAGE_CACHE_SIZE) { + size = ino->i_size & ~PAGE_CACHE_MASK; + if (size) + goto read_partial_upcase_page; + } + vol->upcase_len = ino->i_size >> UCHAR_T_SIZE_BITS; + /* Cast so that the arguments match the %Lu and %u conversions. */ + ntfs_debug("Read %Lu bytes from $UpCase (expected %u bytes).", + (unsigned long long)ino->i_size, + (unsigned int)(64 * 1024 * sizeof(uchar_t))); + iput(ino); + down(&ntfs_lock); + if (!default_upcase) { + ntfs_debug("Using volume specified $UpCase since default is " + "not present."); + up(&ntfs_lock); + return TRUE; + } + max = default_upcase_len; + if (max > vol->upcase_len) + max = vol->upcase_len; + for (i = 0; i < max; i++) + if (vol->upcase[i] != default_upcase[i]) + break; + if (i == max) { + /* Identical to the default: share it and drop the private copy. */ + ntfs_free(vol->upcase); + vol->upcase = default_upcase; + vol->upcase_len = max; + ntfs_nr_upcase_users++; + up(&ntfs_lock); + ntfs_debug("Volume specified $UpCase matches default. 
Using " + "default."); + return TRUE; + } + up(&ntfs_lock); + ntfs_debug("Using volume specified $UpCase since it does not match " + "the default."); + return TRUE; +iput_upcase_failed: + iput(ino); + ntfs_free(vol->upcase); + vol->upcase = NULL; +upcase_failed: + /* Fall back to the global default table if there is one. */ + down(&ntfs_lock); + if (default_upcase) { + vol->upcase = default_upcase; + vol->upcase_len = default_upcase_len; + ntfs_nr_upcase_users++; + up(&ntfs_lock); + ntfs_error(sb, "Failed to load $UpCase from the volume. Using " + "default."); + return TRUE; + } + up(&ntfs_lock); + ntfs_error(sb, "Failed to initialize upcase table."); + return FALSE; +} + +/** + * load_system_files - open the system files using normal functions + * @vol: ntfs super block describing device whose system files to load + * + * Open the system files with normal access functions and complete setting up + * the ntfs super block @vol. + * + * Return TRUE on success or FALSE on error. + */ +static BOOL load_system_files(ntfs_volume *vol) +{ + struct super_block *sb = vol->sb; + struct inode *tmp_ino; + MFT_RECORD *m; + VOLUME_INFORMATION *vi; + attr_search_context *ctx; + + ntfs_debug("Entering."); + + /* Get mft bitmap attribute inode. */ + vol->mftbmp_ino = ntfs_attr_iget(vol->mft_ino, AT_BITMAP, NULL, 0); + if (IS_ERR(vol->mftbmp_ino)) { + ntfs_error(sb, "Failed to load $MFT/$BITMAP attribute."); + return FALSE; + } + + /* Get mft mirror inode. */ + vol->mftmirr_ino = ntfs_iget(sb, FILE_MFTMirr); + if (IS_ERR(vol->mftmirr_ino) || is_bad_inode(vol->mftmirr_ino)) { + if (!IS_ERR(vol->mftmirr_ino)) + iput(vol->mftmirr_ino); + ntfs_error(sb, "Failed to load $MFTMirr."); + goto iput_mftbmp_err_out; + } + // FIXME: Compare mftmirr with mft and repair if appropriate and not + // a read-only mount. + + /* Read upcase table and setup vol->upcase and vol->upcase_len. 
*/ + if (!load_and_init_upcase(vol)) + goto iput_mirr_err_out; + /* + * Get the cluster allocation bitmap inode and verify the size, no + * need for any locking at this stage as we are already running + * exclusively as we are mount in progress task. + */ + vol->lcnbmp_ino = ntfs_iget(sb, FILE_Bitmap); + if (IS_ERR(vol->lcnbmp_ino) || is_bad_inode(vol->lcnbmp_ino)) { + if (!IS_ERR(vol->lcnbmp_ino)) + iput(vol->lcnbmp_ino); + goto bitmap_failed; + } + if ((vol->nr_clusters + 7) >> 3 > vol->lcnbmp_ino->i_size) { + iput(vol->lcnbmp_ino); +bitmap_failed: + ntfs_error(sb, "Failed to load $Bitmap."); + goto iput_mirr_err_out; + } + /* + * Get the volume inode and setup our cache of the volume flags and + * version. + */ + vol->vol_ino = ntfs_iget(sb, FILE_Volume); + if (IS_ERR(vol->vol_ino) || is_bad_inode(vol->vol_ino)) { + if (!IS_ERR(vol->vol_ino)) + iput(vol->vol_ino); +volume_failed: + ntfs_error(sb, "Failed to load $Volume."); + goto iput_lcnbmp_err_out; + } + m = map_mft_record(NTFS_I(vol->vol_ino)); + if (IS_ERR(m)) { +iput_volume_failed: + iput(vol->vol_ino); + goto volume_failed; + } + if (!(ctx = get_attr_search_ctx(NTFS_I(vol->vol_ino), m))) { + ntfs_error(sb, "Failed to get attribute search context."); + goto get_ctx_vol_failed; + } + if (!lookup_attr(AT_VOLUME_INFORMATION, NULL, 0, 0, 0, NULL, 0, ctx) || + ctx->attr->non_resident || ctx->attr->flags) { +err_put_vol: + put_attr_search_ctx(ctx); +get_ctx_vol_failed: + unmap_mft_record(NTFS_I(vol->vol_ino)); + goto iput_volume_failed; + } + vi = (VOLUME_INFORMATION*)((char*)ctx->attr + + le16_to_cpu(ctx->attr->data.resident.value_offset)); + /* Some bounds checks: the value must lie entirely inside the attribute record. */ + if ((u8*)vi < (u8*)ctx->attr || (u8*)vi + + le32_to_cpu(ctx->attr->data.resident.value_length) > + (u8*)ctx->attr + le32_to_cpu(ctx->attr->length)) + goto err_put_vol; + /* Setup volume flags and version. 
*/ + vol->vol_flags = vi->flags; + vol->major_ver = vi->major_ver; + vol->minor_ver = vi->minor_ver; + put_attr_search_ctx(ctx); + unmap_mft_record(NTFS_I(vol->vol_ino)); + printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver, + vol->minor_ver); + /* + * Get the inode for the logfile and empty it if this is a read-write + * mount. + */ + tmp_ino = ntfs_iget(sb, FILE_LogFile); + if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { + if (!IS_ERR(tmp_ino)) + iput(tmp_ino); + ntfs_error(sb, "Failed to load $LogFile."); + // FIXME: We only want to empty the thing so pointless bailing + // out. Can recover/ignore. + goto iput_vol_err_out; + } + // FIXME: Empty the logfile, but only if not read-only. + // FIXME: What happens if someone remounts rw? We need to empty the file + // then. We need a flag to tell us whether we have done it already. + iput(tmp_ino); + /* + * Get the inode for the attribute definitions file and parse the + * attribute definitions. + */ + tmp_ino = ntfs_iget(sb, FILE_AttrDef); + if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { + if (!IS_ERR(tmp_ino)) + iput(tmp_ino); + ntfs_error(sb, "Failed to load $AttrDef."); + goto iput_vol_err_out; + } + // FIXME: Parse the attribute definitions. + iput(tmp_ino); + /* Get the root directory inode. */ + vol->root_ino = ntfs_iget(sb, FILE_root); + if (IS_ERR(vol->root_ino) || is_bad_inode(vol->root_ino)) { + if (!IS_ERR(vol->root_ino)) + iput(vol->root_ino); + ntfs_error(sb, "Failed to load root directory."); + goto iput_vol_err_out; + } + /* If on NTFS versions before 3.0, we are done. */ + if (vol->major_ver < 3) + return TRUE; + /* NTFS 3.0+ specific initialization. */ + /* Get the security descriptors inode. */ + vol->secure_ino = ntfs_iget(sb, FILE_Secure); + if (IS_ERR(vol->secure_ino) || is_bad_inode(vol->secure_ino)) { + if (!IS_ERR(vol->secure_ino)) + iput(vol->secure_ino); + ntfs_error(sb, "Failed to load $Secure."); + goto iput_root_err_out; + } + // FIXME: Initialize security. 
+ /* Get the extended system files' directory inode. */ + tmp_ino = ntfs_iget(sb, FILE_Extend); + if (IS_ERR(tmp_ino) || is_bad_inode(tmp_ino)) { + if (!IS_ERR(tmp_ino)) + iput(tmp_ino); + ntfs_error(sb, "Failed to load $Extend."); + goto iput_sec_err_out; + } + // FIXME: Do something. E.g. want to delete the $UsnJrnl if exists. + // Note we might be doing this at the wrong level; we might want to + // d_alloc_root() and then do a "normal" open(2) of $Extend\$UsnJrnl + // rather than using ntfs_iget here, as we don't know the inode number + // for the files in $Extend directory. + iput(tmp_ino); + return TRUE; +iput_sec_err_out: + iput(vol->secure_ino); +iput_root_err_out: + iput(vol->root_ino); +iput_vol_err_out: + iput(vol->vol_ino); +iput_lcnbmp_err_out: + iput(vol->lcnbmp_ino); +iput_mirr_err_out: + iput(vol->mftmirr_ino); +iput_mftbmp_err_out: + iput(vol->mftbmp_ino); + return FALSE; /* NOTE(review): these error exits only iput() inodes; the upcase table set up by load_and_init_upcase() is not released here - confirm the caller does it. */ +} + +/** + * ntfs_put_super - called by the vfs to unmount a volume + * @vfs_sb: vfs superblock of volume to unmount + * + * ntfs_put_super() is called by the VFS (from fs/super.c::do_umount()) when + * the volume is being unmounted (umount system call has been invoked) and it + * releases all inodes and memory belonging to the NTFS specific part of the + * super block. + */ +static void ntfs_put_super(struct super_block *vfs_sb) +{ + ntfs_volume *vol = NTFS_SB(vfs_sb); + + ntfs_debug("Entering."); + + iput(vol->vol_ino); + vol->vol_ino = NULL; + + /* NTFS 3.0+ specific clean up. 
*/ + if (vol->major_ver >= 3) { + if (vol->secure_ino) { + iput(vol->secure_ino); + vol->secure_ino = NULL; + } + } + + iput(vol->root_ino); + vol->root_ino = NULL; + + down_write(&vol->lcnbmp_lock); + iput(vol->lcnbmp_ino); + vol->lcnbmp_ino = NULL; + up_write(&vol->lcnbmp_lock); + + iput(vol->mftmirr_ino); + vol->mftmirr_ino = NULL; + + down_write(&vol->mftbmp_lock); + iput(vol->mftbmp_ino); + vol->mftbmp_ino = NULL; + up_write(&vol->mftbmp_lock); + + iput(vol->mft_ino); + vol->mft_ino = NULL; + + vol->upcase_len = 0; + /* + * Decrease the number of mounts and destroy the global default upcase + * table if necessary. Also decrease the number of upcase users if we + * are a user. + */ + down(&ntfs_lock); + ntfs_nr_mounts--; + if (vol->upcase == default_upcase) { + ntfs_nr_upcase_users--; + vol->upcase = NULL; + } + if (!ntfs_nr_upcase_users && default_upcase) { + ntfs_free(default_upcase); + default_upcase = NULL; + } + if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users) + free_compression_buffers(); /* The compression user count just dropped to zero. */ + up(&ntfs_lock); + if (vol->upcase) { /* Still set, so this is a volume private table and not the shared default. */ + ntfs_free(vol->upcase); + vol->upcase = NULL; + } + if (vol->nls_map) { + unload_nls(vol->nls_map); + vol->nls_map = NULL; + } + vfs_sb->s_fs_info = NULL; + kfree(vol); + return; +} + +/** + * get_nr_free_clusters - return the number of free clusters on a volume + * @vol: ntfs volume for which to obtain free cluster count + * + * Calculate the number of free clusters on the mounted NTFS volume @vol. We + * actually calculate the number of clusters in use instead because this + * allows us to not care about partial pages as these will be just zero filled + * and hence not be counted as allocated clusters. + * + * The only particularity is that clusters beyond the end of the logical ntfs + * volume will be marked as allocated to prevent errors which means we have to + * discount those at the end. This is important as the cluster bitmap always + * has a size in multiples of 8 bytes, i.e. 
up to 63 clusters could be outside + * the logical volume and marked in use when they are not as they do not exist. + * + * If any pages cannot be read we assume all clusters in the erroring pages are + * in use. This means we return an underestimate on errors which is better than + * an overestimate. + */ +static s64 get_nr_free_clusters(ntfs_volume *vol) +{ + s64 nr_free = vol->nr_clusters; + u32 *kaddr; + struct address_space *mapping = vol->lcnbmp_ino->i_mapping; + filler_t *readpage = (filler_t*)mapping->a_ops->readpage; + struct page *page; + unsigned long index, max_index; + unsigned int max_size; + + ntfs_debug("Entering."); + /* Serialize accesses to the cluster bitmap. */ + down_read(&vol->lcnbmp_lock); + /* + * Convert the number of bits into bytes rounded up, then convert into + * multiples of PAGE_CACHE_SIZE, rounding up so that if we have one + * full and one partial page max_index = 2. + */ + max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + /* Use multiples of 4 bytes. */ + max_size = PAGE_CACHE_SIZE >> 2; + ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%x.", + max_index, max_size); + for (index = 0UL; index < max_index; index++) { + unsigned int i; + /* + * Read the page from page cache, getting it from backing store + * if necessary, and increment the use count. + */ + page = read_cache_page(mapping, index, (filler_t*)readpage, + NULL); + /* Ignore pages which errored synchronously. */ + if (IS_ERR(page)) { + ntfs_debug("Sync read_cache_page() error. Skipping " + "page (index 0x%lx).", index); + nr_free -= PAGE_CACHE_SIZE * 8; + continue; + } + wait_on_page_locked(page); + /* Ignore pages which errored asynchronously. */ + if (!PageUptodate(page)) { + ntfs_debug("Async read_cache_page() error. 
Skipping " + "page (index 0x%lx).", index); + page_cache_release(page); + nr_free -= PAGE_CACHE_SIZE * 8; + continue; + } + kaddr = (u32*)kmap_atomic(page, KM_USER0); + /* + * For each 4 bytes, subtract the number of set bits. If this + * is the last page and it is partial we don't really care as + * it just means we do a little extra work but it won't affect + * the result as all out of range bytes are set to zero by + * ntfs_readpage(). + */ + for (i = 0; i < max_size; i++) + nr_free -= (s64)hweight32(kaddr[i]); + kunmap_atomic(kaddr, KM_USER0); + page_cache_release(page); + } + ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1); + /* + * Fixup for eventual bits outside logical ntfs volume (see function + * description above). + */ + if (vol->nr_clusters & 63) + nr_free += 64 - (vol->nr_clusters & 63); + up_read(&vol->lcnbmp_lock); + /* If errors occured we may well have gone below zero, fix this. */ + if (nr_free < 0) + nr_free = 0; + ntfs_debug("Exiting."); + return nr_free; +} + +/** + * __get_nr_free_mft_records - return the number of free inodes on a volume + * @vol: ntfs volume for which to obtain free inode count + * + * Calculate the number of free mft records (inodes) on the mounted NTFS + * volume @vol. We actually calculate the number of mft records in use instead + * because this allows us to not care about partial pages as these will be just + * zero filled and hence not be counted as allocated mft record. + * + * If any pages cannot be read we assume all mft records in the erroring pages + * are in use. This means we return an underestimate on errors which is better + * than an overestimate. + * + * NOTE: Caller must hold mftbmp_lock rw_semaphore for reading or writing. 
+ */ +static unsigned long __get_nr_free_mft_records(ntfs_volume *vol) +{ + s64 nr_free = vol->nr_mft_records; + u32 *kaddr; + struct address_space *mapping = vol->mftbmp_ino->i_mapping; + filler_t *readpage = (filler_t*)mapping->a_ops->readpage; + struct page *page; + unsigned long index, max_index; + unsigned int max_size; + + ntfs_debug("Entering."); + /* + * Convert the number of bits into bytes rounded up, then convert into + * multiples of PAGE_CACHE_SIZE, rounding up so that if we have one + * full and one partial page max_index = 2. + */ + max_index = (((vol->nr_mft_records + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + /* Use multiples of 4 bytes. */ + max_size = PAGE_CACHE_SIZE >> 2; + ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = " + "0x%x.", max_index, max_size); + for (index = 0UL; index < max_index; index++) { + unsigned int i; + /* + * Read the page from page cache, getting it from backing store + * if necessary, and increment the use count. + */ + page = read_cache_page(mapping, index, (filler_t*)readpage, + NULL); + /* Ignore pages which errored synchronously. */ + if (IS_ERR(page)) { + ntfs_debug("Sync read_cache_page() error. Skipping " + "page (index 0x%lx).", index); + nr_free -= PAGE_CACHE_SIZE * 8; + continue; + } + wait_on_page_locked(page); + /* Ignore pages which errored asynchronously. */ + if (!PageUptodate(page)) { + ntfs_debug("Async read_cache_page() error. Skipping " + "page (index 0x%lx).", index); + page_cache_release(page); + nr_free -= PAGE_CACHE_SIZE * 8; + continue; + } + kaddr = (u32*)kmap_atomic(page, KM_USER0); + /* + * For each 4 bytes, subtract the number of set bits. If this + * is the last page and it is partial we don't really care as + * it just means we do a little extra work but it won't affect + * the result as all out of range bytes are set to zero by + * ntfs_readpage(). 
+ */ + for (i = 0; i < max_size; i++) + nr_free -= (s64)hweight32(kaddr[i]); + kunmap_atomic(kaddr, KM_USER0); + page_cache_release(page); + } + ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.", + index - 1); + /* If errors occurred we may well have gone below zero, fix this. */ + if (nr_free < 0) + nr_free = 0; + ntfs_debug("Exiting."); + return nr_free; +} + +/** + * ntfs_statfs - return information about mounted NTFS volume + * @sb: super block of mounted volume + * @sfs: statfs structure in which to return the information + * + * Return information about the mounted NTFS volume @sb in the statfs structure + * pointed to by @sfs (this is initialized with zeros before ntfs_statfs is + * called). We interpret the values to be correct as of the moment in time at + * which we are called. Most values are variable otherwise and this isn't just + * the free values but the totals as well. For example we can increase the + * total number of file nodes if we run out and we can keep doing this until + * there is no more space on the volume left at all. + * + * Called from vfs_statfs which is used to handle the statfs, fstatfs, and + * ustat system calls. + * + * Return 0 on success or -errno on error. + */ +static int ntfs_statfs(struct super_block *sb, struct kstatfs *sfs) +{ + ntfs_volume *vol = NTFS_SB(sb); + s64 size; + + ntfs_debug("Entering."); + /* Type of filesystem. */ + sfs->f_type = NTFS_SB_MAGIC; + /* Optimal transfer block size. */ + sfs->f_bsize = PAGE_CACHE_SIZE; + /* + * Total data blocks in file system in units of f_bsize and since + * inodes are also stored in data blocks ($MFT is a file) this is just + * the total clusters. + */ + sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >> + PAGE_CACHE_SHIFT; + /* Free data blocks in file system in units of f_bsize. 
*/ + size = get_nr_free_clusters(vol) << vol->cluster_size_bits >> + PAGE_CACHE_SHIFT; + if (size < 0LL) + size = 0LL; + /* Free blocks avail to non-superuser, same as above on NTFS. */ + sfs->f_bavail = sfs->f_bfree = size; + /* Serialize accesses to the inode bitmap. */ + down_read(&vol->mftbmp_lock); + /* Total file nodes in file system (at this moment in time). */ + sfs->f_files = vol->mft_ino->i_size >> vol->mft_record_size_bits; + /* Free file nodes in fs (based on current total count). */ + sfs->f_ffree = __get_nr_free_mft_records(vol); + up_read(&vol->mftbmp_lock); + /* + * File system id. This is extremely *nix flavour dependent and even + * within Linux itself all fs do their own thing. I interpret this to + * mean a unique id associated with the mounted fs and not the id + * associated with the file system driver, the latter is already given + * by the file system type in sfs->f_type. Thus we use the 64-bit + * volume serial number splitting it into two 32-bit parts. We enter + * the least significant 32-bits in f_fsid[0] and the most significant + * 32-bits in f_fsid[1]. + */ + sfs->f_fsid.val[0] = vol->serial_no & 0xffffffff; + sfs->f_fsid.val[1] = (vol->serial_no >> 32) & 0xffffffff; + /* Maximum length of filenames. */ + sfs->f_namelen = NTFS_MAX_NAME_LEN; + return 0; +} + +/** + * Super operations for mount time when we don't have enough setup to use the + * proper functions. + */ +struct super_operations ntfs_mount_sops = { + .alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */ + .destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */ + .read_inode = ntfs_read_inode_mount, /* VFS: Load inode from disk, + called from iget(). */ + .clear_inode = ntfs_clear_big_inode, /* VFS: Called when inode is + removed from memory. */ +}; + +/** + * The complete super operations. + */ +struct super_operations ntfs_sops = { + .alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. 
*/ + .destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */ + //.dirty_inode = ntfs_dirty_inode, /* VFS: Called from + // __mark_inode_dirty(). */ + //.write_inode = NULL, /* VFS: Write dirty inode to disk. */ + .put_inode = ntfs_put_inode, /* VFS: Called just before the inode + reference count is decreased. */ + //.delete_inode = NULL, /* VFS: Delete inode from disk. Called + // when i_count becomes 0 and i_nlink + // is also 0. */ + .put_super = ntfs_put_super, /* Syscall: umount. */ + //write_super = NULL, /* Flush dirty super block to disk. */ + //write_super_lockfs = NULL, /* ? */ + //unlockfs = NULL, /* ? */ + .statfs = ntfs_statfs, /* Syscall: statfs */ + .remount_fs = ntfs_remount, /* Syscall: mount -o remount. */ + .clear_inode = ntfs_clear_big_inode, /* VFS: Called when an inode is + removed from memory. */ + //.umount_begin = NULL, /* Forced umount. */ + .show_options = ntfs_show_options, /* Show mount options in proc. */ +}; + +/** + * ntfs_fill_super - mount an ntfs file system + * @sb: super block of ntfs file system to mount + * @opt: string containing the mount options + * @silent: silence error output + * + * ntfs_fill_super() is called by the VFS to mount the device described by @sb + * with the mount options in @opt with the NTFS file system. + * + * If @silent is true, remain silent even if errors are detected. This is used + * during bootup, when the kernel tries to mount the root file system with all + * registered file systems one after the other until one succeeds. This implies + * that all file systems except the correct one will quite correctly and + * expectedly return an error, but nobody wants to see error messages when in + * fact this is what is supposed to happen. + * + * Return 0 on success, -ENOMEM if the ntfs_volume structure cannot be + * allocated and -EINVAL on every other failure. + * + * NOTE: @sb->s_flags contains the mount options flags. 
+ */ +static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) +{ + ntfs_volume *vol; + struct buffer_head *bh; + struct inode *tmp_ino; + int result; + + ntfs_debug("Entering."); +#ifndef NTFS_RW + sb->s_flags |= MS_RDONLY | MS_NOATIME | MS_NODIRATIME; +#endif + /* Allocate a new ntfs_volume and place it in sb->s_fs_info. */ + sb->s_fs_info = kmalloc(sizeof(ntfs_volume), GFP_NOFS); + vol = NTFS_SB(sb); + if (!vol) { + if (!silent) + ntfs_error(sb, "Allocation of NTFS volume structure " + "failed. Aborting mount..."); + return -ENOMEM; + } + /* Initialize ntfs_volume structure. */ + memset(vol, 0, sizeof(ntfs_volume)); + vol->sb = sb; + vol->upcase = NULL; + vol->mft_ino = NULL; + vol->mftbmp_ino = NULL; + init_rwsem(&vol->mftbmp_lock); + vol->mftmirr_ino = NULL; + vol->lcnbmp_ino = NULL; + init_rwsem(&vol->lcnbmp_lock); + vol->vol_ino = NULL; + vol->root_ino = NULL; + vol->secure_ino = NULL; + vol->uid = vol->gid = 0; + vol->flags = 0; + vol->on_errors = 0; + vol->mft_zone_multiplier = 0; + vol->nls_map = NULL; + + /* + * Default is group and other don't have any access to files or + * directories while owner has full access. Further, files by default + * are not executable but directories are of course browseable. + */ + vol->fmask = 0177; + vol->dmask = 0077; + + /* Important to get the mount options dealt with now. */ + if (!parse_options(vol, (char*)opt)) + goto err_out_now; + + /* + * TODO: Fail safety check. In the future we should really be able to + * cope with this being the case, but for now just bail out. + */ + if (bdev_hardsect_size(sb->s_bdev) > NTFS_BLOCK_SIZE) { + if (!silent) + ntfs_error(sb, "Device has unsupported hardsect_size."); + goto err_out_now; + } + + /* Setup the device access block size to NTFS_BLOCK_SIZE. 
*/ + if (sb_set_blocksize(sb, NTFS_BLOCK_SIZE) != NTFS_BLOCK_SIZE) { + if (!silent) + ntfs_error(sb, "Unable to set block size."); + goto err_out_now; + } + + /* Get the size of the device in units of NTFS_BLOCK_SIZE bytes. */ + vol->nr_blocks = sb->s_bdev->bd_inode->i_size >> NTFS_BLOCK_SIZE_BITS; + + /* Read the boot sector and return unlocked buffer head to it. */ + if (!(bh = read_ntfs_boot_sector(sb, silent))) { + if (!silent) + ntfs_error(sb, "Not an NTFS volume."); + goto err_out_now; + } + + /* + * Extract the data from the boot sector and setup the ntfs super block + * using it. + */ + result = parse_ntfs_boot_sector(vol, (NTFS_BOOT_SECTOR*)bh->b_data); + + brelse(bh); + + if (!result) { + if (!silent) + ntfs_error(sb, "Unsupported NTFS filesystem."); + goto err_out_now; + } + + /* + * TODO: When we start coping with sector sizes different from + * NTFS_BLOCK_SIZE, we now probably need to set the blocksize of the + * device (probably to NTFS_BLOCK_SIZE). + */ + + /* Setup remaining fields in the super block. */ + sb->s_magic = NTFS_SB_MAGIC; + + /* + * Ntfs allows 63 bits for the file size, i.e. correct would be: + * sb->s_maxbytes = ~0ULL >> 1; + * But the kernel uses a long as the page cache page index which on + * 32-bit architectures is only 32-bits. MAX_LFS_FILESIZE is kernel + * defined to the maximum the page cache page index can cope with + * without overflowing the index or to 2^63 - 1, whichever is smaller. + */ + sb->s_maxbytes = MAX_LFS_FILESIZE; + + /* + * Now load the metadata required for the page cache and our address + * space operations to function. We do this by setting up a specialised + * read_inode method and then just calling the normal iget() to obtain + * the inode for $MFT which is sufficient to allow our normal inode + * operations and associated address space operations to function. + */ + /* + * Poison vol->mft_ino so we know whether iget() called into our + * ntfs_read_inode_mount() method. 
+ */ +#define OGIN ((struct inode*)le32_to_cpu(0x4e49474f)) /* OGIN */ + vol->mft_ino = OGIN; + sb->s_op = &ntfs_mount_sops; + tmp_ino = iget(vol->sb, FILE_MFT); + if (!tmp_ino || tmp_ino != vol->mft_ino || is_bad_inode(tmp_ino)) { + if (!silent) + ntfs_error(sb, "Failed to load essential metadata."); + if (tmp_ino && vol->mft_ino == OGIN) + ntfs_error(sb, "BUG: iget() did not call " + "ntfs_read_inode_mount() method!\n"); + if (!tmp_ino) + goto cond_iput_mft_ino_err_out_now; + goto iput_tmp_ino_err_out_now; + } + /* + * Note: sb->s_op has already been set to &ntfs_sops by our specialized + * ntfs_read_inode_mount() method when it was invoked by iget(). + */ + down(&ntfs_lock); + /* + * The current mount is a compression user if the cluster size is + * less than or equal 4kiB. + */ + if (vol->cluster_size <= 4096 && !ntfs_nr_compression_users++) { + result = allocate_compression_buffers(); + if (result) { + ntfs_error(NULL, "Failed to allocate buffers " + "for compression engine."); + ntfs_nr_compression_users--; + up(&ntfs_lock); + goto iput_tmp_ino_err_out_now; + } + } + /* + * Increment the number of mounts and generate the global default + * upcase table if necessary. Also temporarily increment the number of + * upcase users to avoid race conditions with concurrent (u)mounts. + */ + if (!ntfs_nr_mounts++) + default_upcase = generate_default_upcase(); + ntfs_nr_upcase_users++; + + up(&ntfs_lock); + /* + * From now on, ignore @silent parameter. If we fail below this line, + * it will be due to a corrupt fs or a system error, so we report it. + */ + /* + * Open the system files with normal access functions and complete + * setting up the ntfs super block. + */ + if (!load_system_files(vol)) { + ntfs_error(sb, "Failed to load system files."); + goto unl_upcase_iput_tmp_ino_err_out_now; + } + if ((sb->s_root = d_alloc_root(vol->root_ino))) { + /* We increment i_count simulating an ntfs_iget(). 
*/ + atomic_inc(&vol->root_ino->i_count); + ntfs_debug("Exiting, status successful."); + /* Release the default upcase if it has no users. */ + down(&ntfs_lock); + if (!--ntfs_nr_upcase_users && default_upcase) { + ntfs_free(default_upcase); + default_upcase = NULL; + } + up(&ntfs_lock); + return 0; + } + ntfs_error(sb, "Failed to allocate root directory."); + /* Clean up after the successful load_system_files() call from above. */ + iput(vol->vol_ino); + vol->vol_ino = NULL; + /* NTFS 3.0+ specific clean up. */ + if (vol->major_ver >= 3) { + iput(vol->secure_ino); + vol->secure_ino = NULL; + } + iput(vol->root_ino); + vol->root_ino = NULL; + iput(vol->lcnbmp_ino); + vol->lcnbmp_ino = NULL; + iput(vol->mftmirr_ino); + vol->mftmirr_ino = NULL; + iput(vol->mftbmp_ino); + vol->mftbmp_ino = NULL; + vol->upcase_len = 0; + if (vol->upcase != default_upcase) + ntfs_free(vol->upcase); + vol->upcase = NULL; + if (vol->nls_map) { + unload_nls(vol->nls_map); + vol->nls_map = NULL; + } + /* Error exit code path. */ +unl_upcase_iput_tmp_ino_err_out_now: + /* + * Decrease the number of mounts and destroy the global default upcase + * table if necessary. + */ + down(&ntfs_lock); + ntfs_nr_mounts--; + if (!--ntfs_nr_upcase_users && default_upcase) { + ntfs_free(default_upcase); + default_upcase = NULL; + } + if (vol->cluster_size <= 4096 && !--ntfs_nr_compression_users) + free_compression_buffers(); + up(&ntfs_lock); +iput_tmp_ino_err_out_now: + iput(tmp_ino); +cond_iput_mft_ino_err_out_now: + if (vol->mft_ino && vol->mft_ino != OGIN && vol->mft_ino != tmp_ino) { + iput(vol->mft_ino); + vol->mft_ino = NULL; + } +#undef OGIN + /* + * This is needed to get ntfs_clear_extent_inode() called for each + * inode we have ever called ntfs_iget()/iput() on, otherwise we A) + * leak resources and B) a subsequent mount fails automatically due to + * ntfs_iget() never calling down into our ntfs_read_locked_inode() + * method again... 
FIXME: Do we need to do this twice now because of + * attribute inodes? I think not, so leave as is for now... (AIA) + */ + if (invalidate_inodes(sb)) { + ntfs_error(sb, "Busy inodes left. This is most likely a NTFS " + "driver bug."); + /* Copied from fs/super.c. I just love this message. (-; */ + printk("NTFS: Busy inodes after umount. Self-destruct in 5 " + "seconds. Have a nice day...\n"); + } + /* Errors at this stage are irrelevant. */ +err_out_now: + sb->s_fs_info = NULL; + kfree(vol); + ntfs_debug("Failed, returning -EINVAL."); + return -EINVAL; +} + +/* + * This is a slab cache to optimize allocations and deallocations of Unicode + * strings of the maximum length allowed by NTFS, which is NTFS_MAX_NAME_LEN + * (255) Unicode characters + a terminating NULL Unicode character. + */ +kmem_cache_t *ntfs_name_cache; + +/* Slab caches for efficient allocation/deallocation of inodes. */ +kmem_cache_t *ntfs_inode_cache; +kmem_cache_t *ntfs_big_inode_cache; + +/* + * ntfs_big_inode_init_once - init once constructor for the big inode slab cache + * @foo: the slab object, treated as an ntfs_inode + * @cachep: slab cache the object belongs to (not used here) + * @flags: slab constructor flags + * + * Run inode_init_once() on the VFS inode of @foo, but only when @flags has + * SLAB_CTOR_CONSTRUCTOR set and SLAB_CTOR_VERIFY clear. + */ +static void ntfs_big_inode_init_once(void *foo, kmem_cache_t *cachep, + unsigned long flags) +{ + ntfs_inode *ni = (ntfs_inode *)foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + inode_init_once(VFS_I(ni)); +} + +/* + * Slab cache to optimize allocations and deallocations of attribute search + * contexts. + */ +kmem_cache_t *ntfs_attr_ctx_cache; + +/* A global default upcase table and a corresponding reference count. */ +wchar_t *default_upcase = NULL; +unsigned long ntfs_nr_upcase_users = 0; + +/* The number of mounted filesystems. */ +unsigned long ntfs_nr_mounts = 0; + +/* Driver wide semaphore. 
*/ +DECLARE_MUTEX(ntfs_lock); + +static struct super_block *ntfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super); +} + +static struct file_system_type ntfs_fs_type = { + .owner = THIS_MODULE, + .name = "ntfs", + .get_sb = ntfs_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +/* Stable names for the slab caches. */ +static const char ntfs_attr_ctx_cache_name[] = "ntfs_attr_ctx_cache"; +static const char ntfs_name_cache_name[] = "ntfs_name_cache"; +static const char ntfs_inode_cache_name[] = "ntfs_inode_cache"; +static const char ntfs_big_inode_cache_name[] = "ntfs_big_inode_cache"; + +static int __init init_ntfs_fs(void) +{ + int err = 0; + + /* This may be ugly but it results in pretty output so who cares. (-8 */ + printk(KERN_INFO "NTFS driver " NTFS_VERSION " [Flags: R/" +#ifdef NTFS_RW + "W" +#else + "O" +#endif +#ifdef DEBUG + " DEBUG" +#endif +#ifdef MODULE + " MODULE" +#endif + "].\n"); + + ntfs_debug("Debug messages are enabled."); + + ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name, + sizeof(attr_search_context), 0 /* offset */, + SLAB_HWCACHE_ALIGN, NULL /* ctor */, NULL /* dtor */); + if (!ntfs_attr_ctx_cache) { + printk(KERN_CRIT "NTFS: Failed to create %s!\n", + ntfs_attr_ctx_cache_name); + goto ctx_err_out; + } + + ntfs_name_cache = kmem_cache_create(ntfs_name_cache_name, + (NTFS_MAX_NAME_LEN+1) * sizeof(uchar_t), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!ntfs_name_cache) { + printk(KERN_CRIT "NTFS: Failed to create %s!\n", + ntfs_name_cache_name); + goto name_err_out; + } + + ntfs_inode_cache = kmem_cache_create(ntfs_inode_cache_name, + sizeof(ntfs_inode), 0, + SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, NULL, NULL); + if (!ntfs_inode_cache) { + printk(KERN_CRIT "NTFS: Failed to create %s!\n", + ntfs_inode_cache_name); + goto inode_err_out; + } + + ntfs_big_inode_cache = 
kmem_cache_create(ntfs_big_inode_cache_name, + sizeof(big_ntfs_inode), 0, + SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, + ntfs_big_inode_init_once, NULL); + if (!ntfs_big_inode_cache) { + printk(KERN_CRIT "NTFS: Failed to create %s!\n", + ntfs_big_inode_cache_name); + goto big_inode_err_out; + } + + /* Register the ntfs sysctls. */ + err = ntfs_sysctl(1); + if (err) { + printk(KERN_CRIT "NTFS: Failed to register NTFS sysctls!\n"); + goto sysctl_err_out; + } + + err = register_filesystem(&ntfs_fs_type); + if (!err) { + ntfs_debug("NTFS driver registered successfully."); + return 0; /* Success! */ + } + printk(KERN_CRIT "NTFS: Failed to register NTFS file system driver!\n"); + /* + * register_filesystem() failed after the sysctls were registered, so + * unregister them again before tearing down the slab caches. A failed + * ntfs_sysctl(1) jumps straight to sysctl_err_out and skips this. + */ + ntfs_sysctl(0); + +sysctl_err_out: + kmem_cache_destroy(ntfs_big_inode_cache); +big_inode_err_out: + kmem_cache_destroy(ntfs_inode_cache); +inode_err_out: + kmem_cache_destroy(ntfs_name_cache); +name_err_out: + kmem_cache_destroy(ntfs_attr_ctx_cache); +ctx_err_out: + /* err is still 0 only when one of the slab caches could not be created. */ + if (!err) { + printk(KERN_CRIT "NTFS: Aborting NTFS file system driver " + "registration...\n"); + err = -ENOMEM; + } + return err; +} + +static void __exit exit_ntfs_fs(void) +{ + int err = 0; + + ntfs_debug("Unregistering NTFS driver."); + + unregister_filesystem(&ntfs_fs_type); + + if (kmem_cache_destroy(ntfs_big_inode_cache) && (err = 1)) + printk(KERN_CRIT "NTFS: Failed to destory %s.\n", + ntfs_big_inode_cache_name); + if (kmem_cache_destroy(ntfs_inode_cache) && (err = 1)) + printk(KERN_CRIT "NTFS: Failed to destory %s.\n", + ntfs_inode_cache_name); + if (kmem_cache_destroy(ntfs_name_cache) && (err = 1)) + printk(KERN_CRIT "NTFS: Failed to destory %s.\n", + ntfs_name_cache_name); + if (kmem_cache_destroy(ntfs_attr_ctx_cache) && (err = 1)) + printk(KERN_CRIT "NTFS: Failed to destory %s.\n", + ntfs_attr_ctx_cache_name); + if (err) + printk(KERN_CRIT "NTFS: This causes memory to leak! There is " + "probably a BUG in the driver! Please report " + "you saw this message to " + "linux-ntfs-dev@lists.sf.net\n"); + /* Unregister the ntfs sysctls. 
*/ + ntfs_sysctl(0); +} + +MODULE_AUTHOR("Anton Altaparmakov "); +MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2003 Anton Altaparmakov"); +MODULE_LICENSE("GPL"); +#ifdef DEBUG +MODULE_PARM(debug_msgs, "i"); +MODULE_PARM_DESC(debug_msgs, "Enable debug messages."); +#endif + +module_init(init_ntfs_fs) +module_exit(exit_ntfs_fs) + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/sysctl.c b/reactos/drivers/fs/ntfs/linux-ntfs/sysctl.c new file mode 100644 index 00000000000..0c5fe3076e8 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/sysctl.c @@ -0,0 +1,86 @@ +/* + * sysctl.c - Code for sysctl handling in NTFS Linux kernel driver. Part of + * the Linux-NTFS project. Adapted from the old NTFS driver, + * Copyright (C) 1997 Martin von Löwis, Régis Duchesne. + * + * Copyright (c) 2002 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifdef DEBUG + +#include + +#ifdef CONFIG_SYSCTL + +#include +#include + +#include "sysctl.h" +#include "debug.h" + +#define FS_NTFS 1 + +/* Definition of the ntfs sysctl. */ +static ctl_table ntfs_sysctls[] = { + { FS_NTFS, "ntfs-debug", /* Binary and text IDs. */ + &debug_msgs,sizeof(debug_msgs), /* Data pointer and size. 
*/ + 0644, NULL, &proc_dointvec }, /* Mode, child, proc handler. */ + { 0 } +}; + +/* Define the parent directory /proc/sys/fs. */ +static ctl_table sysctls_root[] = { + { CTL_FS, "fs", NULL, 0, 0555, ntfs_sysctls }, + { 0 } +}; + +/* Storage for the sysctls header. */ +static struct ctl_table_header *sysctls_root_table = NULL; + +/** + * ntfs_sysctl - add or remove the debug sysctl + * @add: add (1) or remove (0) the sysctl + * + * Add or remove the debug sysctl. Return 0 on success or -errno on error. + */ +int ntfs_sysctl(int add) +{ + if (add) { + BUG_ON(sysctls_root_table); + sysctls_root_table = register_sysctl_table(sysctls_root, 0); + if (!sysctls_root_table) + return -ENOMEM; +#ifdef CONFIG_PROC_FS + /* + * If the proc file system is in use and we are a module, need + * to set the owner of our proc entry to our module. In the + * non-modular case, THIS_MODULE is NULL, so this is ok. + */ + ntfs_sysctls[0].de->owner = THIS_MODULE; +#endif + } else { + BUG_ON(!sysctls_root_table); + unregister_sysctl_table(sysctls_root_table); + sysctls_root_table = NULL; + } + return 0; +} + +#endif /* CONFIG_SYSCTL */ +#endif /* DEBUG */ + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/sysctl.h b/reactos/drivers/fs/ntfs/linux-ntfs/sysctl.h new file mode 100644 index 00000000000..2531d758bfd --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/sysctl.h @@ -0,0 +1,43 @@ +/* + * sysctl.h - Defines for sysctl handling in NTFS Linux kernel driver. Part of + * the Linux-NTFS project. Adapted from the old NTFS driver, + * Copyright (C) 1997 Martin von Löwis, Régis Duchesne. + * + * Copyright (c) 2002 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_SYSCTL_H +#define _LINUX_NTFS_SYSCTL_H + +#include + +#if (DEBUG && CONFIG_SYSCTL) + +extern int ntfs_sysctl(int add); + +#else + +/* Just return success. */ +static inline int ntfs_sysctl(int add) +{ + return 0; +} + +#endif /* DEBUG && CONFIG_SYSCTL */ +#endif /* _LINUX_NTFS_SYSCTL_H */ + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/time.c b/reactos/drivers/fs/ntfs/linux-ntfs/time.c new file mode 100644 index 00000000000..73e4b56585f --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/time.c @@ -0,0 +1,82 @@ +/* + * time.c - NTFS time conversion functions. Part of the Linux-NTFS project. + * + * Copyright (c) 2001 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include /* For CURRENT_TIME. */ +#include /* For do_div(). */ + +#include "ntfs.h" + +#define NTFS_TIME_OFFSET ((s64)(369 * 365 + 89) * 24 * 3600 * 10000000) + +/** + * utc2ntfs - convert Linux time to NTFS time + * @time: Linux time to convert to NTFS + * + * Convert the Linux time @time to its corresponding NTFS time and return that + * in little endian format. + * + * Linux stores time in a long at present and measures it as the number of + * 1-second intervals since 1st January 1970, 00:00:00 UTC. + * + * NTFS uses Microsoft's standard time format which is stored in a s64 and is + * measured as the number of 100 nano-second intervals since 1st January 1601, + * 00:00:00 UTC. + */ +inline s64 utc2ntfs(const time_t time) +{ + /* Convert to 100ns intervals and then add the NTFS time offset. */ + return cpu_to_sle64((s64)time * 10000000 + NTFS_TIME_OFFSET); +} + +/** + * get_current_ntfs_time - get the current time in little endian NTFS format + * + * Get the current time from the Linux kernel, convert it to its corresponding + * NTFS time and return that in little endian format. + * + * The sub-second part (xtime.tv_nsec) is scaled to 100 nano-second intervals + * and added in cpu byte order, so that only one conversion to little endian + * is done on the final value. + */ +inline s64 get_current_ntfs_time(void) +{ + /* Whole seconds as 100ns intervals; ignores leap second. */ + s64 t = (s64)get_seconds() * 10000000; + + /* One NTFS tick is 100ns, hence tv_nsec / 100 rather than / 1000. */ + t += xtime.tv_nsec / 100; + return cpu_to_sle64(t + NTFS_TIME_OFFSET); +} + +/** + * ntfs2utc - convert NTFS time to Linux time + * @time: NTFS time (little endian) to convert to Linux + * + * Convert the little endian NTFS time @time to its corresponding Linux time + * and return that in cpu format. + * + * Linux stores time in a long at present and measures it as the number of + * 1-second intervals since 1st January 1970, 00:00:00 UTC. 
+ * + * NTFS uses Microsoft's standard time format which is stored in a s64 and is + * measured as the number of 100 nano-second intervals since 1st January 1601, + * 00:00:00 UTC. + * + * NTFS times before 1st January 1970 give a negative result which is rounded + * towards minus infinity. + */ +inline time_t ntfs2utc(const s64 time) +{ + /* Subtract the NTFS time offset to get 100ns intervals since 1970. */ + s64 t = sle64_to_cpu(time) - NTFS_TIME_OFFSET; + u64 q; + + /* + * do_div() needs an unsigned 64-bit dividend, so divide the magnitude + * and restore the sign afterwards instead of passing it the s64. + */ + if (t >= 0) { + q = (u64)t; + do_div(q, 10000000); + return (time_t)q; + } + /* Floor division for t < 0: -((-(t + 1)) / 10000000) - 1. */ + q = (u64)(-(t + 1)); + do_div(q, 10000000); + return -(time_t)q - 1; +} + diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/types.h b/reactos/drivers/fs/ntfs/linux-ntfs/types.h new file mode 100644 index 00000000000..12c80e32390 --- /dev/null +++ b/reactos/drivers/fs/ntfs/linux-ntfs/types.h @@ -0,0 +1,84 @@ +/* + * types.h - Defines for NTFS Linux kernel driver specific types. + * Part of the Linux-NTFS project. + * + * Copyright (c) 2001,2002 Anton Altaparmakov. + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _LINUX_NTFS_TYPES_H +#define _LINUX_NTFS_TYPES_H + +#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) +#define SN(X) X /* Struct Name */ +#define SC(P,N) P.N /* ShortCut: Prefix, Name */ +#else +#define SN(X) +#define SC(P,N) N +#endif + +/* 2-byte Unicode character type. 
*/
+typedef u16 uchar_t;
+/* NOTE(review): presumably log2(sizeof(uchar_t)), used as a shift - confirm. */
+#define UCHAR_T_SIZE_BITS 1
+
+/*
+ * Clusters are signed 64-bit values on NTFS volumes. We define two types, LCN
+ * and VCN, to allow for type checking and better code readability.
+ */
+typedef s64 VCN;
+typedef s64 LCN;
+
+/**
+ * run_list_element - in memory vcn to lcn mapping array element
+ * @vcn:	starting vcn of the current array element
+ * @lcn:	starting lcn of the current array element
+ * @length:	length in clusters of the current array element
+ *
+ * The last vcn (in fact the last vcn + 1) is reached when length == 0.
+ *
+ * When lcn == -1 this means that the @length vcns starting at @vcn are not
+ * physically allocated (i.e. this is a hole / data is sparse).
+ */
+typedef struct {	/* In memory vcn to lcn mapping structure element. */
+	VCN vcn;	/* vcn = Starting virtual cluster number. */
+	LCN lcn;	/* lcn = Starting logical cluster number. */
+	s64 length;	/* Run length in clusters. */
+} run_list_element;
+
+/**
+ * run_list - in memory vcn to lcn mapping array including a read/write lock
+ * @rl:		pointer to an array of run list elements
+ * @lock:	read/write semaphore for serializing access to @rl
+ */
+typedef struct {
+	run_list_element *rl;
+	struct rw_semaphore lock;
+} run_list;
+
+/* Boolean type used throughout the driver. */
+typedef enum {
+	FALSE = 0,
+	TRUE = 1
+} BOOL;
+
+/* Selects case sensitive or case insensitive name comparison. */
+typedef enum {
+	CASE_SENSITIVE = 0,
+	IGNORE_CASE = 1,
+} IGNORE_CASE_BOOL;
+
+#endif /* _LINUX_NTFS_TYPES_H */
+
diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/unistr.c b/reactos/drivers/fs/ntfs/linux-ntfs/unistr.c
new file mode 100644
index 00000000000..ab999db047f
--- /dev/null
+++ b/reactos/drivers/fs/ntfs/linux-ntfs/unistr.c
@@ -0,0 +1,383 @@
+/*
+ * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project.
+ * + * Copyright (c) 2001-2003 Anton Altaparmakov + * + * This program/include file is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as published + * by the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program/include file is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS + * distribution in the file COPYING); if not, write to the Free Software + * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "ntfs.h" + +/* + * IMPORTANT + * ========= + * + * All these routines assume that the Unicode characters are in little endian + * encoding inside the strings!!! + */ + +/* + * This is used by the name collation functions to quickly determine what + * characters are (in)valid. 
+ */
+static const u8 legal_ansi_char_array[0x40] = {
+	/* 0x00 - 0x0f */
+	0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+
+	/* 0x10 - 0x1f */
+	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+
+	/* 0x20 - 0x2f: ' ' to '/' */
+	0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
+	0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
+
+	/* 0x30 - 0x3f: '0' to '?' */
+	0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
+	0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
+};
+
+/**
+ * ntfs_are_names_equal - compare two Unicode names for equality
+ * @s1:			name to compare to @s2
+ * @s1_len:		length in Unicode characters of @s1
+ * @s2:			name to compare to @s1
+ * @s2_len:		length in Unicode characters of @s2
+ * @ic:			ignore case bool
+ * @upcase:		upcase table (only if @ic == IGNORE_CASE)
+ * @upcase_size:	length in Unicode characters of @upcase (if present)
+ *
+ * Compare the names @s1 and @s2 and return TRUE (1) if the names are
+ * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE,
+ * the @upcase table is used to perform a case insensitive comparison.
+ */
+BOOL ntfs_are_names_equal(const uchar_t *s1, size_t s1_len,
+		const uchar_t *s2, size_t s2_len,
+		const IGNORE_CASE_BOOL ic,
+		const uchar_t *upcase, const u32 upcase_size)
+{
+	int diff;
+
+	/* Names of different length can never be equal. */
+	if (s1_len != s2_len)
+		return FALSE;
+	/* Pick the comparison routine that matches the requested mode. */
+	if (ic == CASE_SENSITIVE)
+		diff = ntfs_ucsncmp(s1, s2, s1_len);
+	else
+		diff = ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size);
+	return !diff;
+}
+
+/**
+ * ntfs_collate_names - collate two Unicode names
+ * @name1:	first Unicode name to compare
+ * @name1_len:	length in Unicode characters of @name1
+ * @name2:	second Unicode name to compare
+ * @name2_len:	length in Unicode characters of @name2
+ * @err_val:	if @name1 contains an invalid character return this value
+ * @ic:		either CASE_SENSITIVE or IGNORE_CASE
+ * @upcase:	upcase table (ignored if @ic is CASE_SENSITIVE)
+ * @upcase_len:	upcase table size (ignored if @ic is CASE_SENSITIVE)
+ *
+ * ntfs_collate_names collates two Unicode names and returns:
+ *
+ *  -1 if the first name collates before the second one,
+ *   0 if the names match,
+ *   1 if the second name collates before the first one, or
+ * @err_val if an invalid character is found in @name1 during the comparison.
+ *
+ * The following characters are considered invalid: '"', '*', '<', '>' and '?'.
+ */
+int ntfs_collate_names(const uchar_t *name1, const u32 name1_len,
+		const uchar_t *name2, const u32 name2_len,
+		const int err_val, const IGNORE_CASE_BOOL ic,
+		const uchar_t *upcase, const u32 upcase_len)
+{
+	/* Number of characters both names have in common position-wise. */
+	const u32 common_len = name1_len > name2_len ? name2_len : name1_len;
+	u32 pos;
+	uchar_t a, b;
+
+	for (pos = 0; pos < common_len; pos++) {
+		a = le16_to_cpu(name1[pos]);
+		b = le16_to_cpu(name2[pos]);
+		if (ic) {
+			/* Only characters covered by the table are folded. */
+			if (a < upcase_len)
+				a = le16_to_cpu(upcase[a]);
+			if (b < upcase_len)
+				b = le16_to_cpu(upcase[b]);
+		}
+		/* Bit 8 in the table flags a character as invalid. */
+		if (a < 64 && (legal_ansi_char_array[a] & 8))
+			return err_val;
+		if (a != b)
+			return a < b ? -1 : 1;
+	}
+	/* The common prefix matches, so the lengths decide. */
+	if (name1_len == name2_len)
+		return 0;
+	if (name1_len < name2_len)
+		return -1;
+	/* @name1 is longer: its next character must still be a valid one. */
+	a = le16_to_cpu(name1[common_len]);
+	if (a < 64 && (legal_ansi_char_array[a] & 8))
+		return err_val;
+	return 1;
+}
+
+/**
+ * ntfs_ucsncmp - compare two little endian Unicode strings
+ * @s1:		first string
+ * @s2:		second string
+ * @n:		maximum unicode characters to compare
+ *
+ * Compare the first @n characters of the Unicode strings @s1 and @s2.
+ * The strings are in little endian format and appropriate le16_to_cpu()
+ * conversion is performed on non-little endian machines.
+ *
+ * The function returns an integer less than, equal to, or greater than zero
+ * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
+ * to be less than, to match, or be greater than @s2.
+ */
+int ntfs_ucsncmp(const uchar_t *s1, const uchar_t *s2, size_t n)
+{
+	/* Walk both strings in lock step until @n characters were seen. */
+	for (; n; n--, s1++, s2++) {
+		const uchar_t a = le16_to_cpu(*s1);
+		const uchar_t b = le16_to_cpu(*s2);
+
+		if (a != b)
+			return a < b ? -1 : 1;
+		/* Both characters are equal here; stop at a terminator. */
+		if (!a)
+			break;
+	}
+	return 0;
+}
+
+/**
+ * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
+ * @s1:			first string
+ * @s2:			second string
+ * @n:			maximum unicode characters to compare
+ * @upcase:		upcase table
+ * @upcase_size:	upcase table size in Unicode characters
+ *
+ * Compare the first @n characters of the Unicode strings @s1 and @s2,
+ * ignoring case. The strings are in little endian format and appropriate
+ * le16_to_cpu() conversion is performed on non-little endian machines.
+ *
+ * Each character is uppercased using the @upcase table before the comparison.
+ *
+ * The function returns an integer less than, equal to, or greater than zero
+ * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
+ * to be less than, to match, or be greater than @s2.
+ */
+int ntfs_ucsncasecmp(const uchar_t *s1, const uchar_t *s2, size_t n,
+		const uchar_t *upcase, const u32 upcase_size)
+{
+	size_t pos;
+
+	for (pos = 0; pos < n; pos++) {
+		uchar_t a = le16_to_cpu(s1[pos]);
+		uchar_t b = le16_to_cpu(s2[pos]);
+
+		/* Characters beyond the table are compared unchanged. */
+		if (a < upcase_size)
+			a = le16_to_cpu(upcase[a]);
+		if (b < upcase_size)
+			b = le16_to_cpu(upcase[b]);
+		if (a != b)
+			return a < b ? -1 : 1;
+		/* Both characters are equal here; stop at a terminator. */
+		if (!a)
+			break;
+	}
+	return 0;
+}
+
+/**
+ * ntfs_upcase_name - uppercase a little endian Unicode name in place
+ * @name:	name to convert (little endian, modified in place)
+ * @name_len:	length in Unicode characters of @name
+ * @upcase:	upcase table
+ * @upcase_len:	length in Unicode characters of @upcase
+ *
+ * Characters whose value is not covered by @upcase are left untouched.
+ */
+void ntfs_upcase_name(uchar_t *name, u32 name_len, const uchar_t *upcase,
+		const u32 upcase_len)
+{
+	uchar_t *const end = name + name_len;
+
+	for (; name < end; name++) {
+		const uchar_t ch = le16_to_cpu(*name);
+
+		/* Table entries are stored as-is, no byte swapping needed. */
+		if (ch < upcase_len)
+			*name = upcase[ch];
+	}
+}
+
+/**
+ * ntfs_file_upcase_value - uppercase the name stored in a file name attribute
+ * @file_name_attr:	file name attribute whose name is converted in place
+ * @upcase:		upcase table
+ * @upcase_len:		length in Unicode characters of @upcase
+ */
+void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr,
+		const uchar_t *upcase, const u32 upcase_len)
+{
+	uchar_t *name = (uchar_t*)&file_name_attr->file_name;
+
+	ntfs_upcase_name(name, file_name_attr->file_name_length, upcase,
+			upcase_len);
+}
+
+/**
+ * ntfs_file_compare_values - collate the names of two file name attributes
+ * @file_name_attr1:	first file name attribute
+ * @file_name_attr2:	second file name attribute
+ * @err_val:		value to return if an invalid character is found
+ * @ic:			either CASE_SENSITIVE or IGNORE_CASE
+ * @upcase:		upcase table
+ * @upcase_len:		length in Unicode characters of @upcase
+ *
+ * Thin wrapper that hands both names to ntfs_collate_names() and returns its
+ * result.
+ */
+int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1,
+		FILE_NAME_ATTR *file_name_attr2,
+		const int err_val, const IGNORE_CASE_BOOL ic,
+		const uchar_t *upcase, const u32 upcase_len)
+{
+	const uchar_t *first = (uchar_t*)&file_name_attr1->file_name;
+	const uchar_t *second = (uchar_t*)&file_name_attr2->file_name;
+
+	return ntfs_collate_names(first, file_name_attr1->file_name_length,
+			second, file_name_attr2->file_name_length,
+			err_val, ic, upcase, upcase_len);
+}
+
+/**
+ * ntfs_nlstoucs - convert NLS string to little endian Unicode string
+ * @vol:	ntfs volume which we are working with
+ * @ins:	input NLS string buffer
+ * @ins_len:	length of input string in bytes
+ * @outs:	on return contains the allocated output Unicode string buffer
+ *
+ * Convert the input string @ins, which is in whatever format the loaded NLS
+ * map dictates, into a little endian, 2-byte Unicode string.
+ *
+ * This function allocates the string and the caller is responsible for
+ * calling kmem_cache_free(ntfs_name_cache, @outs); when finished with it.
+ *
+ * On success the function returns the number of Unicode characters written to
+ * the output string *@outs (>= 0), not counting the terminating Unicode NULL
+ * character. *@outs is set to the allocated output string buffer.
+ *
+ * On error, a negative number corresponding to the error code is returned. In
+ * that case the output string is not allocated and *@outs is undefined.
+ * Possible error codes are:
+ *	-EINVAL		- @ins is NULL.
+ *	-ENOMEM		- The output buffer could not be allocated.
+ *	-EILSEQ		- @ins contains a character the NLS map cannot convert.
+ *	-ENAMETOOLONG	- @ins converts to more than NTFS_MAX_NAME_LEN Unicode
+ *			  characters.
+ */
+int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins,
+		const int ins_len, uchar_t **outs)
+{
+	struct nls_table *nls = vol->nls_map;
+	uchar_t *ucs;
+	wchar_t wc;
+	int i, o, wc_len;
+
+	/* We don't trust outside sources. */
+	if (!ins) {
+		ntfs_error(NULL, "Received NULL pointer.");
+		return -EINVAL;
+	}
+	ucs = (uchar_t*)kmem_cache_alloc(ntfs_name_cache, SLAB_NOFS);
+	if (!ucs) {
+		ntfs_error(vol->sb, "Failed to allocate name from "
+				"ntfs_name_cache!");
+		return -ENOMEM;
+	}
+	for (i = o = 0; i < ins_len; i += wc_len) {
+		wc_len = nls->char2uni(ins + i, ins_len - i, &wc);
+		if (wc_len < 0)
+			goto conversion_err;
+		/* A NULL character terminates the input early. */
+		if (!wc)
+			break;
+		/*
+		 * Never write past the end of the name buffer.  This assumes
+		 * ntfs_name_cache objects hold NTFS_MAX_NAME_LEN characters
+		 * plus the terminator - TODO confirm against the cache setup.
+		 */
+		if (o >= NTFS_MAX_NAME_LEN)
+			goto name_too_long;
+		ucs[o++] = cpu_to_le16(wc);
+	}
+	ucs[o] = cpu_to_le16('\0');
+	*outs = ucs;
+	return o;
+conversion_err:
+	ntfs_error(vol->sb, "Name using character set %s contains characters "
+			"that cannot be converted to Unicode.", nls->charset);
+	kmem_cache_free(ntfs_name_cache, ucs);
+	return -EILSEQ;
+name_too_long:
+	ntfs_error(vol->sb, "Name is too long (maximum length for a name on "
+			"NTFS is %d Unicode characters).", NTFS_MAX_NAME_LEN);
+	kmem_cache_free(ntfs_name_cache, ucs);
+	return -ENAMETOOLONG;
+}
+
+/**
+ * ntfs_ucstonls - convert little endian Unicode string to NLS string
+ * @vol:	ntfs volume which we are working with
+ * @ins:	input Unicode string buffer
+ * @ins_len:	length of input string in Unicode characters
+ * @outs:	on return contains the (allocated) output NLS string buffer
+ * @outs_len:	length of output string buffer in bytes
+ *
+ * Convert the input little endian, 2-byte Unicode string @ins, of length
+ * @ins_len into the string
format dictated by the loaded NLS.
+ *
+ * If *@outs is NULL, this function allocates the string and the caller is
+ * responsible for calling kfree(*@outs); when finished with it.
+ *
+ * On success the function returns the number of bytes written to the output
+ * string *@outs (>= 0), not counting the terminating NULL byte. If the output
+ * string buffer was allocated, *@outs is set to it.
+ *
+ * On error, a negative number corresponding to the error code is returned. In
+ * that case the output string is not allocated. The contents of *@outs are
+ * then undefined.
+ */
+int ntfs_ucstonls(const ntfs_volume *vol, const uchar_t *ins,
+		const int ins_len, unsigned char **outs, int outs_len)
+{
+	struct nls_table *nls = vol->nls_map;
+	unsigned char *ns, *tc;
+	int i, o, ns_len, wc, grown;
+
+	/* We don't trust outside sources. */
+	if (!ins) {
+		ntfs_error(vol->sb, "Received NULL pointer.");
+		return -EINVAL;
+	}
+	ns = *outs;
+	ns_len = outs_len;
+	/* A caller supplied buffer without any room cannot hold a name. */
+	if (ns && !ns_len) {
+		wc = -ENAMETOOLONG;
+		goto conversion_err;
+	}
+	if (!ns) {
+		/* Worst case size; one extra byte for the terminator. */
+		ns_len = ins_len * NLS_MAX_CHARSET_SIZE;
+		ns = (unsigned char*)kmalloc(ns_len + 1, GFP_NOFS);
+		if (!ns)
+			goto mem_err_out;
+	}
+	for (i = o = 0; i < ins_len; i++) {
+retry:
+		wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o, ns_len - o);
+		if (wc > 0) {
+			o += wc;
+			continue;
+		}
+		if (!wc)
+			break;
+		/*
+		 * wc < 0.  Only running out of room in a buffer we allocated
+		 * ourselves is recoverable; everything else is a real error.
+		 */
+		if (wc != -ENAMETOOLONG || ns == *outs)
+			goto conversion_err;
+		/* Grow in multiples of 64 bytes. */
+		grown = (ns_len + 64) & ~63;
+		tc = (unsigned char*)kmalloc(grown, GFP_NOFS);
+		if (!tc)
+			goto conversion_err;
+		memcpy(tc, ns, ns_len);
+		ns_len = grown - 1;
+		kfree(ns);
+		ns = tc;
+		goto retry;
+	}
+	ns[o] = '\0';
+	*outs = ns;
+	return o;
+conversion_err:
+	ntfs_error(vol->sb, "Unicode name contains characters that cannot be "
+			"converted to character set %s.", nls->charset);
+	/* Never free a buffer that belongs to the caller. */
+	if (ns != *outs)
+		kfree(ns);
+	if (wc != -ENAMETOOLONG)
+		wc = -EILSEQ;
+	return wc;
+mem_err_out:
+	ntfs_error(vol->sb, "Failed to allocate name!");
+	return -ENOMEM;
+}
+
diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/upcase.c b/reactos/drivers/fs/ntfs/linux-ntfs/upcase.c
new file mode 100644
index 00000000000..44789837eed
--- /dev/null
+++ b/reactos/drivers/fs/ntfs/linux-ntfs/upcase.c
@@ -0,0 +1,90 @@
+/*
+ * upcase.c - Generate the full NTFS Unicode upcase table in little endian.
+ *	      Part of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001 Richard Russon 
+ * Copyright (c) 2001-2003 Anton Altaparmakov
+ *
+ * Modified for mkntfs inclusion 9 June 2001 by Anton Altaparmakov.
+ * Modified for kernel inclusion 10 September 2001 by Anton Altaparmakov.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS source
+ * in the file COPYING); if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include "ntfs.h"
+
+/**
+ * generate_default_upcase - build the default upcase table
+ *
+ * Allocate a table of default_upcase_len little endian Unicode characters,
+ * initialize it to the identity mapping and then apply the three correction
+ * tables below, in order: runs, pairs and single overrides.
+ *
+ * Return the allocated table, or NULL if the allocation failed.
+ */
+uchar_t *generate_default_upcase(void)
+{
+	/*
+	 * Each character in the half-open range [Start, End) has Add added to
+	 * its value. The table is terminated by an all zero entry.
+	 */
+	static const int uc_run_table[][3] = { /* Start, End, Add */
+	{0x0061, 0x007B, -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72, 74},
+	{0x00E0, 0x00F7, -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76, 86},
+	{0x00F8, 0x00FF, -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100},
+	{0x0256, 0x0258, -205}, {0x1F00, 0x1F08, 8}, {0x1F78, 0x1F7A, 128},
+	{0x028A, 0x028C, -217}, {0x1F10, 0x1F16, 8}, {0x1F7A, 0x1F7C, 112},
+	{0x03AC, 0x03AD, -38}, {0x1F20, 0x1F28, 8}, {0x1F7C, 0x1F7E, 126},
+	{0x03AD, 0x03B0, -37}, {0x1F30, 0x1F38, 8}, {0x1FB0, 0x1FB2, 8},
+	{0x03B1, 0x03C2, -32}, {0x1F40, 0x1F46, 8}, {0x1FD0, 0x1FD2, 8},
+	{0x03C2, 0x03C3, -31}, {0x1F51, 0x1F52, 8}, {0x1FE0, 0x1FE2, 8},
+	{0x03C3, 0x03CC, -32}, {0x1F53, 0x1F54, 8}, {0x1FE5, 0x1FE6, 7},
+	{0x03CC, 0x03CD, -64}, {0x1F55, 0x1F56, 8}, {0x2170, 0x2180, -16},
+	{0x03CD, 0x03CF, -63}, {0x1F57, 0x1F58, 8}, {0x24D0, 0x24EA, -26},
+	{0x0430, 0x0450, -32}, {0x1F60, 0x1F68, 8}, {0xFF41, 0xFF5B, -32},
+	{0}
+	};
+
+	/*
+	 * Starting at Start and stepping by two while below End, the character
+	 * following each visited position is mapped to one less than its
+	 * value. The table is terminated by an all zero entry.
+	 */
+	static const int uc_dup_table[][2] = { /* Start, End */
+	{0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC},
+	{0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB},
+	{0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5},
+	{0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9},
+	{0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95},
+	{0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9},
+	{0}
+	};
+
+	/*
+	 * The character at Offset is mapped directly to Value. The table is
+	 * terminated by an all zero entry.
+	 */
+	static const int uc_word_table[][2] = { /* Offset, Value */
+	{0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196},
+	{0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C},
+	{0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D},
+	{0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F},
+	{0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9},
+	{0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE},
+	{0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7},
+	{0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197},
+	{0}
+	};
+
+	int i, r;
+	uchar_t *uc;
+
+	uc = ntfs_malloc_nofs(default_upcase_len * sizeof(uchar_t));
+	if (!uc)
+		return uc;
+	memset(uc, 0, default_upcase_len * sizeof(uchar_t));
+	/* Start from the identity mapping: every character maps to itself. */
+	for (i = 0; i < default_upcase_len; i++)
+		uc[i] = cpu_to_le16(i);
+	/* Shift whole runs of characters by a constant amount. */
+	for (r = 0; uc_run_table[r][0]; r++)
+		for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
+			uc[i] = cpu_to_le16((le16_to_cpu(uc[i]) +
+					uc_run_table[r][2]));
+	/* Map the second character of each pair onto the first one. */
+	for (r = 0; uc_dup_table[r][0]; r++)
+		for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
+			uc[i + 1] = cpu_to_le16(le16_to_cpu(uc[i + 1]) - 1);
+	/* Finally apply the individual overrides. */
+	for (r = 0; uc_word_table[r][0]; r++)
+		uc[uc_word_table[r][0]] = cpu_to_le16(uc_word_table[r][1]);
+	return uc;
+}
+
diff --git a/reactos/drivers/fs/ntfs/linux-ntfs/volume.h b/reactos/drivers/fs/ntfs/linux-ntfs/volume.h
new file mode 100644
index 00000000000..0bfea2ab2c8
--- /dev/null
+++ b/reactos/drivers/fs/ntfs/linux-ntfs/volume.h
@@ -0,0 +1,136 @@
+/*
+ * volume.h - Defines for volume structures in NTFS Linux kernel driver. Part
+ *	      of the Linux-NTFS project.
+ *
+ * Copyright (c) 2001,2002 Anton Altaparmakov.
+ * Copyright (c) 2002 Richard Russon.
+ *
+ * This program/include file is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program/include file is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program (in the main directory of the Linux-NTFS
+ * distribution in the file COPYING); if not, write to the Free Software
+ * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _LINUX_NTFS_VOLUME_H
+#define _LINUX_NTFS_VOLUME_H
+
+#include "types.h"
+
+/*
+ * The NTFS in memory super block structure.
+ */
+typedef struct {
+	/*
+	 * FIXME: Reorder to have elements that are commonly used together
+	 * within the same cache line, aiming at a cache line size of 32 bytes.
+	 * Aim for 64 bytes for elements less commonly used together. Put the
+	 * most commonly used elements at the front of the structure. Obviously
+	 * do this only when the structure has stabilized... (AIA)
+	 */
+	/* Device specifics. */
+	struct super_block *sb;		/* Pointer back to the super_block,
+					   so we don't have to get the offset
+					   every time. */
+	LCN nr_blocks;			/* Number of NTFS_BLOCK_SIZE bytes
+					   sized blocks on the device. */
+	/* Configuration provided by user at mount time. */
+	unsigned long flags;		/* Miscellaneous flags, see below
+					   (ntfs_volume_flags). */
+	uid_t uid;			/* uid that files will be mounted as. */
+	gid_t gid;			/* gid that files will be mounted as. */
+	mode_t fmask;			/* The mask for file permissions. */
+	mode_t dmask;			/* The mask for directory
+					   permissions. */
+	u8 mft_zone_multiplier;		/* Initial mft zone multiplier. */
+	u8 on_errors;			/* What to do on file system errors. */
+	/* NTFS bootsector provided information. */
+	u16 sector_size;		/* in bytes */
+	u8 sector_size_bits;		/* log2(sector_size) */
+	u32 cluster_size;		/* in bytes */
+	u32 cluster_size_mask;		/* cluster_size - 1 */
+	u8 cluster_size_bits;		/* log2(cluster_size) */
+	u32 mft_record_size;		/* in bytes */
+	u32 mft_record_size_mask;	/* mft_record_size - 1 */
+	u8 mft_record_size_bits;	/* log2(mft_record_size) */
+	u32 index_record_size;		/* in bytes */
+	u32 index_record_size_mask;	/* index_record_size - 1 */
+	u8 index_record_size_bits;	/* log2(index_record_size) */
+	LCN nr_clusters;		/* Volume size in clusters == number of
+					   bits in lcn bitmap. */
+	LCN mft_lcn;			/* Cluster location of mft data. */
+	LCN mftmirr_lcn;		/* Cluster location of copy of mft. */
+	u64 serial_no;			/* The volume serial number. */
+	/* Mount specific NTFS information. */
+	u32 upcase_len;			/* Number of entries in upcase[]. */
+	uchar_t *upcase;		/* The upcase table. */
+	LCN mft_zone_start;		/* First cluster of the mft zone. */
+	LCN mft_zone_end;		/* First cluster beyond the mft zone. */
+	struct inode *mft_ino;		/* The VFS inode of $MFT. */
+
+	struct inode *mftbmp_ino;	/* Attribute inode for $MFT/$BITMAP. */
+	struct rw_semaphore mftbmp_lock; /* Lock for serializing accesses to the
+					    mft record bitmap ($MFT/$BITMAP). */
+	unsigned long nr_mft_records;	/* Number of mft records == number of
+					   bits in mft bitmap. */
+
+	struct inode *mftmirr_ino;	/* The VFS inode of $MFTMirr. */
+	struct inode *lcnbmp_ino;	/* The VFS inode of $Bitmap. */
+	struct rw_semaphore lcnbmp_lock; /* Lock for serializing accesses to the
+					    cluster bitmap ($Bitmap/$DATA). */
+	struct inode *vol_ino;		/* The VFS inode of $Volume. */
+	unsigned long vol_flags;	/* Volume flags (VOLUME_*). */
+	u8 major_ver;			/* Ntfs major version of volume. */
+	u8 minor_ver;			/* Ntfs minor version of volume. */
+	struct inode *root_ino;		/* The VFS inode of the root
+					   directory. */
+	struct inode *secure_ino;	/* The VFS inode of $Secure (NTFS3.0+
+					   only, otherwise NULL). */
+	struct nls_table *nls_map;	/* NLS map used by ntfs_nlstoucs() and
+					   ntfs_ucstonls() to convert names. */
+} ntfs_volume;
+
+/*
+ * Defined bits for the flags field in the ntfs_volume structure.
+ *
+ * The values are bit numbers, not masks: they are passed as the bit index to
+ * test_bit(), set_bit() and clear_bit() by the NVol*() functions below.
+ */
+typedef enum {
+	NV_Errors,		/* 1: Volume has errors, prevent remount rw. */
+	NV_ShowSystemFiles,	/* 1: Return system files in ntfs_readdir(). */
+	NV_CaseSensitive,	/* 1: Treat file names as case sensitive and
+				      create filenames in the POSIX namespace.
+				      Otherwise be case insensitive and create
+				      file names in WIN32 namespace. */
+} ntfs_volume_flags;
+
+/*
+ * Macro tricks to expand the NVolFoo(), NVolSetFoo(), and NVolClearFoo()
+ * functions.
+ *
+ * For a given flag Foo, NVolFoo() tests bit NV_Foo in vol->flags,
+ * NVolSetFoo() sets it and NVolClearFoo() clears it.
+ */
+#define NVOL_FNS(flag)					\
+static inline int NVol##flag(ntfs_volume *vol)		\
+{							\
+	return test_bit(NV_##flag, &(vol)->flags);	\
+}							\
+static inline void NVolSet##flag(ntfs_volume *vol)	\
+{							\
+	set_bit(NV_##flag, &(vol)->flags);		\
+}							\
+static inline void NVolClear##flag(ntfs_volume *vol)	\
+{							\
+	clear_bit(NV_##flag, &(vol)->flags);		\
+}
+
+/* Emit the ntfs volume bitops functions. */
+NVOL_FNS(Errors)
+NVOL_FNS(ShowSystemFiles)
+NVOL_FNS(CaseSensitive)
+
+#endif /* _LINUX_NTFS_VOLUME_H */
+