X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fdentry.c;h=49c0cdf14784337e49b289b298eb4bff198ee251;hp=2e5090f2509ba24fd43697bb726fae80fb8ddbbf;hb=ee547cc83f231d727e4d9984c23e86d96d3da769;hpb=eaf5b4c85a3b7b853317b887867c18a4865a83e2 diff --git a/src/dentry.c b/src/dentry.c index 2e5090f2..49c0cdf1 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -1,1929 +1,1919 @@ /* - * dentry.c + * dentry.c - see description below + */ + +/* + * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers * - * In the WIM file format, the dentries are stored in the "metadata resource" - * section right after the security data. Each image in the WIM file has its - * own metadata resource with its own security data and dentry tree. Dentries - * in different images may share file resources by referring to the same lookup - * table entries. + * This file is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) any + * later version. + * + * This file is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more + * details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this file; if not, see http://www.gnu.org/licenses/. 
*/ /* - * Copyright (C) 2012, 2013 Eric Biggers + * This file contains logic to deal with WIM directory entries, or "dentries": + * + * - Reading a dentry tree from a metadata resource in a WIM file + * - Writing a dentry tree to a metadata resource in a WIM file + * - Iterating through a tree of WIM dentries + * - Path lookup: translating a path into a WIM dentry or inode + * - Creating, modifying, and deleting WIM dentries * - * This file is part of wimlib, a library for working with WIM files. + * Notes: * - * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free Software - * Foundation; either version 3 of the License, or (at your option) any later - * version. + * - A WIM file can contain multiple images, each of which has an independent + * tree of dentries. "On disk", the dentry tree for an image is stored in + * the "metadata resource" for that image. * - * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU General Public License for more details. + * - Multiple dentries in an image may correspond to the same inode, or "file". + * When this occurs, it means that the file has multiple names, or "hard + * links". A dentry is not a file, but rather the name of a file! * - * You should have received a copy of the GNU General Public License along with - * wimlib; if not, see http://www.gnu.org/licenses/. + * - Inodes are not represented explicitly in the WIM file format. Instead, + * the metadata resource provides a "hard link group ID" for each dentry. + * wimlib handles pulling out actual inodes from this information, but this + * occurs in inode_fixup.c and not in this file. + * + * - wimlib does not allow *directory* hard links, so a WIM image really does + * have a *tree* of dentries (and not an arbitrary graph of dentries). 
+ * + * - wimlib indexes dentries both case-insensitively and case-sensitively, + * allowing either behavior to be used for path lookup. + * + * - Multiple dentries in a directory might have the same case-insensitive + * name. But wimlib enforces that at most one dentry in a directory can have + * a given case-sensitive name. */ -#include "buffer_io.h" -#include "dentry.h" -#include "lookup_table.h" -#include "timestamp.h" -#include "wimlib_internal.h" +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + #include -/* Calculates the unaligned length, in bytes, of an on-disk WIM dentry that has - * a file name and short name that take the specified numbers of bytes. This - * excludes any alternate data stream entries that may follow the dentry. */ -static u64 -__dentry_correct_length_unaligned(u16 file_name_nbytes, u16 short_name_nbytes) -{ - u64 length = WIM_DENTRY_DISK_SIZE; - if (file_name_nbytes) - length += file_name_nbytes + 2; - if (short_name_nbytes) - length += short_name_nbytes + 2; - return length; -} +#include "wimlib/assert.h" +#include "wimlib/dentry.h" +#include "wimlib/inode.h" +#include "wimlib/encoding.h" +#include "wimlib/endianness.h" +#include "wimlib/metadata.h" +#include "wimlib/paths.h" + +/* On-disk format of a WIM dentry (directory entry), located in the metadata + * resource for a WIM image. */ +struct wim_dentry_on_disk { + + /* Length of this directory entry in bytes, not including any extra + * stream entries. Should be a multiple of 8 so that the following + * dentry or extra stream entry is aligned on an 8-byte boundary. (If + * not, wimlib will round it up.) It must be at least as long as the + * fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE), plus the + * lengths of the file name and/or short name if present, plus the size + * of any "extra" data. + * + * It is also possible for this field to be 0. This case indicates the + * end of a list of sibling entries in a directory. 
It also means the + * real length is 8, because the dentry included only the length field, + * but that takes up 8 bytes. */ + le64 length; + + /* File attributes for the file or directory. This is a bitwise OR of + * the FILE_ATTRIBUTE_* constants and should correspond to the value + * retrieved by GetFileAttributes() on Windows. */ + le32 attributes; + + /* A value that specifies the security descriptor for this file or + * directory. If -1, the file or directory has no security descriptor. + * Otherwise, it is a 0-based index into the WIM image's table of + * security descriptors (see: `struct wim_security_data') */ + sle32 security_id; + + /* Offset, in bytes, from the start of the uncompressed metadata + * resource of this directory's child directory entries, or 0 if this + * directory entry does not correspond to a directory or otherwise does + * not have any children. */ + le64 subdir_offset; + + /* Reserved fields */ + le64 unused_1; + le64 unused_2; + + /* Creation time, last access time, and last write time, in + * 100-nanosecond intervals since 12:00 a.m UTC January 1, 1601. They + * should correspond to the times gotten by calling GetFileTime() on + * Windows. */ + le64 creation_time; + le64 last_access_time; + le64 last_write_time; -/* Calculates the unaligned length, in bytes, of an on-disk WIM dentry, based on - * the file name length and short name length. Note that dentry->length is - * ignored; also, this excludes any alternate data stream entries that may - * follow the dentry. */ -static u64 -dentry_correct_length_unaligned(const struct wim_dentry *dentry) -{ - return __dentry_correct_length_unaligned(dentry->file_name_nbytes, - dentry->short_name_nbytes); -} + /* + * Usually this is the SHA-1 message digest of the file's "contents" + * (the unnamed data stream). + * + * If the file has FILE_ATTRIBUTE_REPARSE_POINT set, then this is + * instead usually the SHA-1 message digest of the uncompressed reparse + * point data. 
+ * + * However, there are some special rules that need to be applied to + * interpret this field correctly when extra stream entries are present. + * See the code for details. + */ + u8 default_hash[SHA1_HASH_SIZE]; -/* Return the "correct" value to write in the length field of a WIM dentry, - * based on the file name length and short name length. */ -static u64 -dentry_correct_length(const struct wim_dentry *dentry) -{ - return (dentry_correct_length_unaligned(dentry) + 7) & ~7; -} + /* Unknown field (maybe accidental padding) */ + le32 unknown_0x54; -/* Return %true iff the alternate data stream entry @entry has the UTF-16LE - * stream name @name that has length @name_nbytes bytes. */ -static inline bool -ads_entry_has_name(const struct wim_ads_entry *entry, - const utf16lechar *name, size_t name_nbytes) + /* + * The following 8-byte union contains either information about the + * reparse point (for files with FILE_ATTRIBUTE_REPARSE_POINT set), or + * the "hard link group ID" (for other files). + * + * The reparse point information contains ReparseTag and ReparseReserved + * from the header of the reparse point buffer. It also contains a flag + * that indicates whether a reparse point fixup (for the target of an + * absolute symbolic link or junction) was done or not. + * + * The "hard link group ID" is like an inode number; all dentries for + * the same inode share the same value. See inode_fixup.c for more + * information. + * + * Note that this union creates the limitation that reparse point files + * cannot have multiple names (hard links). + */ + union { + struct { + le32 reparse_tag; + le16 rp_reserved; + le16 rp_flags; + } _packed_attribute reparse; + struct { + le64 hard_link_group_id; + } _packed_attribute nonreparse; + }; + + /* Number of extra stream entries that directly follow this dentry + * on-disk. 
*/ + le16 num_extra_streams; + + /* If nonzero, this is the length, in bytes, of this dentry's UTF-16LE + * encoded short name (8.3 DOS-compatible name), excluding the null + * terminator. If zero, then the long name of this dentry does not have + * a corresponding short name (but this does not exclude the possibility + * that another dentry for the same file has a short name). */ + le16 short_name_nbytes; + + /* If nonzero, this is the length, in bytes, of this dentry's UTF-16LE + * encoded "long" name, excluding the null terminator. If zero, then + * this file has no long name. The root dentry should not have a long + * name, but all other dentries in the image should have long names. */ + le16 name_nbytes; + + /* Beginning of optional, variable-length fields */ + + /* If name_nbytes != 0, the next field will be the UTF-16LE encoded long + * name. This will be null-terminated, so the size of this field will + * really be name_nbytes + 2. */ + /*utf16lechar name[];*/ + + /* If short_name_nbytes != 0, the next field will be the UTF-16LE + * encoded short name. This will be null-terminated, so the size of + * this field will really be short_name_nbytes + 2. */ + /*utf16lechar short_name[];*/ + + /* If there is still space in the dentry (according to the 'length' + * field) after 8-byte alignment, then the remaining space will be a + * variable-length list of tagged metadata items. See tagged_items.c + * for more information. */ + /* u8 tagged_items[] _aligned_attribute(8); */ + +} _packed_attribute; + /* If num_extra_streams != 0, then there are that many extra stream + * entries following the dentry, starting on the next 8-byte aligned + * boundary. They are not counted in the 'length' field of the dentry. + */ + +/* On-disk format of an extra stream entry. This represents an extra NTFS-style + * "stream" associated with the file, such as a named data stream. */ +struct wim_extra_stream_entry_on_disk { + + /* Length of this extra stream entry, in bytes. 
This includes all + * fixed-length fields, plus the name and null terminator if present, + * and any needed padding such that the length is a multiple of 8. */ + le64 length; + + /* Reserved field */ + le64 reserved; + + /* SHA-1 message digest of this stream's uncompressed data, or all + * zeroes if this stream's data is of zero length. */ + u8 hash[SHA1_HASH_SIZE]; + + /* Length of this stream's name, in bytes and excluding the null + * terminator; or 0 if this stream is unnamed. */ + le16 name_nbytes; + + /* Stream name in UTF-16LE. It is @name_nbytes bytes long, excluding + * the null terminator. There is a null terminator character if + * @name_nbytes != 0; i.e., if this stream is named. */ + utf16lechar name[]; +} _packed_attribute; + +static void +do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *name, + size_t name_nbytes) { - return entry->stream_name_nbytes == name_nbytes && - memcmp(entry->stream_name, name, name_nbytes) == 0; + FREE(dentry->d_name); + dentry->d_name = name; + dentry->d_name_nbytes = name_nbytes; + + if (dentry_has_short_name(dentry)) { + FREE(dentry->d_short_name); + dentry->d_short_name = NULL; + dentry->d_short_name_nbytes = 0; + } } -/* Duplicates a string of system-dependent encoding into a UTF-16LE string and - * returns the string and its length, in bytes, in the pointer arguments. Frees - * any existing string at the return location before overwriting it. */ -static int -get_utf16le_name(const tchar *name, utf16lechar **name_utf16le_ret, - u16 *name_utf16le_nbytes_ret) +/* + * Set the name of a WIM dentry from a UTF-16LE string. + * + * This sets the long name of the dentry. The short name will automatically be + * removed, since it may not be appropriate for the new long name. + * + * The @name string need not be null-terminated, since its length is specified + * in @name_nbytes. + * + * If @name_nbytes is 0, both the long and short names of the dentry will be + * removed. 
+ * + * Only use this function on unlinked dentries, since it doesn't update the name + * indices. For dentries that are currently linked into the tree, use + * rename_wim_path(). + * + * Returns 0 or WIMLIB_ERR_NOMEM. + */ +int +dentry_set_name_utf16le(struct wim_dentry *dentry, const utf16lechar *name, + size_t name_nbytes) { - utf16lechar *name_utf16le; - size_t name_utf16le_nbytes; - int ret; -#if TCHAR_IS_UTF16LE - name_utf16le_nbytes = tstrlen(name) * sizeof(utf16lechar); - name_utf16le = MALLOC(name_utf16le_nbytes + sizeof(utf16lechar)); - if (!name_utf16le) - return WIMLIB_ERR_NOMEM; - memcpy(name_utf16le, name, name_utf16le_nbytes + sizeof(utf16lechar)); - ret = 0; -#else + utf16lechar *dup = NULL; - ret = tstr_to_utf16le(name, tstrlen(name), &name_utf16le, - &name_utf16le_nbytes); - if (ret == 0) { - if (name_utf16le_nbytes > 0xffff) { - FREE(name_utf16le); - ERROR("Multibyte string \"%"TS"\" is too long!", name); - ret = WIMLIB_ERR_INVALID_UTF8_STRING; - } - } -#endif - if (ret == 0) { - FREE(*name_utf16le_ret); - *name_utf16le_ret = name_utf16le; - *name_utf16le_nbytes_ret = name_utf16le_nbytes; + if (name_nbytes) { + dup = utf16le_dupz(name, name_nbytes); + if (!dup) + return WIMLIB_ERR_NOMEM; } - return ret; + do_dentry_set_name(dentry, dup, name_nbytes); + return 0; } -/* Sets the name of a WIM dentry from a multibyte string. */ + +/* + * Set the name of a WIM dentry from a 'tchar' string. + * + * This sets the long name of the dentry. The short name will automatically be + * removed, since it may not be appropriate for the new long name. + * + * If @name is NULL or empty, both the long and short names of the dentry will + * be removed. + * + * Only use this function on unlinked dentries, since it doesn't update the name + * indices. For dentries that are currently linked into the tree, use + * rename_wim_path(). + * + * Returns 0 or an error code resulting from a failed string conversion. 
+ */ int -set_dentry_name(struct wim_dentry *dentry, const tchar *new_name) +dentry_set_name(struct wim_dentry *dentry, const tchar *name) { + utf16lechar *name_utf16le = NULL; + size_t name_utf16le_nbytes = 0; int ret; - ret = get_utf16le_name(new_name, &dentry->file_name, - &dentry->file_name_nbytes); - if (ret == 0) { - /* Clear the short name and recalculate the dentry length */ - if (dentry_has_short_name(dentry)) { - FREE(dentry->short_name); - dentry->short_name = NULL; - dentry->short_name_nbytes = 0; - } - dentry->length = dentry_correct_length(dentry); + + if (name && *name) { + ret = tstr_to_utf16le(name, tstrlen(name) * sizeof(tchar), + &name_utf16le, &name_utf16le_nbytes); + if (ret) + return ret; } - return ret; + + do_dentry_set_name(dentry, name_utf16le, name_utf16le_nbytes); + return 0; } -/* Returns the total length of a WIM alternate data stream entry on-disk, - * including the stream name, the null terminator, AND the padding after the - * entry to align the next ADS entry or dentry on an 8-byte boundary. */ -static u64 -ads_entry_total_length(const struct wim_ads_entry *entry) +/* Calculate the minimum unaligned length, in bytes, of an on-disk WIM dentry + * that has names of the specified lengths. (Zero length means the + * corresponding name actually does not exist.) The returned value excludes + * tagged metadata items as well as any extra stream entries that may need to + * follow the dentry. 
*/ +static size_t +dentry_min_len_with_names(u16 name_nbytes, u16 short_name_nbytes) { - u64 len = WIM_ADS_ENTRY_DISK_SIZE; - if (entry->stream_name_nbytes) - len += entry->stream_name_nbytes + 2; - return (len + 7) & ~7; + size_t length = sizeof(struct wim_dentry_on_disk); + if (name_nbytes) + length += (u32)name_nbytes + 2; + if (short_name_nbytes) + length += (u32)short_name_nbytes + 2; + return length; } -static u64 -__dentry_total_length(const struct wim_dentry *dentry, u64 length) +/* Return the length, in bytes, required for the specified stream on-disk, when + * represented as an extra stream entry. */ +static size_t +stream_out_total_length(const struct wim_inode_stream *strm) { - const struct wim_inode *inode = dentry->d_inode; - for (u16 i = 0; i < inode->i_num_ads; i++) - length += ads_entry_total_length(&inode->i_ads_entries[i]); - return (length + 7) & ~7; -} + /* Account for the fixed length portion */ + size_t len = sizeof(struct wim_extra_stream_entry_on_disk); -/* Calculate the aligned *total* length of an on-disk WIM dentry. This includes - * all alternate data streams. */ -u64 -dentry_correct_total_length(const struct wim_dentry *dentry) -{ - return __dentry_total_length(dentry, - dentry_correct_length_unaligned(dentry)); -} + /* For named streams, account for the variable-length name. */ + if (stream_is_named(strm)) + len += utf16le_len_bytes(strm->stream_name) + 2; -/* Like dentry_correct_total_length(), but use the existing dentry->length field - * instead of calculating its "correct" value. */ -static u64 -dentry_total_length(const struct wim_dentry *dentry) -{ - return __dentry_total_length(dentry, dentry->length); + /* Account for any necessary padding to the next 8-byte boundary. */ + return ALIGN(len, 8); } -int -for_dentry_in_rbtree(struct rb_node *root, - int (*visitor)(struct wim_dentry *, void *), - void *arg) +/* + * Calculate the total number of bytes that will be consumed when a dentry is + * written. 
This includes the fixed-length portion of the dentry, the name + * fields, any tagged metadata items, and any extra stream entries. This also + * includes all alignment bytes. + */ +size_t +dentry_out_total_length(const struct wim_dentry *dentry) { - int ret; - struct rb_node *node = root; - LIST_HEAD(stack); - while (1) { - if (node) { - list_add(&rbnode_dentry(node)->tmp_list, &stack); - node = node->rb_left; - } else { - struct list_head *next; - struct wim_dentry *dentry; - - next = stack.next; - if (next == &stack) - return 0; - dentry = container_of(next, struct wim_dentry, tmp_list); - list_del(next); - ret = visitor(dentry, arg); - if (ret != 0) - return ret; - node = dentry->rb_node.rb_right; + const struct wim_inode *inode = dentry->d_inode; + size_t len; + + len = dentry_min_len_with_names(dentry->d_name_nbytes, + dentry->d_short_name_nbytes); + len = ALIGN(len, 8); + + len += ALIGN(inode->i_extra_size, 8); + + if (!(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) { + /* + * Extra stream entries: + * + * - Use one extra stream entry for each named data stream + * - Use one extra stream entry for the unnamed data stream when there is either: + * - a reparse point stream + * - at least one named data stream (for Windows PE bug workaround) + * - Use one extra stream entry for the reparse point stream if there is one + */ + bool have_named_data_stream = false; + bool have_reparse_point_stream = false; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct wim_inode_stream *strm = &inode->i_streams[i]; + if (stream_is_named_data_stream(strm)) { + len += stream_out_total_length(strm); + have_named_data_stream = true; + } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) { + wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT); + have_reparse_point_stream = true; + } + } + + if (have_named_data_stream || have_reparse_point_stream) { + if (have_reparse_point_stream) + len += ALIGN(sizeof(struct 
wim_extra_stream_entry_on_disk), 8); + len += ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8); } } + + return len; } +/* Internal version of for_dentry_in_tree() that omits the NULL check */ static int -for_dentry_tree_in_rbtree_depth(struct rb_node *node, - int (*visitor)(struct wim_dentry*, void*), - void *arg) +do_for_dentry_in_tree(struct wim_dentry *dentry, + int (*visitor)(struct wim_dentry *, void *), void *arg) { int ret; - if (node) { - ret = for_dentry_tree_in_rbtree_depth(node->rb_left, - visitor, arg); - if (ret != 0) - return ret; - ret = for_dentry_tree_in_rbtree_depth(node->rb_right, - visitor, arg); - if (ret != 0) - return ret; - ret = for_dentry_in_tree_depth(rbnode_dentry(node), visitor, arg); - if (ret != 0) + struct wim_dentry *child; + + ret = (*visitor)(dentry, arg); + if (unlikely(ret)) + return ret; + + for_dentry_child(child, dentry) { + ret = do_for_dentry_in_tree(child, visitor, arg); + if (unlikely(ret)) return ret; } return 0; } +/* Internal version of for_dentry_in_tree_depth() that omits the NULL check */ static int -for_dentry_tree_in_rbtree(struct rb_node *node, - int (*visitor)(struct wim_dentry*, void*), - void *arg) +do_for_dentry_in_tree_depth(struct wim_dentry *dentry, + int (*visitor)(struct wim_dentry *, void *), void *arg) { int ret; - if (node) { - ret = for_dentry_tree_in_rbtree(node->rb_left, visitor, arg); - if (ret != 0) - return ret; - ret = for_dentry_in_tree(rbnode_dentry(node), visitor, arg); - if (ret != 0) - return ret; - ret = for_dentry_tree_in_rbtree(node->rb_right, visitor, arg); - if (ret != 0) + struct wim_dentry *child; + + for_dentry_child_postorder(child, dentry) { + ret = do_for_dentry_in_tree_depth(child, visitor, arg); + if (unlikely(ret)) return ret; } - return 0; + return unlikely((*visitor)(dentry, arg)); } -/* Calls a function on all directory entries in a WIM dentry tree. 
Logically, - * this is a pre-order traversal (the function is called on a parent dentry - * before its children), but sibling dentries will be visited in order as well. - * */ +/* + * Call a function on all dentries in a tree. + * + * @arg will be passed as the second argument to each invocation of @visitor. + * + * This function does a pre-order traversal --- that is, a parent will be + * visited before its children. It also will visit siblings in order of + * case-sensitive filename. Equivalently, this function visits the entire tree + * in the case-sensitive lexicographic order of the full paths. + * + * It is safe to pass NULL for @root, which means that the dentry tree is empty. + * In this case, this function does nothing. + * + * @visitor must not modify the structure of the dentry tree during the + * traversal. + * + * The return value will be 0 if all calls to @visitor returned 0. Otherwise, + * the return value will be the first nonzero value returned by @visitor. + */ int for_dentry_in_tree(struct wim_dentry *root, - int (*visitor)(struct wim_dentry*, void*), void *arg) + int (*visitor)(struct wim_dentry *, void *), void *arg) { - int ret = visitor(root, arg); - if (ret == 0) { - ret = for_dentry_tree_in_rbtree(root->d_inode->i_children.rb_node, - visitor, - arg); - } - return ret; + if (unlikely(!root)) + return 0; + return do_for_dentry_in_tree(root, visitor, arg); } -/* Like for_dentry_in_tree(), but the visitor function is always called on a - * dentry's children before on itself. */ -int +/* Like for_dentry_in_tree(), but do a depth-first traversal of the dentry tree. + * That is, the visitor function will be called on a dentry's children before + * itself. It will be safe to free a dentry when visiting it. 
*/ +static int for_dentry_in_tree_depth(struct wim_dentry *root, - int (*visitor)(struct wim_dentry*, void*), void *arg) + int (*visitor)(struct wim_dentry *, void *), void *arg) { - int ret; - ret = for_dentry_tree_in_rbtree_depth(root->d_inode->i_children.rb_node, - visitor, arg); - if (ret == 0) - ret = visitor(root, arg); - return ret; + if (unlikely(!root)) + return 0; + return do_for_dentry_in_tree_depth(root, visitor, arg); } -/* Calculate the full path of @dentry. The full path of its parent must have - * already been calculated, or it must be the root dentry. */ -static int +/* + * Calculate the full path to @dentry within the WIM image, if not already done. + * + * The full name will be saved in the cached value 'dentry->d_full_path'. + * + * Whenever possible, use dentry_full_path() instead of calling this and + * accessing d_full_path directly. + * + * Returns 0 or an error code resulting from a failed string conversion. + */ +int calculate_dentry_full_path(struct wim_dentry *dentry) { - tchar *full_path; - u32 full_path_nbytes; - int ret; + size_t ulen; + size_t dummy; + const struct wim_dentry *d; - if (dentry->_full_path) + if (dentry->d_full_path) return 0; - if (dentry_is_root(dentry)) { - full_path = TSTRDUP(T("/")); - if (!full_path) - return WIMLIB_ERR_NOMEM; - full_path_nbytes = 1 * sizeof(tchar); - } else { - struct wim_dentry *parent; - tchar *parent_full_path; - u32 parent_full_path_nbytes; - size_t filename_nbytes; - - parent = dentry->parent; - if (dentry_is_root(parent)) { - parent_full_path = T(""); - parent_full_path_nbytes = 0; - } else { - if (!parent->_full_path) { - ret = calculate_dentry_full_path(parent); - if (ret) - return ret; - } - parent_full_path = parent->_full_path; - parent_full_path_nbytes = parent->full_path_nbytes; - } + ulen = 0; + d = dentry; + do { + ulen += d->d_name_nbytes / sizeof(utf16lechar); + ulen++; + d = d->d_parent; /* assumes d == d->d_parent for root */ + } while (!dentry_is_root(d)); - /* Append this 
dentry's name as a tchar string to the full path - * of the parent followed by the path separator */ - #if TCHAR_IS_UTF16LE - filename_nbytes = dentry->file_name_nbytes; - #else - { - int ret = utf16le_to_tstr_nbytes(dentry->file_name, - dentry->file_name_nbytes, - &filename_nbytes); - if (ret) - return ret; - } - #endif + utf16lechar ubuf[ulen]; + utf16lechar *p = &ubuf[ulen]; - full_path_nbytes = parent_full_path_nbytes + sizeof(tchar) + - filename_nbytes; - full_path = MALLOC(full_path_nbytes + sizeof(tchar)); - if (!full_path) - return WIMLIB_ERR_NOMEM; - memcpy(full_path, parent_full_path, parent_full_path_nbytes); - full_path[parent_full_path_nbytes / sizeof(tchar)] = T('/'); - #if TCHAR_IS_UTF16LE - memcpy(&full_path[parent_full_path_nbytes / sizeof(tchar) + 1], - dentry->file_name, - filename_nbytes + sizeof(tchar)); - #else - utf16le_to_tstr_buf(dentry->file_name, - dentry->file_name_nbytes, - &full_path[parent_full_path_nbytes / - sizeof(tchar) + 1]); - #endif - } - dentry->_full_path = full_path; - dentry->full_path_nbytes= full_path_nbytes; - return 0; -} + d = dentry; + do { + p -= d->d_name_nbytes / sizeof(utf16lechar); + memcpy(p, d->d_name, d->d_name_nbytes); + *--p = cpu_to_le16(WIM_PATH_SEPARATOR); + d = d->d_parent; /* assumes d == d->d_parent for root */ + } while (!dentry_is_root(d)); -static int -do_calculate_dentry_full_path(struct wim_dentry *dentry, void *_ignore) -{ - return calculate_dentry_full_path(dentry); -} + wimlib_assert(p == ubuf); -int -calculate_dentry_tree_full_paths(struct wim_dentry *root) -{ - return for_dentry_in_tree(root, do_calculate_dentry_full_path, NULL); + return utf16le_to_tstr(ubuf, ulen * sizeof(utf16lechar), + &dentry->d_full_path, &dummy); } +/* + * Return the full path to the @dentry within the WIM image, or NULL if the full + * path could not be determined due to a string conversion error. + * + * The returned memory will be cached in the dentry, so the caller is not + * responsible for freeing it. 
+ */ tchar * dentry_full_path(struct wim_dentry *dentry) { calculate_dentry_full_path(dentry); - return dentry->_full_path; + return dentry->d_full_path; } static int -increment_subdir_offset(struct wim_dentry *dentry, void *subdir_offset_p) +dentry_calculate_subdir_offset(struct wim_dentry *dentry, void *_subdir_offset_p) { - *(u64*)subdir_offset_p += dentry_correct_total_length(dentry); - return 0; -} + if (dentry_is_directory(dentry)) { + u64 *subdir_offset_p = _subdir_offset_p; + struct wim_dentry *child; -static int -call_calculate_subdir_offsets(struct wim_dentry *dentry, void *subdir_offset_p) -{ - calculate_subdir_offsets(dentry, subdir_offset_p); + /* Set offset of directory's child dentries */ + dentry->d_subdir_offset = *subdir_offset_p; + + /* Account for child dentries */ + for_dentry_child(child, dentry) + *subdir_offset_p += dentry_out_total_length(child); + + /* Account for end-of-directory entry */ + *subdir_offset_p += 8; + } else { + /* Not a directory; set the subdir offset to 0 */ + dentry->d_subdir_offset = 0; + } return 0; } /* - * Recursively calculates the subdir offsets for a directory tree. + * Calculate the subdir offsets for a dentry tree, in preparation of writing + * that dentry tree to a metadata resource. + * + * The subdir offset of each dentry is the offset in the uncompressed metadata + * resource at which its child dentries begin, or 0 if that dentry has no + * children. * - * @dentry: The root of the directory tree. - * @subdir_offset_p: The current subdirectory offset; i.e., the subdirectory - * offset for @dentry. + * The caller must initialize *subdir_offset_p to the first subdir offset that + * is available to use after the root dentry is written. + * + * When this function returns, *subdir_offset_p will have been advanced past the + * size needed for the dentry tree within the uncompressed metadata resource. 
*/ void -calculate_subdir_offsets(struct wim_dentry *dentry, u64 *subdir_offset_p) +calculate_subdir_offsets(struct wim_dentry *root, u64 *subdir_offset_p) { - struct rb_node *node; - - dentry->subdir_offset = *subdir_offset_p; - node = dentry->d_inode->i_children.rb_node; - if (node) { - /* Advance the subdir offset by the amount of space the children - * of this dentry take up. */ - for_dentry_in_rbtree(node, increment_subdir_offset, subdir_offset_p); + for_dentry_in_tree(root, dentry_calculate_subdir_offset, subdir_offset_p); +} - /* End-of-directory dentry on disk. */ - *subdir_offset_p += 8; +/* Compare the UTF-16LE long filenames of two dentries case insensitively. */ +static int +dentry_compare_names_case_insensitive(const struct wim_dentry *d1, + const struct wim_dentry *d2) +{ + return cmp_utf16le_strings(d1->d_name, + d1->d_name_nbytes / 2, + d2->d_name, + d2->d_name_nbytes / 2, + true); +} - /* Recursively call calculate_subdir_offsets() on all the - * children. */ - for_dentry_in_rbtree(node, call_calculate_subdir_offsets, subdir_offset_p); - } else { - /* On disk, childless directories have a valid subdir_offset - * that points to an 8-byte end-of-directory dentry. Regular - * files or reparse points have a subdir_offset of 0. */ - if (dentry_is_directory(dentry)) - *subdir_offset_p += 8; - else - dentry->subdir_offset = 0; - } +/* Compare the UTF-16LE long filenames of two dentries case sensitively. 
*/ +static int +dentry_compare_names_case_sensitive(const struct wim_dentry *d1, + const struct wim_dentry *d2) +{ + return cmp_utf16le_strings(d1->d_name, + d1->d_name_nbytes / 2, + d2->d_name, + d2->d_name_nbytes / 2, + false); } static int -compare_utf16le_names(const utf16lechar *name1, size_t nbytes1, - const utf16lechar *name2, size_t nbytes2) +_avl_dentry_compare_names_ci(const struct avl_tree_node *n1, + const struct avl_tree_node *n2) { - int result = memcmp(name1, name2, min(nbytes1, nbytes2)); - if (result) - return result; - else - return (int)nbytes1 - (int)nbytes2; + const struct wim_dentry *d1, *d2; + + d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node_ci); + d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node_ci); + return dentry_compare_names_case_insensitive(d1, d2); } static int -dentry_compare_names(const struct wim_dentry *d1, const struct wim_dentry *d2) +_avl_dentry_compare_names(const struct avl_tree_node *n1, + const struct avl_tree_node *n2) { - return compare_utf16le_names(d1->file_name, d1->file_name_nbytes, - d2->file_name, d2->file_name_nbytes); + const struct wim_dentry *d1, *d2; + + d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node); + d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node); + return dentry_compare_names_case_sensitive(d1, d2); } +/* Default case sensitivity behavior for searches with + * WIMLIB_CASE_PLATFORM_DEFAULT specified. This can be modified by passing + * WIMLIB_INIT_FLAG_DEFAULT_CASE_SENSITIVE or + * WIMLIB_INIT_FLAG_DEFAULT_CASE_INSENSITIVE to wimlib_global_init(). */ +bool default_ignore_case = +#ifdef __WIN32__ + true +#else + false +#endif +; +/* Case-sensitive dentry lookup. Only @d_name and @d_name_nbytes of @dummy must + * be valid. 
*/ +static struct wim_dentry * +dir_lookup(const struct wim_inode *dir, const struct wim_dentry *dummy) +{ + struct avl_tree_node *node; + + node = avl_tree_lookup_node(dir->i_children, + &dummy->d_index_node, + _avl_dentry_compare_names); + if (!node) + return NULL; + return avl_tree_entry(node, struct wim_dentry, d_index_node); +} + +/* Case-insensitive dentry lookup. Only @d_name and @d_name_nbytes of @dummy + * must be valid. */ +static struct wim_dentry * +dir_lookup_ci(const struct wim_inode *dir, const struct wim_dentry *dummy) +{ + struct avl_tree_node *node; + + node = avl_tree_lookup_node(dir->i_children_ci, + &dummy->d_index_node_ci, + _avl_dentry_compare_names_ci); + if (!node) + return NULL; + return avl_tree_entry(node, struct wim_dentry, d_index_node_ci); +} + +/* Given a UTF-16LE filename and a directory, look up the dentry for the file. + * Return it if found, otherwise NULL. This has configurable case sensitivity, + * and @name need not be null-terminated. */ struct wim_dentry * get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry, const utf16lechar *name, - size_t name_nbytes) + size_t name_nbytes, + CASE_SENSITIVITY_TYPE case_ctype) { - struct rb_node *node = dentry->d_inode->i_children.rb_node; + const struct wim_inode *dir = dentry->d_inode; + bool ignore_case = will_ignore_case(case_ctype); + struct wim_dentry dummy; struct wim_dentry *child; - while (node) { - child = rbnode_dentry(node); - int result = compare_utf16le_names(name, name_nbytes, - child->file_name, - child->file_name_nbytes); - if (result < 0) - node = node->rb_left; - else if (result > 0) - node = node->rb_right; - else - return child; - } - return NULL; + + dummy.d_name = (utf16lechar*)name; + dummy.d_name_nbytes = name_nbytes; + + if (!ignore_case) + /* Case-sensitive lookup. */ + return dir_lookup(dir, &dummy); + + /* Case-insensitive lookup. 
*/ + + child = dir_lookup_ci(dir, &dummy); + if (!child) + return NULL; + + if (likely(list_empty(&child->d_ci_conflict_list))) + /* Only one dentry has this case-insensitive name; return it */ + return child; + + /* Multiple dentries have the same case-insensitive name. Choose the + * dentry with the same case-sensitive name, if one exists; otherwise + * print a warning and choose one of the possible dentries arbitrarily. + */ + struct wim_dentry *alt = child; + size_t num_alts = 0; + + do { + num_alts++; + if (!dentry_compare_names_case_sensitive(&dummy, alt)) + return alt; + alt = list_entry(alt->d_ci_conflict_list.next, + struct wim_dentry, d_ci_conflict_list); + } while (alt != child); + + WARNING("Result of case-insensitive lookup is ambiguous\n" + " (returning \"%"TS"\" of %zu " + "possible files, including \"%"TS"\")", + dentry_full_path(child), + num_alts, + dentry_full_path(list_entry(child->d_ci_conflict_list.next, + struct wim_dentry, + d_ci_conflict_list))); + return child; } -/* Returns the child of @dentry that has the file name @name. Returns NULL if - * no child has the name. */ +/* Given a 'tchar' filename and a directory, look up the dentry for the file. + * If the filename was successfully converted to UTF-16LE and the dentry was + * found, return it; otherwise return NULL. This has configurable case + * sensitivity. 
*/ struct wim_dentry * -get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name) +get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name, + CASE_SENSITIVITY_TYPE case_type) { -#if TCHAR_IS_UTF16LE - return get_dentry_child_with_utf16le_name(dentry, name, - tstrlen(name) * sizeof(tchar)); -#else - utf16lechar *utf16le_name; - size_t utf16le_name_nbytes; int ret; + const utf16lechar *name_utf16le; + size_t name_utf16le_nbytes; struct wim_dentry *child; - ret = tstr_to_utf16le(name, tstrlen(name) * sizeof(tchar), - &utf16le_name, &utf16le_name_nbytes); - if (ret) { - child = NULL; - } else { - child = get_dentry_child_with_utf16le_name(dentry, - utf16le_name, - utf16le_name_nbytes); - FREE(utf16le_name); - } + ret = tstr_get_utf16le_and_len(name, &name_utf16le, + &name_utf16le_nbytes); + if (ret) + return NULL; + + child = get_dentry_child_with_utf16le_name(dentry, + name_utf16le, + name_utf16le_nbytes, + case_type); + tstr_put_utf16le(name_utf16le); return child; -#endif } +/* This is the UTF-16LE version of get_dentry(), currently private to this file + * because no one needs it besides get_dentry(). 
*/ static struct wim_dentry * -get_dentry_utf16le(WIMStruct *w, const utf16lechar *path, - size_t path_nbytes) +get_dentry_utf16le(WIMStruct *wim, const utf16lechar *path, + CASE_SENSITIVITY_TYPE case_type) { - struct wim_dentry *cur_dentry, *parent_dentry; - const utf16lechar *p, *pp; - - cur_dentry = parent_dentry = wim_root_dentry(w); - p = path; - while (1) { - while (*p == cpu_to_le16('/')) - p++; - if (*p == '\0') - break; - pp = p; - while (*pp != cpu_to_le16('/') && *pp != cpu_to_le16('\0')) - pp++; - - cur_dentry = get_dentry_child_with_utf16le_name(parent_dentry, p, - (void*)pp - (void*)p); - if (cur_dentry == NULL) - break; - p = pp; - parent_dentry = cur_dentry; - } - if (cur_dentry == NULL) { - if (dentry_is_directory(parent_dentry)) + struct wim_dentry *cur_dentry; + const utf16lechar *name_start, *name_end; + + /* Start with the root directory of the image. Note: this will be NULL + * if an image has been added directly with wimlib_add_empty_image() but + * no files have been added yet; in that case we fail with ENOENT. */ + cur_dentry = wim_get_current_root_dentry(wim); + + name_start = path; + for (;;) { + if (cur_dentry == NULL) { errno = ENOENT; - else + return NULL; + } + + if (*name_start && !dentry_is_directory(cur_dentry)) { errno = ENOTDIR; + return NULL; + } + + while (*name_start == cpu_to_le16(WIM_PATH_SEPARATOR)) + name_start++; + + if (!*name_start) + return cur_dentry; + + name_end = name_start; + do { + ++name_end; + } while (*name_end != cpu_to_le16(WIM_PATH_SEPARATOR) && *name_end); + + cur_dentry = get_dentry_child_with_utf16le_name(cur_dentry, + name_start, + (u8*)name_end - (u8*)name_start, + case_type); + name_start = name_end; } - return cur_dentry; } -/* Returns the dentry corresponding to the @path, or NULL if there is no such - * dentry. */ +/* + * WIM path lookup: translate a path in the currently selected WIM image to the + * corresponding dentry, if it exists. + * + * @wim + * The WIMStruct for the WIM. 
The search takes place in the currently + * selected image. + * + * @path + * The path to look up, given relative to the root of the WIM image. + * Characters with value WIM_PATH_SEPARATOR are taken to be path + * separators. Leading path separators are ignored, whereas one or more + * trailing path separators cause the path to only match a directory. + * + * @case_type + * The case-sensitivity behavior of this function, as one of the following + * constants: + * + * - WIMLIB_CASE_SENSITIVE: Perform the search case sensitively. This means + * that names must match exactly. + * + * - WIMLIB_CASE_INSENSITIVE: Perform the search case insensitively. This + * means that names are considered to match if they are equal when + * transformed to upper case. If a path component matches multiple names + * case-insensitively, the name that matches the path component + * case-sensitively is chosen, if existent; otherwise one + * case-insensitively matching name is chosen arbitrarily. + * + * - WIMLIB_CASE_PLATFORM_DEFAULT: Perform either case-sensitive or + * case-insensitive search, depending on the value of the global variable + * default_ignore_case. + * + * In any case, no Unicode normalization is done before comparing strings. + * + * Returns a pointer to the dentry that is the result of the lookup, or NULL if + * no such dentry exists. If NULL is returned, errno is set to one of the + * following values: + * + * ENOTDIR if one of the path components used as a directory existed but + * was not, in fact, a directory. + * + * ENOENT otherwise. + * + * Additional notes: + * + * - This function does not consider a reparse point to be a directory, even + * if it has FILE_ATTRIBUTE_DIRECTORY set. + * + * - This function does not dereference symbolic links or junction points + * when performing the search. + * + * - Since this function ignores leading slashes, the empty path is valid and + * names the root directory of the WIM image. 
+ * + * - An image added with wimlib_add_empty_image() does not have a root + * directory yet, and this function will fail with ENOENT for any path on + * such an image. + */ struct wim_dentry * -get_dentry(WIMStruct *w, const tchar *path) +get_dentry(WIMStruct *wim, const tchar *path, CASE_SENSITIVITY_TYPE case_type) { -#if TCHAR_IS_UTF16LE - return get_dentry_utf16le(w, path, tstrlen(path) * sizeof(tchar)); -#else - utf16lechar *path_utf16le; - size_t path_utf16le_nbytes; int ret; + const utf16lechar *path_utf16le; struct wim_dentry *dentry; - ret = tstr_to_utf16le(path, tstrlen(path) * sizeof(tchar), - &path_utf16le, &path_utf16le_nbytes); + ret = tstr_get_utf16le(path, &path_utf16le); if (ret) return NULL; - dentry = get_dentry_utf16le(w, path_utf16le, path_utf16le_nbytes); - FREE(path_utf16le); + dentry = get_dentry_utf16le(wim, path_utf16le, case_type); + tstr_put_utf16le(path_utf16le); return dentry; -#endif } -struct wim_inode * -wim_pathname_to_inode(WIMStruct *w, const tchar *path) -{ - struct wim_dentry *dentry; - dentry = get_dentry(w, path); - if (dentry) - return dentry->d_inode; - else - return NULL; -} - -/* Takes in a path of length @len in @buf, and transforms it into a string for - * the path of its parent directory. */ +/* Modify @path, which is a null-terminated string @len 'tchars' in length, + * in-place to produce the path to its parent directory. */ static void -to_parent_name(tchar *buf, size_t len) +to_parent_name(tchar *path, size_t len) { ssize_t i = (ssize_t)len - 1; - while (i >= 0 && buf[i] == T('/')) + while (i >= 0 && path[i] == WIM_PATH_SEPARATOR) i--; - while (i >= 0 && buf[i] != T('/')) + while (i >= 0 && path[i] != WIM_PATH_SEPARATOR) i--; - while (i >= 0 && buf[i] == T('/')) + while (i >= 0 && path[i] == WIM_PATH_SEPARATOR) i--; - buf[i + 1] = T('\0'); + path[i + 1] = T('\0'); } -/* Returns the dentry that corresponds to the parent directory of @path, or NULL - * if the dentry is not found. 
*/ +/* Similar to get_dentry(), but returns the dentry named by @path with the last + * component stripped off. + * + * Note: The returned dentry is NOT guaranteed to be a directory. */ struct wim_dentry * -get_parent_dentry(WIMStruct *w, const tchar *path) +get_parent_dentry(WIMStruct *wim, const tchar *path, + CASE_SENSITIVITY_TYPE case_type) { size_t path_len = tstrlen(path); tchar buf[path_len + 1]; tmemcpy(buf, path, path_len + 1); to_parent_name(buf, path_len); - return get_dentry(w, buf); -} - -/* Prints the full path of a dentry. */ -int -print_dentry_full_path(struct wim_dentry *dentry, void *_ignore) -{ - int ret = calculate_dentry_full_path(dentry); - if (ret) - return ret; - tprintf(T("%"TS"\n"), dentry->_full_path); - return 0; -} - -/* We want to be able to show the names of the file attribute flags that are - * set. */ -struct file_attr_flag { - u32 flag; - const tchar *name; -}; -struct file_attr_flag file_attr_flags[] = { - {FILE_ATTRIBUTE_READONLY, T("READONLY")}, - {FILE_ATTRIBUTE_HIDDEN, T("HIDDEN")}, - {FILE_ATTRIBUTE_SYSTEM, T("SYSTEM")}, - {FILE_ATTRIBUTE_DIRECTORY, T("DIRECTORY")}, - {FILE_ATTRIBUTE_ARCHIVE, T("ARCHIVE")}, - {FILE_ATTRIBUTE_DEVICE, T("DEVICE")}, - {FILE_ATTRIBUTE_NORMAL, T("NORMAL")}, - {FILE_ATTRIBUTE_TEMPORARY, T("TEMPORARY")}, - {FILE_ATTRIBUTE_SPARSE_FILE, T("SPARSE_FILE")}, - {FILE_ATTRIBUTE_REPARSE_POINT, T("REPARSE_POINT")}, - {FILE_ATTRIBUTE_COMPRESSED, T("COMPRESSED")}, - {FILE_ATTRIBUTE_OFFLINE, T("OFFLINE")}, - {FILE_ATTRIBUTE_NOT_CONTENT_INDEXED,T("NOT_CONTENT_INDEXED")}, - {FILE_ATTRIBUTE_ENCRYPTED, T("ENCRYPTED")}, - {FILE_ATTRIBUTE_VIRTUAL, T("VIRTUAL")}, -}; - -/* Prints a directory entry. @lookup_table is a pointer to the lookup table, if - * available. If the dentry is unresolved and the lookup table is NULL, the - * lookup table entries will not be printed. Otherwise, they will be. 
*/ -int -print_dentry(struct wim_dentry *dentry, void *lookup_table) -{ - const u8 *hash; - struct wim_lookup_table_entry *lte; - const struct wim_inode *inode = dentry->d_inode; - tchar buf[50]; - - tprintf(T("[DENTRY]\n")); - tprintf(T("Length = %"PRIu64"\n"), dentry->length); - tprintf(T("Attributes = 0x%x\n"), inode->i_attributes); - for (size_t i = 0; i < ARRAY_LEN(file_attr_flags); i++) - if (file_attr_flags[i].flag & inode->i_attributes) - tprintf(T(" FILE_ATTRIBUTE_%"TS" is set\n"), - file_attr_flags[i].name); - tprintf(T("Security ID = %d\n"), inode->i_security_id); - tprintf(T("Subdir offset = %"PRIu64"\n"), dentry->subdir_offset); - - wim_timestamp_to_str(inode->i_creation_time, buf, sizeof(buf)); - tprintf(T("Creation Time = %"TS"\n"), buf); - - wim_timestamp_to_str(inode->i_last_access_time, buf, sizeof(buf)); - tprintf(T("Last Access Time = %"TS"\n"), buf); - - wim_timestamp_to_str(inode->i_last_write_time, buf, sizeof(buf)); - tprintf(T("Last Write Time = %"TS"\n"), buf); - - if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { - tprintf(T("Reparse Tag = 0x%"PRIx32"\n"), inode->i_reparse_tag); - tprintf(T("Reparse Point Flags = 0x%"PRIx16"\n"), - inode->i_not_rpfixed); - tprintf(T("Reparse Point Unknown 2 = 0x%"PRIx32"\n"), - inode->i_rp_unknown_2); - } - tprintf(T("Reparse Point Unknown 1 = 0x%"PRIx32"\n"), - inode->i_rp_unknown_1); - tprintf(T("Hard Link Group = 0x%"PRIx64"\n"), inode->i_ino); - tprintf(T("Hard Link Group Size = %"PRIu32"\n"), inode->i_nlink); - tprintf(T("Number of Alternate Data Streams = %hu\n"), inode->i_num_ads); - if (dentry_has_long_name(dentry)) - wimlib_printf(T("Filename = \"%"WS"\"\n"), dentry->file_name); - if (dentry_has_short_name(dentry)) - wimlib_printf(T("Short Name \"%"WS"\"\n"), dentry->short_name); - if (dentry->_full_path) - tprintf(T("Full Path = \"%"TS"\"\n"), dentry->_full_path); - - lte = inode_stream_lte(dentry->d_inode, 0, lookup_table); - if (lte) { - print_lookup_table_entry(lte, stdout); - } else 
{ - hash = inode_stream_hash(inode, 0); - if (hash) { - tprintf(T("Hash = 0x")); - print_hash(hash, stdout); - tputchar(T('\n')); - tputchar(T('\n')); - } - } - for (u16 i = 0; i < inode->i_num_ads; i++) { - tprintf(T("[Alternate Stream Entry %u]\n"), i); - wimlib_printf(T("Name = \"%"WS"\"\n"), - inode->i_ads_entries[i].stream_name); - tprintf(T("Name Length (UTF16 bytes) = %hu\n"), - inode->i_ads_entries[i].stream_name_nbytes); - hash = inode_stream_hash(inode, i + 1); - if (hash) { - tprintf(T("Hash = 0x")); - print_hash(hash, stdout); - tputchar(T('\n')); - } - print_lookup_table_entry(inode_stream_lte(inode, i + 1, lookup_table), - stdout); - } - return 0; -} - -/* Initializations done on every `struct wim_dentry'. */ -static void -dentry_common_init(struct wim_dentry *dentry) -{ - memset(dentry, 0, sizeof(struct wim_dentry)); -} - -struct wim_inode * -new_timeless_inode() -{ - struct wim_inode *inode = CALLOC(1, sizeof(struct wim_inode)); - if (inode) { - inode->i_security_id = -1; - inode->i_nlink = 1; - inode->i_next_stream_id = 1; - inode->i_not_rpfixed = 1; - INIT_LIST_HEAD(&inode->i_list); - #ifdef WITH_FUSE - if (pthread_mutex_init(&inode->i_mutex, NULL) != 0) { - ERROR_WITH_ERRNO("Error initializing mutex"); - FREE(inode); - return NULL; - } - #endif - INIT_LIST_HEAD(&inode->i_dentry); - } - return inode; -} - -static struct wim_inode * -new_inode() -{ - struct wim_inode *inode = new_timeless_inode(); - if (inode) { - u64 now = get_wim_timestamp(); - inode->i_creation_time = now; - inode->i_last_access_time = now; - inode->i_last_write_time = now; - } - return inode; + return get_dentry(wim, buf, case_type); } -/* Creates an unlinked directory entry. */ -int +/* + * Create an unlinked dentry. + * + * @name specifies the long name to give the new dentry. If NULL or empty, the + * new dentry will be given no long name. + * + * The new dentry will have no short name and no associated inode. 
+ * + * On success, returns 0 and a pointer to the new, allocated dentry is stored in + * *dentry_ret. On failure, returns WIMLIB_ERR_NOMEM or an error code resulting + * from a failed string conversion. + */ +static int new_dentry(const tchar *name, struct wim_dentry **dentry_ret) { struct wim_dentry *dentry; int ret; - dentry = MALLOC(sizeof(struct wim_dentry)); + dentry = CALLOC(1, sizeof(struct wim_dentry)); if (!dentry) return WIMLIB_ERR_NOMEM; - dentry_common_init(dentry); - ret = set_dentry_name(dentry, name); - if (ret == 0) { - dentry->parent = dentry; - *dentry_ret = dentry; - } else { - FREE(dentry); - ERROR("Failed to set name on new dentry with name \"%"TS"\"", - name); + if (name && *name) { + ret = dentry_set_name(dentry, name); + if (ret) { + FREE(dentry); + return ret; + } } - return ret; + dentry->d_parent = dentry; + *dentry_ret = dentry; + return 0; } - -static int -__new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret, - bool timeless) +/* Like new_dentry(), but also allocate an inode and associate it with the + * dentry. If set_timestamps=true, the timestamps for the inode will be set to + * the current time; otherwise, they will be left 0. */ +int +new_dentry_with_new_inode(const tchar *name, bool set_timestamps, + struct wim_dentry **dentry_ret) { struct wim_dentry *dentry; + struct wim_inode *inode; int ret; ret = new_dentry(name, &dentry); if (ret) return ret; - if (timeless) - dentry->d_inode = new_timeless_inode(); - else - dentry->d_inode = new_inode(); - if (!dentry->d_inode) { + inode = new_inode(dentry, set_timestamps); + if (!inode) { free_dentry(dentry); return WIMLIB_ERR_NOMEM; } - inode_add_dentry(dentry, dentry->d_inode); *dentry_ret = dentry; return 0; } +/* Like new_dentry(), but also associate the new dentry with the specified inode + * and acquire a reference to each of the inode's blobs. 
*/ int -new_dentry_with_timeless_inode(const tchar *name, struct wim_dentry **dentry_ret) +new_dentry_with_existing_inode(const tchar *name, struct wim_inode *inode, + struct wim_dentry **dentry_ret) { - return __new_dentry_with_inode(name, dentry_ret, true); + int ret = new_dentry(name, dentry_ret); + if (ret) + return ret; + d_associate(*dentry_ret, inode); + inode_ref_blobs(inode); + return 0; } +/* Create an unnamed dentry with a new inode for a directory with the default + * metadata. */ int -new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret) +new_filler_directory(struct wim_dentry **dentry_ret) { - return __new_dentry_with_inode(name, dentry_ret, false); -} - - -static int -init_ads_entry(struct wim_ads_entry *ads_entry, const void *name, - size_t name_nbytes, bool is_utf16le) -{ - int ret = 0; - memset(ads_entry, 0, sizeof(*ads_entry)); + int ret; + struct wim_dentry *dentry; - if (is_utf16le) { - utf16lechar *p = MALLOC(name_nbytes + sizeof(utf16lechar)); - if (!p) - return WIMLIB_ERR_NOMEM; - memcpy(p, name, name_nbytes); - p[name_nbytes / 2] = 0; - ads_entry->stream_name = p; - ads_entry->stream_name_nbytes = name_nbytes; - } else { - if (name && *(const tchar*)name != T('\0')) { - ret = get_utf16le_name(name, &ads_entry->stream_name, - &ads_entry->stream_name_nbytes); - } - } - return ret; + ret = new_dentry_with_new_inode(NULL, true, &dentry); + if (ret) + return ret; + /* Leave the inode number as 0; this is allowed for non + * hard-linked files. */ + dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY; + *dentry_ret = dentry; + return 0; } -static void -destroy_ads_entry(struct wim_ads_entry *ads_entry) +static int +dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore) { - FREE(ads_entry->stream_name); + dentry->d_inode->i_visited = 0; + return 0; } -/* Frees an inode. 
*/ void -free_inode(struct wim_inode *inode) +dentry_tree_clear_inode_visited(struct wim_dentry *root) { - if (inode) { - if (inode->i_ads_entries) { - for (u16 i = 0; i < inode->i_num_ads; i++) - destroy_ads_entry(&inode->i_ads_entries[i]); - FREE(inode->i_ads_entries); - } - #ifdef WITH_FUSE - wimlib_assert(inode->i_num_opened_fds == 0); - FREE(inode->i_fds); - pthread_mutex_destroy(&inode->i_mutex); - #endif - /* HACK: This may instead delete the inode from i_list, but the - * hlist_del() behaves the same as list_del(). */ - hlist_del(&inode->i_hlist); - FREE(inode->i_extracted_file); - FREE(inode); - } + for_dentry_in_tree(root, dentry_clear_inode_visited, NULL); } -/* Decrements link count on an inode and frees it if the link count reaches 0. - * */ -static void -put_inode(struct wim_inode *inode) -{ - wimlib_assert(inode->i_nlink != 0); - if (--inode->i_nlink == 0) { - #ifdef WITH_FUSE - if (inode->i_num_opened_fds == 0) - #endif - { - free_inode(inode); - } - } -} - -/* Frees a WIM dentry. +/* + * Free a WIM dentry. * - * The corresponding inode (if any) is freed only if its link count is - * decremented to 0. + * In addition to freeing the dentry itself, this disassociates the dentry from + * its inode. If the inode is no longer in use, it will be freed as well. */ void free_dentry(struct wim_dentry *dentry) { - FREE(dentry->file_name); - FREE(dentry->short_name); - FREE(dentry->_full_path); - if (dentry->d_inode) - put_inode(dentry->d_inode); - FREE(dentry); + if (dentry) { + d_disassociate(dentry); + FREE(dentry->d_name); + FREE(dentry->d_short_name); + FREE(dentry->d_full_path); + FREE(dentry); + } } -/* This function is passed as an argument to for_dentry_in_tree_depth() in order - * to free a directory tree. 
*/ static int -do_free_dentry(struct wim_dentry *dentry, void *__lookup_table) +do_free_dentry(struct wim_dentry *dentry, void *_ignore) { - struct wim_lookup_table *lookup_table = __lookup_table; - unsigned i; - - if (lookup_table) { - struct wim_lookup_table_entry *lte; - struct wim_inode *inode = dentry->d_inode; - wimlib_assert(inode->i_nlink != 0); - for (i = 0; i <= inode->i_num_ads; i++) { - lte = inode_stream_lte(inode, i, lookup_table); - if (lte) - lte_decrement_refcnt(lte, lookup_table); - } - } free_dentry(dentry); return 0; } -/* - * Unlinks and frees a dentry tree. - * - * @root: The root of the tree. - * @lookup_table: The lookup table for dentries. If non-NULL, the - * reference counts in the lookup table for the lookup - * table entries corresponding to the dentries will be - * decremented. - */ -void -free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table) -{ - if (root) - for_dentry_in_tree_depth(root, do_free_dentry, lookup_table); -} - -/* - * Links a dentry into the directory tree. - * - * @parent: The dentry that will be the parent of @dentry. - * @dentry: The dentry to link. - */ -struct wim_dentry * -dentry_add_child(struct wim_dentry * restrict parent, - struct wim_dentry * restrict child) -{ - wimlib_assert(dentry_is_directory(parent)); - - struct rb_root *root = &parent->d_inode->i_children; - struct rb_node **new = &(root->rb_node); - struct rb_node *rb_parent = NULL; - - while (*new) { - struct wim_dentry *this = rbnode_dentry(*new); - int result = dentry_compare_names(child, this); - - rb_parent = *new; - - if (result < 0) - new = &((*new)->rb_left); - else if (result > 0) - new = &((*new)->rb_right); - else - return this; - } - child->parent = parent; - rb_link_node(&child->rb_node, rb_parent, new); - rb_insert_color(&child->rb_node, root); - return NULL; -} - -/* Unlink a WIM dentry from the directory entry tree. 
*/ -void -unlink_dentry(struct wim_dentry *dentry) +static int +do_free_dentry_and_unref_blobs(struct wim_dentry *dentry, void *blob_table) { - struct wim_dentry *parent = dentry->parent; - if (parent == dentry) - return; - rb_erase(&dentry->rb_node, &parent->d_inode->i_children); + inode_unref_blobs(dentry->d_inode, blob_table); + free_dentry(dentry); + return 0; } /* - * Returns the alternate data stream entry belonging to @inode that has the - * stream name @stream_name. + * Free all dentries in a tree. + * + * @root: + * The root of the dentry tree to free. If NULL, this function has no + * effect. + * + * @blob_table: + * A pointer to the blob table for the WIM, or NULL if not specified. If + * specified, this function will decrement the reference counts of the + * blobs referenced by the dentries. + * + * This function also releases references to the corresponding inodes. + * + * This function does *not* unlink @root from its parent directory, if it has + * one. If @root has a parent, the caller must unlink @root before calling this + * function. 
*/ -struct wim_ads_entry * -inode_get_ads_entry(struct wim_inode *inode, const tchar *stream_name, - u16 *idx_ret) +void +free_dentry_tree(struct wim_dentry *root, struct blob_table *blob_table) { - if (inode->i_num_ads == 0) { - return NULL; - } else { - size_t stream_name_utf16le_nbytes; - u16 i; - struct wim_ads_entry *result; - - #if TCHAR_IS_UTF16LE - const utf16lechar *stream_name_utf16le; + int (*f)(struct wim_dentry *, void *); - stream_name_utf16le = stream_name; - stream_name_utf16le_nbytes = tstrlen(stream_name) * sizeof(tchar); - #else - utf16lechar *stream_name_utf16le; + if (blob_table) + f = do_free_dentry_and_unref_blobs; + else + f = do_free_dentry; - { - int ret = tstr_to_utf16le(stream_name, - tstrlen(stream_name) * - sizeof(tchar), - &stream_name_utf16le, - &stream_name_utf16le_nbytes); - if (ret) - return NULL; - } - #endif - i = 0; - result = NULL; - do { - if (ads_entry_has_name(&inode->i_ads_entries[i], - stream_name_utf16le, - stream_name_utf16le_nbytes)) - { - if (idx_ret) - *idx_ret = i; - result = &inode->i_ads_entries[i]; - break; - } - } while (++i != inode->i_num_ads); - #if !TCHAR_IS_UTF16LE - FREE(stream_name_utf16le); - #endif - return result; - } + for_dentry_in_tree_depth(root, f, blob_table); } -static struct wim_ads_entry * -do_inode_add_ads(struct wim_inode *inode, const void *stream_name, - size_t stream_name_nbytes, bool is_utf16le) +/* Insert the @child dentry into the case sensitive index of the @dir directory. + * Return NULL if successfully inserted, otherwise a pointer to the + * already-inserted duplicate. 
*/ +static struct wim_dentry * +dir_index_child(struct wim_inode *dir, struct wim_dentry *child) { - u16 num_ads; - struct wim_ads_entry *ads_entries; - struct wim_ads_entry *new_entry; + struct avl_tree_node *duplicate; - if (inode->i_num_ads >= 0xfffe) { - ERROR("Too many alternate data streams in one inode!"); + duplicate = avl_tree_insert(&dir->i_children, + &child->d_index_node, + _avl_dentry_compare_names); + if (!duplicate) return NULL; - } - num_ads = inode->i_num_ads + 1; - ads_entries = REALLOC(inode->i_ads_entries, - num_ads * sizeof(inode->i_ads_entries[0])); - if (!ads_entries) { - ERROR("Failed to allocate memory for new alternate data stream"); - return NULL; - } - inode->i_ads_entries = ads_entries; - - new_entry = &inode->i_ads_entries[num_ads - 1]; - if (init_ads_entry(new_entry, stream_name, stream_name_nbytes, is_utf16le)) - return NULL; - new_entry->stream_id = inode->i_next_stream_id++; - inode->i_num_ads = num_ads; - return new_entry; + return avl_tree_entry(duplicate, struct wim_dentry, d_index_node); } -struct wim_ads_entry * -inode_add_ads_utf16le(struct wim_inode *inode, - const utf16lechar *stream_name, - size_t stream_name_nbytes) +/* Insert the @child dentry into the case insensitive index of the @dir + * directory. Return NULL if successfully inserted, otherwise a pointer to the + * already-inserted duplicate. */ +static struct wim_dentry * +dir_index_child_ci(struct wim_inode *dir, struct wim_dentry *child) { - DEBUG("Add alternate data stream \"%"WS"\"", stream_name); - return do_inode_add_ads(inode, stream_name, stream_name_nbytes, true); + struct avl_tree_node *duplicate; + + duplicate = avl_tree_insert(&dir->i_children_ci, + &child->d_index_node_ci, + _avl_dentry_compare_names_ci); + if (!duplicate) + return NULL; + return avl_tree_entry(duplicate, struct wim_dentry, d_index_node_ci); } -/* - * Add an alternate stream entry to a WIM inode and return a pointer to it, or - * NULL if memory could not be allocated. 
- */ -struct wim_ads_entry * -inode_add_ads(struct wim_inode *inode, const tchar *stream_name) +/* Remove the specified dentry from its directory's case-sensitive index. */ +static void +dir_unindex_child(struct wim_inode *dir, struct wim_dentry *child) { - DEBUG("Add alternate data stream \"%"TS"\"", stream_name); - return do_inode_add_ads(inode, stream_name, - tstrlen(stream_name) * sizeof(tchar), - TCHAR_IS_UTF16LE); + avl_tree_remove(&dir->i_children, &child->d_index_node); } -int -inode_add_ads_with_data(struct wim_inode *inode, const tchar *name, - const void *value, size_t size, - struct wim_lookup_table *lookup_table) +/* Remove the specified dentry from its directory's case-insensitive index. */ +static void +dir_unindex_child_ci(struct wim_inode *dir, struct wim_dentry *child) { - int ret = WIMLIB_ERR_NOMEM; - struct wim_ads_entry *new_ads_entry; - struct wim_lookup_table_entry *existing_lte; - struct wim_lookup_table_entry *lte; - u8 value_hash[SHA1_HASH_SIZE]; - - wimlib_assert(inode->i_resolved); - new_ads_entry = inode_add_ads(inode, name); - if (!new_ads_entry) - goto out; - sha1_buffer((const u8*)value, size, value_hash); - existing_lte = __lookup_resource(lookup_table, value_hash); - if (existing_lte) { - lte = existing_lte; - lte->refcnt++; - } else { - u8 *value_copy; - lte = new_lookup_table_entry(); - if (!lte) - goto out_remove_ads_entry; - value_copy = MALLOC(size); - if (!value_copy) { - FREE(lte); - goto out_remove_ads_entry; - } - memcpy(value_copy, value, size); - lte->resource_location = RESOURCE_IN_ATTACHED_BUFFER; - lte->attached_buffer = value_copy; - lte->resource_entry.original_size = size; - lte->resource_entry.size = size; - copy_hash(lte->hash, value_hash); - lookup_table_insert(lookup_table, lte); - } - new_ads_entry->lte = lte; - ret = 0; - goto out; -out_remove_ads_entry: - inode_remove_ads(inode, new_ads_entry - inode->i_ads_entries, - lookup_table); -out: - return ret; + avl_tree_remove(&dir->i_children_ci, 
&child->d_index_node_ci); } -/* Set the unnamed stream of a WIM inode, given a data buffer containing the - * stream contents. */ -int -inode_set_unnamed_stream(struct wim_inode *inode, const void *data, size_t len, - struct wim_lookup_table *lookup_table) +/* Return true iff the specified dentry is in its parent directory's + * case-insensitive index. */ +static bool +dentry_in_ci_index(const struct wim_dentry *dentry) { - struct wim_lookup_table_entry *lte, *existing_lte; - u8 hash[SHA1_HASH_SIZE]; - void *buf; - - sha1_buffer(data, len, hash); - existing_lte = __lookup_resource(lookup_table, hash); - if (existing_lte) { - wimlib_assert(wim_resource_size(existing_lte) == len); - lte = existing_lte; - lte->refcnt++; - } else { - lte = new_lookup_table_entry(); - if (!lte) - return WIMLIB_ERR_NOMEM; - buf = MALLOC(len); - if (!buf) { - free_lookup_table_entry(lte); - return WIMLIB_ERR_NOMEM; - } - memcpy(buf, data, len); - lte->resource_location = RESOURCE_IN_ATTACHED_BUFFER; - lte->attached_buffer = buf; - lte->resource_entry.original_size = len; - copy_hash(lte->hash, hash); - lookup_table_insert(lookup_table, lte); - } - inode->i_lte = lte; - inode->i_resolved = 1; - return 0; + return !avl_tree_node_is_unlinked(&dentry->d_index_node_ci); } -/* Remove an alternate data stream from a WIM inode */ -void -inode_remove_ads(struct wim_inode *inode, u16 idx, - struct wim_lookup_table *lookup_table) +/* + * Link a dentry into the tree. + * + * @parent: + * The dentry that will be the parent of @child. It must name a directory. + * + * @child: + * The dentry to link. It must be currently unlinked. + * + * Returns NULL if successful. If @parent already contains a dentry with the + * same case-sensitive name as @child, returns a pointer to this duplicate + * dentry. 
+ */ +struct wim_dentry * +dentry_add_child(struct wim_dentry *parent, struct wim_dentry *child) { - struct wim_ads_entry *ads_entry; - struct wim_lookup_table_entry *lte; + struct wim_dentry *duplicate; + struct wim_inode *dir; - wimlib_assert(idx < inode->i_num_ads); - wimlib_assert(inode->i_resolved); + wimlib_assert(parent != child); - ads_entry = &inode->i_ads_entries[idx]; + dir = parent->d_inode; - DEBUG("Remove alternate data stream \"%"WS"\"", ads_entry->stream_name); + wimlib_assert(inode_is_directory(dir)); - lte = ads_entry->lte; - if (lte) - lte_decrement_refcnt(lte, lookup_table); + duplicate = dir_index_child(dir, child); + if (duplicate) + return duplicate; - destroy_ads_entry(ads_entry); - - memmove(&inode->i_ads_entries[idx], - &inode->i_ads_entries[idx + 1], - (inode->i_num_ads - idx - 1) * sizeof(inode->i_ads_entries[0])); - inode->i_num_ads--; + duplicate = dir_index_child_ci(dir, child); + if (duplicate) { + list_add(&child->d_ci_conflict_list, &duplicate->d_ci_conflict_list); + avl_tree_node_set_unlinked(&child->d_index_node_ci); + } else { + INIT_LIST_HEAD(&child->d_ci_conflict_list); + } + child->d_parent = parent; + return NULL; } -#ifndef __WIN32__ -int -inode_get_unix_data(const struct wim_inode *inode, - struct wimlib_unix_data *unix_data, - u16 *stream_idx_ret) +/* Unlink a dentry from the tree. */ +void +unlink_dentry(struct wim_dentry *dentry) { - const struct wim_ads_entry *ads_entry; - const struct wim_lookup_table_entry *lte; - size_t size; - int ret; + struct wim_inode *dir; - wimlib_assert(inode->i_resolved); + /* Do nothing if the dentry is root or it's already unlinked. Not + * actually necessary based on the current callers, but we do the check + * here to be safe. 
*/ + if (unlikely(dentry->d_parent == dentry)) + return; - ads_entry = inode_get_ads_entry((struct wim_inode*)inode, - WIMLIB_UNIX_DATA_TAG, NULL); - if (!ads_entry) - return NO_UNIX_DATA; + dir = dentry->d_parent->d_inode; - if (stream_idx_ret) - *stream_idx_ret = ads_entry - inode->i_ads_entries; + dir_unindex_child(dir, dentry); - lte = ads_entry->lte; - if (!lte) - return NO_UNIX_DATA; + if (dentry_in_ci_index(dentry)) { - size = wim_resource_size(lte); - if (size != sizeof(struct wimlib_unix_data)) - return BAD_UNIX_DATA; + dir_unindex_child_ci(dir, dentry); - ret = read_full_resource_into_buf(lte, unix_data); - if (ret) - return ret; + if (!list_empty(&dentry->d_ci_conflict_list)) { + /* Make a different case-insensitively-the-same dentry + * be the "representative" in the search index. */ + struct list_head *next; + struct wim_dentry *other; + struct wim_dentry *existing; - if (unix_data->version != 0) - return BAD_UNIX_DATA; - return 0; + next = dentry->d_ci_conflict_list.next; + other = list_entry(next, struct wim_dentry, d_ci_conflict_list); + existing = dir_index_child_ci(dir, other); + wimlib_assert(existing == NULL); + } + } + list_del(&dentry->d_ci_conflict_list); + + /* Not actually necessary, but to be safe don't retain the now-obsolete + * parent pointer. 
*/ + dentry->d_parent = dentry; } -int -inode_set_unix_data(struct wim_inode *inode, uid_t uid, gid_t gid, mode_t mode, - struct wim_lookup_table *lookup_table, int which) +static int +read_extra_data(const u8 *p, const u8 *end, struct wim_inode *inode) { - struct wimlib_unix_data unix_data; - int ret; - bool have_good_unix_data = false; - bool have_unix_data = false; - u16 stream_idx; - - if (!(which & UNIX_DATA_CREATE)) { - ret = inode_get_unix_data(inode, &unix_data, &stream_idx); - if (ret == 0 || ret == BAD_UNIX_DATA || ret > 0) - have_unix_data = true; - if (ret == 0) - have_good_unix_data = true; + while (((uintptr_t)p & 7) && p < end) + p++; + + if (unlikely(p < end)) { + inode->i_extra = memdup(p, end - p); + if (!inode->i_extra) + return WIMLIB_ERR_NOMEM; + inode->i_extra_size = end - p; } - unix_data.version = 0; - if (which & UNIX_DATA_UID || !have_good_unix_data) - unix_data.uid = uid; - if (which & UNIX_DATA_GID || !have_good_unix_data) - unix_data.gid = gid; - if (which & UNIX_DATA_MODE || !have_good_unix_data) - unix_data.mode = mode; - ret = inode_add_ads_with_data(inode, WIMLIB_UNIX_DATA_TAG, - &unix_data, - sizeof(struct wimlib_unix_data), - lookup_table); - if (ret == 0 && have_unix_data) - inode_remove_ads(inode, stream_idx, lookup_table); - return ret; + return 0; } -#endif /* !__WIN32__ */ -/* Replace weird characters in filenames and alternate data stream names. +/* + * Set the type of each stream for an encrypted file. * - * In particular we do not want the path separator to appear in any names, as - * that would make it possible for a "malicious" WIM to extract itself to any - * location it wanted to. */ + * All data streams of the encrypted file should have been packed into a single + * stream in the format provided by ReadEncryptedFileRaw() on Windows. We + * assign this stream type STREAM_TYPE_EFSRPC_RAW_DATA. + * + * Encrypted files can't have a reparse point stream. 
In the on-disk NTFS + * format they can, but as far as I know the reparse point stream of an + * encrypted file can't be stored in the WIM format in a way that's compatible + * with WIMGAPI, nor is there even any way for it to be read or written on + * Windows when the process does not have access to the file encryption key. + */ static void -replace_forbidden_characters(utf16lechar *name) +assign_stream_types_encrypted(struct wim_inode *inode) { - utf16lechar *p; - - for (p = name; *p; p++) { - #ifdef __WIN32__ - if (wcschr(L"<>:\"/\\|?*", (wchar_t)*p)) - #else - if (*p == cpu_to_le16('/')) - #endif + for (unsigned i = 0; i < inode->i_num_streams; i++) { + struct wim_inode_stream *strm = &inode->i_streams[i]; + if (!stream_is_named(strm) && !is_zero_hash(strm->_stream_hash)) { - #ifdef __WIN32__ - *p = cpu_to_le16(0xfffd); - #else - *p = cpu_to_le16('?'); - #endif - if (name) { - WARNING("File, directory, or stream name \"%"WS"\"\n" - " contains forbidden characters; " - "substituting replacement characters.", - name); - name = NULL; + strm->stream_type = STREAM_TYPE_EFSRPC_RAW_DATA; + return; + } + } +} + +/* + * Set the type of each stream for an unencrypted file. + * + * There will be an unnamed data stream, a reparse point stream, or both an + * unnamed data stream and a reparse point stream. In addition, there may be + * named data streams. 
+ */ +static void +assign_stream_types_unencrypted(struct wim_inode *inode) +{ + bool found_reparse_point_stream = false; + bool found_unnamed_data_stream = false; + struct wim_inode_stream *unnamed_stream_with_zero_hash = NULL; + + for (unsigned i = 0; i < inode->i_num_streams; i++) { + struct wim_inode_stream *strm = &inode->i_streams[i]; + + if (stream_is_named(strm)) { + /* Named data stream */ + strm->stream_type = STREAM_TYPE_DATA; + } else if (!is_zero_hash(strm->_stream_hash)) { + if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) && + !found_reparse_point_stream) { + found_reparse_point_stream = true; + strm->stream_type = STREAM_TYPE_REPARSE_POINT; + } else if (!found_unnamed_data_stream) { + found_unnamed_data_stream = true; + strm->stream_type = STREAM_TYPE_DATA; } + } else { + /* If no stream name is specified and the hash is zero, + * then remember this stream for later so that we can + * assign it to the unnamed data stream if we don't find + * a better candidate. */ + unnamed_stream_with_zero_hash = strm; } } + + if (!found_unnamed_data_stream && unnamed_stream_with_zero_hash != NULL) + unnamed_stream_with_zero_hash->stream_type = STREAM_TYPE_DATA; } /* - * Reads the alternate data stream entries of a WIM dentry. - * - * @p: Pointer to buffer that starts with the first alternate stream entry. - * - * @inode: Inode to load the alternate data streams into. - * @inode->i_num_ads must have been set to the number of - * alternate data streams that are expected. - * - * @remaining_size: Number of bytes of data remaining in the buffer pointed - * to by @p. - * - * The format of the on-disk alternate stream entries is as follows: - * - * struct wim_ads_entry_on_disk { - * u64 length; // Length of the entry, in bytes. This includes - * all fields (including the stream name and - * null terminator if present, AND the padding!). 
- * u64 reserved; // Seems to be unused - * u8 hash[20]; // SHA1 message digest of the uncompressed stream - * u16 stream_name_len; // Length of the stream name, in bytes - * char stream_name[]; // Stream name in UTF-16LE, @stream_name_len bytes long, - * not including null terminator - * u16 zero; // UTF-16 null terminator for the stream name, NOT - * included in @stream_name_len. Based on what - * I've observed from filenames in dentries, - * this field should not exist when - * (@stream_name_len == 0), but you can't - * actually tell because of the padding anyway - * (provided that the padding is zeroed, which - * it always seems to be). - * char padding[]; // Padding to make the size a multiple of 8 bytes. - * }; - * - * In addition, the entries are 8-byte aligned. - * - * Return 0 on success or nonzero on failure. On success, inode->i_ads_entries - * is set to an array of `struct wim_ads_entry's of length inode->i_num_ads. On - * failure, @inode is not modified. + * Read and interpret the collection of streams for the specified inode. 
*/ static int -read_ads_entries(const u8 *p, struct wim_inode *inode, u64 remaining_size) +setup_inode_streams(const u8 *p, const u8 *end, struct wim_inode *inode, + unsigned num_extra_streams, const u8 *default_hash, + u64 *offset_p) { - u16 num_ads; - struct wim_ads_entry *ads_entries; - int ret; + const u8 *orig_p = p; - num_ads = inode->i_num_ads; - ads_entries = CALLOC(num_ads, sizeof(inode->i_ads_entries[0])); - if (!ads_entries) { - ERROR("Could not allocate memory for %"PRIu16" " - "alternate data stream entries", num_ads); - return WIMLIB_ERR_NOMEM; + inode->i_num_streams = 1 + num_extra_streams; + + if (unlikely(inode->i_num_streams > ARRAY_LEN(inode->i_embedded_streams))) { + inode->i_streams = CALLOC(inode->i_num_streams, + sizeof(inode->i_streams[0])); + if (!inode->i_streams) + return WIMLIB_ERR_NOMEM; } - for (u16 i = 0; i < num_ads; i++) { - struct wim_ads_entry *cur_entry; + /* Use the default hash field for the first stream */ + inode->i_streams[0].stream_name = (utf16lechar *)NO_STREAM_NAME; + copy_hash(inode->i_streams[0]._stream_hash, default_hash); + inode->i_streams[0].stream_type = STREAM_TYPE_UNKNOWN; + inode->i_streams[0].stream_id = 0; + + /* Read the extra stream entries */ + for (unsigned i = 1; i < inode->i_num_streams; i++) { + struct wim_inode_stream *strm; + const struct wim_extra_stream_entry_on_disk *disk_strm; u64 length; - u64 length_no_padding; - u64 total_length; - const u8 *p_save = p; - - cur_entry = &ads_entries[i]; - - #ifdef WITH_FUSE - ads_entries[i].stream_id = i + 1; - #endif - - /* Read the base stream entry, excluding the stream name. 
*/ - if (remaining_size < WIM_ADS_ENTRY_DISK_SIZE) { - ERROR("Stream entries go past end of metadata resource"); - ERROR("(remaining_size = %"PRIu64")", remaining_size); - ret = WIMLIB_ERR_INVALID_DENTRY; - goto out_free_ads_entries; - } + u16 name_nbytes; - p = get_u64(p, &length); - p = get_u64(p, &cur_entry->unused); - p = get_bytes(p, SHA1_HASH_SIZE, cur_entry->hash); - p = get_u16(p, &cur_entry->stream_name_nbytes); + strm = &inode->i_streams[i]; - cur_entry->stream_name = NULL; + strm->stream_id = i; - /* Length including neither the null terminator nor the padding - * */ - length_no_padding = WIM_ADS_ENTRY_DISK_SIZE + - cur_entry->stream_name_nbytes; + /* Do we have at least the size of the fixed-length data we know + * we need? */ + if ((end - p) < sizeof(struct wim_extra_stream_entry_on_disk)) + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; - /* Length including the null terminator and the padding */ - total_length = ((length_no_padding + 2) + 7) & ~7; + disk_strm = (const struct wim_extra_stream_entry_on_disk *)p; - wimlib_assert(total_length == ads_entry_total_length(cur_entry)); + /* Read the length field */ + length = ALIGN(le64_to_cpu(disk_strm->length), 8); - if (remaining_size < length_no_padding) { - ERROR("Stream entries go past end of metadata resource"); - ERROR("(remaining_size = %"PRIu64" bytes, " - "length_no_padding = %"PRIu64" bytes)", - remaining_size, length_no_padding); - ret = WIMLIB_ERR_INVALID_DENTRY; - goto out_free_ads_entries; - } + /* Make sure the length field is neither so small it doesn't + * include all the fixed-length data nor so large it overflows + * the metadata resource buffer. */ + if (length < sizeof(struct wim_extra_stream_entry_on_disk) || + length > (end - p)) + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; - /* The @length field in the on-disk ADS entry is expected to be - * equal to @total_length, which includes all of the entry and - * the padding that follows it to align the next ADS entry to an - * 8-byte boundary.
However, to be safe, we'll accept the - * length field as long as it's not less than the un-padded - * total length and not more than the padded total length. */ - if (length < length_no_padding || length > total_length) { - ERROR("Stream entry has unexpected length " - "field (length field = %"PRIu64", " - "unpadded total length = %"PRIu64", " - "padded total length = %"PRIu64")", - length, length_no_padding, total_length); - ret = WIMLIB_ERR_INVALID_DENTRY; - goto out_free_ads_entries; - } + /* Read the rest of the fixed-length data. */ - if (cur_entry->stream_name_nbytes) { - cur_entry->stream_name = MALLOC(cur_entry->stream_name_nbytes + 2); - if (!cur_entry->stream_name) { - ret = WIMLIB_ERR_NOMEM; - goto out_free_ads_entries; - } - get_bytes(p, cur_entry->stream_name_nbytes, - cur_entry->stream_name); - cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = 0; - replace_forbidden_characters(cur_entry->stream_name); + copy_hash(strm->_stream_hash, disk_strm->hash); + name_nbytes = le16_to_cpu(disk_strm->name_nbytes); + + /* If name_nbytes != 0, the stream is named. */ + if (name_nbytes != 0) { + /* The name is encoded in UTF-16LE, which uses 2-byte + * coding units, so the length of the name had better be + * an even number of bytes. */ + if (name_nbytes & 1) + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; + + /* Add the length of the stream name to get the length + * we actually need to read. Make sure this isn't more + * than the specified length of the entry. */ + if (sizeof(struct wim_extra_stream_entry_on_disk) + + name_nbytes > length) + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; + + strm->stream_name = utf16le_dupz(disk_strm->name, + name_nbytes); + if (!strm->stream_name) + return WIMLIB_ERR_NOMEM; + } else { + strm->stream_name = (utf16lechar *)NO_STREAM_NAME; } - /* It's expected that the size of every ADS entry is a multiple - * of 8.
However, to be safe, I'm allowing the possibility of - * an ADS entry at the very end of the metadata resource ending - * un-aligned. So although we still need to increment the input - * pointer by @total_length to reach the next ADS entry, it's - * possible that less than @total_length is actually remaining - * in the metadata resource. We should set the remaining size to - * 0 bytes if this happens. */ - p = p_save + total_length; - if (remaining_size < total_length) - remaining_size = 0; - else - remaining_size -= total_length; + + strm->stream_type = STREAM_TYPE_UNKNOWN; + + p += length; } - inode->i_ads_entries = ads_entries; -#ifdef WITH_FUSE - inode->i_next_stream_id = inode->i_num_ads + 1; -#endif + + inode->i_next_stream_id = inode->i_num_streams; + + /* Now, assign a type to each stream. Unfortunately this requires + * various hacks because stream types aren't explicitly provided in the + * WIM on-disk format. */ + + if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) + assign_stream_types_encrypted(inode); + else + assign_stream_types_unencrypted(inode); + + *offset_p += p - orig_p; return 0; -out_free_ads_entries: - for (u16 i = 0; i < num_ads; i++) - destroy_ads_entry(&ads_entries[i]); - FREE(ads_entries); - return ret; } -/* - * Reads a WIM directory entry, including all alternate data stream entries that - * follow it, from the WIM image's metadata resource. - * - * @metadata_resource: Buffer containing the uncompressed metadata resource. - * @metadata_resource_len: Length of the metadata resource. - * @offset: Offset of this directory entry in the metadata resource. - * @dentry: A `struct wim_dentry' that will be filled in by this function. - * - * Return 0 on success or nonzero on failure. On failure, @dentry will have - * been modified, but it will not be left with pointers to any allocated - * buffers. On success, the dentry->length field must be examined. If zero, - * this was a special "end of directory" dentry and not a real dentry. 
If - * nonzero, this was a real dentry. - */ -int -read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, - u64 offset, struct wim_dentry *dentry) +/* Read a dentry, including all extra stream entries that follow it, from an + * uncompressed metadata resource buffer. */ +static int +read_dentry(const u8 * restrict buf, size_t buf_len, + u64 *offset_p, struct wim_dentry **dentry_ret) { + u64 offset = *offset_p; + u64 length; const u8 *p; - u64 calculated_size; - utf16lechar *file_name = NULL; - utf16lechar *short_name = NULL; + const struct wim_dentry_on_disk *disk_dentry; + struct wim_dentry *dentry; + struct wim_inode *inode; u16 short_name_nbytes; - u16 file_name_nbytes; + u16 name_nbytes; + u64 calculated_size; int ret; - struct wim_inode *inode = NULL; - - dentry_common_init(dentry); - /*Make sure the dentry really fits into the metadata resource.*/ - if (offset + 8 > metadata_resource_len || offset + 8 < offset) { - ERROR("Directory entry starting at %"PRIu64" ends past the " - "end of the metadata resource (size %"PRIu64")", - offset, metadata_resource_len); - return WIMLIB_ERR_INVALID_DENTRY; - } + BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE); /* Before reading the whole dentry, we need to read just the length. * This is because a dentry of length 8 (that is, just the length field) * terminates the list of sibling directory entries. */ - p = get_u64(&metadata_resource[offset], &dentry->length); + /* Check for buffer overrun. */ + if (unlikely(offset + sizeof(u64) > buf_len || + offset + sizeof(u64) < offset)) + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; - /* A zero length field (really a length of 8, since that's how big the - * directory entry is...) indicates that this is the end of directory - * dentry. We do not read it into memory as an actual dentry, so just - * return successfully in that case. */ - if (dentry->length == 0) - return 0; + /* Get pointer to the dentry data. 
*/ + p = &buf[offset]; + disk_dentry = (const struct wim_dentry_on_disk*)p; - /* If the dentry does not overflow the metadata resource buffer and is - * not too short, read the rest of it (excluding the alternate data - * streams, but including the file name and short name variable-length - * fields) into memory. */ - if (offset + dentry->length >= metadata_resource_len - || offset + dentry->length < offset) - { - ERROR("Directory entry at offset %"PRIu64" and with size " - "%"PRIu64" ends past the end of the metadata resource " - "(size %"PRIu64")", - offset, dentry->length, metadata_resource_len); - return WIMLIB_ERR_INVALID_DENTRY; - } + /* Get dentry length. */ + length = ALIGN(le64_to_cpu(disk_dentry->length), 8); - if (dentry->length < WIM_DENTRY_DISK_SIZE) { - ERROR("Directory entry has invalid length of %"PRIu64" bytes", - dentry->length); - return WIMLIB_ERR_INVALID_DENTRY; + /* Check for end-of-directory. */ + if (length <= 8) { + *dentry_ret = NULL; + return 0; } - inode = new_timeless_inode(); - if (!inode) - return WIMLIB_ERR_NOMEM; + /* Validate dentry length. */ + if (unlikely(length < sizeof(struct wim_dentry_on_disk))) + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; - p = get_u32(p, &inode->i_attributes); - p = get_u32(p, (u32*)&inode->i_security_id); - p = get_u64(p, &dentry->subdir_offset); + /* Check for buffer overrun. */ + if (unlikely(offset + length > buf_len || + offset + length < offset)) + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; - p = get_u64(p, &inode->i_unused_1); - p = get_u64(p, &inode->i_unused_2); + /* Allocate new dentry structure, along with a preliminary inode. */ + ret = new_dentry_with_new_inode(NULL, false, &dentry); + if (ret) + return ret; - p = get_u64(p, &inode->i_creation_time); - p = get_u64(p, &inode->i_last_access_time); - p = get_u64(p, &inode->i_last_write_time); + inode = dentry->d_inode; - p = get_bytes(p, SHA1_HASH_SIZE, inode->i_hash); + /* Read more fields: some into the dentry, and some into the inode. 
*/ + inode->i_attributes = le32_to_cpu(disk_dentry->attributes); + inode->i_security_id = le32_to_cpu(disk_dentry->security_id); + dentry->d_subdir_offset = le64_to_cpu(disk_dentry->subdir_offset); + inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time); + inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time); + inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time); + inode->i_unknown_0x54 = le32_to_cpu(disk_dentry->unknown_0x54); - /* I don't know what's going on here. It seems like M$ screwed up the - * reparse points, then put the fields in the same place and didn't - * document it. */ if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { - p = get_u32(p, &inode->i_rp_unknown_1); - p = get_u32(p, &inode->i_reparse_tag); - p = get_u16(p, &inode->i_rp_unknown_2); - p = get_u16(p, &inode->i_not_rpfixed); + inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag); + inode->i_rp_reserved = le16_to_cpu(disk_dentry->reparse.rp_reserved); + inode->i_rp_flags = le16_to_cpu(disk_dentry->reparse.rp_flags); + /* Leave inode->i_ino at 0. Note: this means that WIM cannot + * represent multiple hard links to a reparse point file. */ } else { - p = get_u32(p, &inode->i_rp_unknown_1); - p = get_u64(p, &inode->i_ino); + inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id); } - /* By the way, the reparse_reserved field does not actually exist (at - * least when the file is not a reparse point) */ + /* Now onto reading the names. There are two of them: the (long) file + * name, and the short name. 
*/ - p = get_u16(p, &inode->i_num_ads); + short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes); + name_nbytes = le16_to_cpu(disk_dentry->name_nbytes); - p = get_u16(p, &short_name_nbytes); - p = get_u16(p, &file_name_nbytes); + if (unlikely((short_name_nbytes & 1) | (name_nbytes & 1))) { + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; + goto err_free_dentry; + } /* We now know the length of the file name and short name. Make sure - * the length of the dentry is large enough to actually hold them. - * - * The calculated length here is unaligned to allow for the possibility - * that the dentry->length names an unaligned length, although this - * would be unexpected. */ - calculated_size = __dentry_correct_length_unaligned(file_name_nbytes, - short_name_nbytes); - - if (dentry->length < calculated_size) { - ERROR("Unexpected end of directory entry! (Expected " - "at least %"PRIu64" bytes, got %"PRIu64" bytes. " - "short_name_nbytes = %hu, file_name_nbytes = %hu)", - calculated_size, dentry->length, - short_name_nbytes, file_name_nbytes); - ret = WIMLIB_ERR_INVALID_DENTRY; - goto out_free_inode; + * the length of the dentry is large enough to actually hold them. */ + calculated_size = dentry_min_len_with_names(name_nbytes, + short_name_nbytes); + + if (unlikely(length < calculated_size)) { + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; + goto err_free_dentry; } + /* Advance p to point past the base dentry, to the first name. */ + p += sizeof(struct wim_dentry_on_disk); + /* Read the filename if present. Note: if the filename is empty, there - * is no null terminator following it. */ - if (file_name_nbytes) { - file_name = MALLOC(file_name_nbytes + 2); - if (!file_name) { - ERROR("Failed to allocate %d bytes for dentry file name", - file_name_nbytes + 2); + * is no null terminator following it. 
*/ + if (name_nbytes) { + dentry->d_name = utf16le_dupz(p, name_nbytes); + if (unlikely(!dentry->d_name)) { ret = WIMLIB_ERR_NOMEM; - goto out_free_inode; + goto err_free_dentry; } - p = get_bytes(p, file_name_nbytes + 2, file_name); - if (file_name[file_name_nbytes / 2] != 0) { - file_name[file_name_nbytes / 2] = 0; - WARNING("File name in WIM dentry \"%"WS"\" is not " - "null-terminated!", file_name); - } - replace_forbidden_characters(file_name); - } - - /* Align the calculated size */ - calculated_size = (calculated_size + 7) & ~7; - - if (dentry->length > calculated_size) { - /* Weird; the dentry says it's longer than it should be. Note - * that the length field does NOT include the size of the - * alternate stream entries. */ - - /* Strangely, some directory entries inexplicably have a little - * over 70 bytes of extra data. The exact amount of data seems - * to be 72 bytes, but it is aligned on the next 8-byte - * boundary. It does NOT seem to be alternate data stream - * entries. Here's an example of the aligned data: - * - * 01000000 40000000 6c786bba c58ede11 b0bb0026 1870892a b6adb76f - * e63a3e46 8fca8653 0d2effa1 6c786bba c58ede11 b0bb0026 1870892a - * 00000000 00000000 00000000 00000000 - * - * Here's one interpretation of how the data is laid out. - * - * struct unknown { - * u32 field1; (always 0x00000001) - * u32 field2; (always 0x40000000) - * u8 data[48]; (???) - * u64 reserved1; (always 0) - * u64 reserved2; (always 0) - * };*/ - /*DEBUG("Dentry for file or directory `%"WS"' has %"PRIu64" "*/ - /*"extra bytes of data", file_name,*/ - /*dentry->length - calculated_size);*/ + dentry->d_name_nbytes = name_nbytes; + p += (u32)name_nbytes + 2; } /* Read the short filename if present. Note: if there is no short * filename, there is no null terminator following it. 
*/ if (short_name_nbytes) { - short_name = MALLOC(short_name_nbytes + 2); - if (!short_name) { - ERROR("Failed to allocate %d bytes for dentry short name", - short_name_nbytes + 2); + dentry->d_short_name = utf16le_dupz(p, short_name_nbytes); + if (unlikely(!dentry->d_short_name)) { ret = WIMLIB_ERR_NOMEM; - goto out_free_file_name; + goto err_free_dentry; + } + dentry->d_short_name_nbytes = short_name_nbytes; + p += (u32)short_name_nbytes + 2; + } + + /* Read extra data at end of dentry (but before extra stream entries). + * This may contain tagged metadata items. */ + ret = read_extra_data(p, &buf[offset + length], inode); + if (ret) + goto err_free_dentry; + + offset += length; + + /* Set up the inode's collection of streams. */ + ret = setup_inode_streams(&buf[offset], + &buf[buf_len], + inode, + le16_to_cpu(disk_dentry->num_extra_streams), + disk_dentry->default_hash, + &offset); + if (ret) + goto err_free_dentry; + + *offset_p = offset; /* Sets offset of next dentry in directory */ + *dentry_ret = dentry; + return 0; + +err_free_dentry: + free_dentry(dentry); + return ret; +} + +/* Is the dentry named "." or ".." ? 
*/ +static bool +dentry_is_dot_or_dotdot(const struct wim_dentry *dentry) +{ + if (dentry->d_name_nbytes <= 4) { + if (dentry->d_name_nbytes == 4) { + if (dentry->d_name[0] == cpu_to_le16('.') && + dentry->d_name[1] == cpu_to_le16('.')) + return true; + } else if (dentry->d_name_nbytes == 2) { + if (dentry->d_name[0] == cpu_to_le16('.')) + return true; } - p = get_bytes(p, short_name_nbytes + 2, short_name); - if (short_name[short_name_nbytes / 2] != 0) { - short_name[short_name_nbytes / 2] = 0; - WARNING("Short name in WIM dentry \"%"WS"\" is not " - "null-terminated!", file_name); + } + return false; +} + +static int +read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len, + struct wim_dentry * restrict dir) +{ + u64 cur_offset = dir->d_subdir_offset; + + /* Check for cyclic directory structure, which would cause infinite + * recursion if not handled. */ + for (struct wim_dentry *d = dir->d_parent; + !dentry_is_root(d); d = d->d_parent) + { + if (unlikely(d->d_subdir_offset == cur_offset)) { + ERROR("Cyclic directory structure detected: children " + "of \"%"TS"\" coincide with children of \"%"TS"\"", + dentry_full_path(dir), dentry_full_path(d)); + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; } - replace_forbidden_characters(short_name); } - /* - * Read the alternate data streams, if present. dentry->num_ads tells - * us how many they are, and they will directly follow the dentry - * on-disk. - * - * Note that each alternate data stream entry begins on an 8-byte - * aligned boundary, and the alternate data stream entries are NOT - * included in the dentry->length field for some reason. - */ - if (inode->i_num_ads != 0) { + for (;;) { + struct wim_dentry *child; + struct wim_dentry *duplicate; + int ret; - /* Trying different lengths is just a hack to make sure we have - * a chance of reading the ADS entries correctly despite the - * poor documentation. */ + /* Read next child of @dir. 
*/ + ret = read_dentry(buf, buf_len, &cur_offset, &child); + if (ret) + return ret; + + /* Check for end of directory. */ + if (child == NULL) + return 0; + + /* All dentries except the root should be named. */ + if (unlikely(!dentry_has_long_name(child))) { + WARNING("Ignoring unnamed dentry in " + "directory \"%"TS"\"", dentry_full_path(dir)); + free_dentry(child); + continue; + } - if (calculated_size != dentry->length) { - WARNING("Trying calculated dentry length (%"PRIu64") " - "instead of dentry->length field (%"PRIu64") " - "to read ADS entries", - calculated_size, dentry->length); + /* Don't allow files named "." or "..". */ + if (unlikely(dentry_is_dot_or_dotdot(child))) { + WARNING("Ignoring file named \".\" or \"..\"; " + "potentially malicious archive!!!"); + free_dentry(child); + continue; } - u64 lengths_to_try[3] = {calculated_size, - (dentry->length + 7) & ~7, - dentry->length}; - ret = WIMLIB_ERR_INVALID_DENTRY; - for (size_t i = 0; i < ARRAY_LEN(lengths_to_try); i++) { - if (lengths_to_try[i] > metadata_resource_len - offset) - continue; - ret = read_ads_entries(&metadata_resource[offset + lengths_to_try[i]], - inode, - metadata_resource_len - offset - lengths_to_try[i]); - if (ret == 0) - goto out; + + /* Link the child into the directory. */ + duplicate = dentry_add_child(dir, child); + if (unlikely(duplicate)) { + /* We already found a dentry with this same + * case-sensitive long name. Only keep the first one. + */ + WARNING("Ignoring duplicate file \"%"TS"\" " + "(the WIM image already contains a file " + "at that path with the exact same name)", + dentry_full_path(duplicate)); + free_dentry(child); + continue; + } + + /* If this child is a directory that itself has children, call + * this procedure recursively. 
*/ + if (child->d_subdir_offset != 0) { + if (likely(dentry_is_directory(child))) { + ret = read_dentry_tree_recursive(buf, + buf_len, + child); + if (ret) + return ret; + } else { + WARNING("Ignoring children of " + "non-directory file \"%"TS"\"", + dentry_full_path(child)); + } } - ERROR("Failed to read alternate data stream " - "entries of WIM dentry \"%"WS"\"", file_name); - goto out_free_short_name; } -out: - /* We've read all the data for this dentry. Set the names and their - * lengths, and we've done. */ - dentry->d_inode = inode; - dentry->file_name = file_name; - dentry->short_name = short_name; - dentry->file_name_nbytes = file_name_nbytes; - dentry->short_name_nbytes = short_name_nbytes; - return 0; -out_free_short_name: - FREE(short_name); -out_free_file_name: - FREE(file_name); -out_free_inode: - free_inode(inode); - return ret; } -/* Reads the children of a dentry, and all their children, ..., etc. from the - * metadata resource and into the dentry tree. +/* + * Read a tree of dentries from a WIM metadata resource. + * + * @buf: + * Buffer containing an uncompressed WIM metadata resource. * - * @metadata_resource: An array that contains the uncompressed metadata - * resource for the WIM file. + * @buf_len: + * Length of the uncompressed metadata resource, in bytes. * - * @metadata_resource_len: The length of the uncompressed metadata resource, in - * bytes. + * @root_offset: + * Offset in the metadata resource of the root of the dentry tree. * - * @dentry: A pointer to a `struct wim_dentry' that is the root of the directory - * tree and has already been read from the metadata resource. It - * does not need to be the real root because this procedure is - * called recursively. + * @root_ret: + * On success, either NULL or a pointer to the root dentry is written to + * this location. The former case only occurs in the unexpected case that + * the tree began with an end-of-directory entry. * - * Returns zero on success; nonzero on failure.
+ * Return values: + * WIMLIB_ERR_SUCCESS (0) + * WIMLIB_ERR_INVALID_METADATA_RESOURCE + * WIMLIB_ERR_NOMEM */ int -read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, - struct wim_dentry *dentry) +read_dentry_tree(const u8 *buf, size_t buf_len, + u64 root_offset, struct wim_dentry **root_ret) { - u64 cur_offset = dentry->subdir_offset; - struct wim_dentry *child; - struct wim_dentry cur_child; int ret; + struct wim_dentry *root; - /* - * If @dentry has no child dentries, nothing more needs to be done for - * this branch. This is the case for regular files, symbolic links, and - * *possibly* empty directories (although an empty directory may also - * have one child dentry that is the special end-of-directory dentry) - */ - if (cur_offset == 0) - return 0; + DEBUG("Reading dentry tree (root_offset=%"PRIu64")", root_offset); - /* Find and read all the children of @dentry. */ - while (1) { - - /* Read next child of @dentry into @cur_child. */ - ret = read_dentry(metadata_resource, metadata_resource_len, - cur_offset, &cur_child); - if (ret != 0) - break; - - /* Check for end of directory. */ - if (cur_child.length == 0) - break; - - /* Not end of directory. Allocate this child permanently and - * link it to the parent and previous child. */ - child = MALLOC(sizeof(struct wim_dentry)); - if (!child) { - ERROR("Failed to allocate %zu bytes for new dentry", - sizeof(struct wim_dentry)); - ret = WIMLIB_ERR_NOMEM; - break; + ret = read_dentry(buf, buf_len, &root_offset, &root); + if (ret) + return ret; + + if (likely(root != NULL)) { + if (unlikely(dentry_has_long_name(root) || + dentry_has_short_name(root))) + { + WARNING("The root directory has a nonempty name; " + "removing it."); + dentry_set_name(root, NULL); } - memcpy(child, &cur_child, sizeof(struct wim_dentry)); - dentry_add_child(dentry, child); - inode_add_dentry(child, child->d_inode); - - /* If there are children of this child, call this procedure - * recursively. 
*/ - if (child->subdir_offset != 0) { - ret = read_dentry_tree(metadata_resource, - metadata_resource_len, child); - if (ret != 0) - break; + + if (unlikely(!dentry_is_directory(root))) { + ERROR("The root of the WIM image is not a directory!"); + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; + goto err_free_dentry_tree; } - /* Advance to the offset of the next child. Note: We need to - * advance by the TOTAL length of the dentry, not by the length - * child->length, which although it does take into account the - * padding, it DOES NOT take into account alternate stream - * entries. */ - cur_offset += dentry_total_length(child); + if (likely(root->d_subdir_offset != 0)) { + ret = read_dentry_tree_recursive(buf, buf_len, root); + if (ret) + goto err_free_dentry_tree; + } + } else { + WARNING("The metadata resource has no directory entries; " + "treating as an empty image."); } + *root_ret = root; + return 0; + +err_free_dentry_tree: + free_dentry_tree(root, NULL); return ret; } +static u8 * +write_extra_stream_entry(u8 * restrict p, const utf16lechar * restrict name, + const u8 * restrict hash) +{ + struct wim_extra_stream_entry_on_disk *disk_strm = + (struct wim_extra_stream_entry_on_disk *)p; + u8 *orig_p = p; + size_t name_nbytes; + + if (name == NO_STREAM_NAME) + name_nbytes = 0; + else + name_nbytes = utf16le_len_bytes(name); + + disk_strm->reserved = 0; + copy_hash(disk_strm->hash, hash); + disk_strm->name_nbytes = cpu_to_le16(name_nbytes); + p += sizeof(struct wim_extra_stream_entry_on_disk); + if (name_nbytes != 0) + p = mempcpy(p, name, name_nbytes + 2); + /* Align to 8-byte boundary */ + while ((uintptr_t)p & 7) + *p++ = 0; + disk_strm->length = cpu_to_le64(p - orig_p); + return p; +} + /* - * Writes a WIM dentry to an output buffer. + * Write a WIM dentry to an output buffer. + * + * This includes any extra stream entries that may follow the dentry itself. + * + * @dentry: + * The dentry to write. * - * @dentry: The dentry structure. 
- * @p: The memory location to write the data to. - * @return: Pointer to the byte after the last byte we wrote as part of the - * dentry. + * @p: + * The memory location to which to write the data. + * + * Returns a pointer to the byte following the last written. */ static u8 * -write_dentry(const struct wim_dentry *dentry, u8 *p) +write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) { - u8 *orig_p = p; - const u8 *hash; - const struct wim_inode *inode = dentry->d_inode; + const struct wim_inode *inode; + struct wim_dentry_on_disk *disk_dentry; + const u8 *orig_p; + + wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */ + orig_p = p; - /* We calculate the correct length of the dentry ourselves because the - * dentry->length field may been set to an unexpected value from when we - * read the dentry in (for example, there may have been unknown data - * appended to the end of the dentry...) */ - u64 length = dentry_correct_length(dentry); - - p = put_u64(p, length); - p = put_u32(p, inode->i_attributes); - p = put_u32(p, inode->i_security_id); - p = put_u64(p, dentry->subdir_offset); - p = put_u64(p, inode->i_unused_1); - p = put_u64(p, inode->i_unused_2); - p = put_u64(p, inode->i_creation_time); - p = put_u64(p, inode->i_last_access_time); - p = put_u64(p, inode->i_last_write_time); - hash = inode_stream_hash(inode, 0); - p = put_bytes(p, SHA1_HASH_SIZE, hash); + inode = dentry->d_inode; + disk_dentry = (struct wim_dentry_on_disk*)p; + + disk_dentry->attributes = cpu_to_le32(inode->i_attributes); + disk_dentry->security_id = cpu_to_le32(inode->i_security_id); + disk_dentry->subdir_offset = cpu_to_le64(dentry->d_subdir_offset); + + disk_dentry->unused_1 = cpu_to_le64(0); + disk_dentry->unused_2 = cpu_to_le64(0); + + disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time); + disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time); + disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time); + 
disk_dentry->unknown_0x54 = cpu_to_le32(inode->i_unknown_0x54); if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { - p = put_u32(p, inode->i_rp_unknown_1); - p = put_u32(p, inode->i_reparse_tag); - p = put_u16(p, inode->i_rp_unknown_2); - p = put_u16(p, inode->i_not_rpfixed); + disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag); + disk_dentry->reparse.rp_reserved = cpu_to_le16(inode->i_rp_reserved); + disk_dentry->reparse.rp_flags = cpu_to_le16(inode->i_rp_flags); } else { - u64 link_group_id; - p = put_u32(p, inode->i_rp_unknown_1); - if (inode->i_nlink == 1) - link_group_id = 0; - else - link_group_id = inode->i_ino; - p = put_u64(p, link_group_id); - } - p = put_u16(p, inode->i_num_ads); - p = put_u16(p, dentry->short_name_nbytes); - p = put_u16(p, dentry->file_name_nbytes); - if (dentry_has_long_name(dentry)) { - p = put_bytes(p, dentry->file_name_nbytes + 2, - dentry->file_name); - } - if (dentry_has_short_name(dentry)) { - p = put_bytes(p, dentry->short_name_nbytes + 2, - dentry->short_name); + disk_dentry->nonreparse.hard_link_group_id = + cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino); } + disk_dentry->short_name_nbytes = cpu_to_le16(dentry->d_short_name_nbytes); + disk_dentry->name_nbytes = cpu_to_le16(dentry->d_name_nbytes); + p += sizeof(struct wim_dentry_on_disk); + + wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry)); + + if (dentry_has_long_name(dentry)) + p = mempcpy(p, dentry->d_name, (u32)dentry->d_name_nbytes + 2); + + if (dentry_has_short_name(dentry)) + p = mempcpy(p, dentry->d_short_name, (u32)dentry->d_short_name_nbytes + 2); + /* Align to 8-byte boundary */ - wimlib_assert(length >= (p - orig_p) && length - (p - orig_p) <= 7); - p = put_zeroes(p, length - (p - orig_p)); - - /* Write the alternate data streams, if there are any. Please see - * read_ads_entries() for comments about the format of the on-disk - * alternate data stream entries. 
*/ - for (u16 i = 0; i < inode->i_num_ads; i++) { - p = put_u64(p, ads_entry_total_length(&inode->i_ads_entries[i])); - p = put_u64(p, inode->i_ads_entries[i].unused); - hash = inode_stream_hash(inode, i + 1); - p = put_bytes(p, SHA1_HASH_SIZE, hash); - p = put_u16(p, inode->i_ads_entries[i].stream_name_nbytes); - if (inode->i_ads_entries[i].stream_name_nbytes) { - p = put_bytes(p, - inode->i_ads_entries[i].stream_name_nbytes + 2, - inode->i_ads_entries[i].stream_name); + while ((uintptr_t)p & 7) + *p++ = 0; + + if (inode->i_extra_size) { + /* Extra tagged items --- not usually present. */ + p = mempcpy(p, inode->i_extra, inode->i_extra_size); + + /* Align to 8-byte boundary */ + while ((uintptr_t)p & 7) + *p++ = 0; + } + + disk_dentry->length = cpu_to_le64(p - orig_p); + + /* Streams */ + + if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) { + const struct wim_inode_stream *efs_strm; + const u8 *efs_hash; + + efs_strm = inode_get_unnamed_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA); + efs_hash = efs_strm ? 
stream_hash(efs_strm) : zero_hash; + copy_hash(disk_dentry->default_hash, efs_hash); + disk_dentry->num_extra_streams = cpu_to_le16(0); + } else { + /* + * Extra stream entries: + * + * - Use one extra stream entry for each named data stream + * - Use one extra stream entry for the unnamed data stream when there is either: + * - a reparse point stream + * - at least one named data stream (for Windows PE bug workaround) + * - Use one extra stream entry for the reparse point stream if there is one + */ + bool have_named_data_stream = false; + bool have_reparse_point_stream = false; + const u8 *unnamed_data_stream_hash = zero_hash; + const u8 *reparse_point_hash; + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct wim_inode_stream *strm = &inode->i_streams[i]; + if (strm->stream_type == STREAM_TYPE_DATA) { + if (stream_is_named(strm)) + have_named_data_stream = true; + else + unnamed_data_stream_hash = stream_hash(strm); + } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) { + have_reparse_point_stream = true; + reparse_point_hash = stream_hash(strm); + } + } + + if (unlikely(have_reparse_point_stream || have_named_data_stream)) { + + unsigned num_extra_streams = 0; + + copy_hash(disk_dentry->default_hash, zero_hash); + + if (have_reparse_point_stream) { + p = write_extra_stream_entry(p, NO_STREAM_NAME, + reparse_point_hash); + num_extra_streams++; + } + + p = write_extra_stream_entry(p, NO_STREAM_NAME, + unnamed_data_stream_hash); + num_extra_streams++; + + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct wim_inode_stream *strm = &inode->i_streams[i]; + if (stream_is_named_data_stream(strm)) { + p = write_extra_stream_entry(p, strm->stream_name, + stream_hash(strm)); + num_extra_streams++; + } + } + wimlib_assert(num_extra_streams <= 0xFFFF); + + disk_dentry->num_extra_streams = cpu_to_le16(num_extra_streams); + } else { + copy_hash(disk_dentry->default_hash, unnamed_data_stream_hash); + disk_dentry->num_extra_streams = 
cpu_to_le16(0); } - p = put_zeroes(p, (8 - (p - orig_p) % 8) % 8); } - wimlib_assert(p - orig_p == __dentry_total_length(dentry, length)); + return p; } static int -write_dentry_cb(struct wim_dentry *dentry, void *_p) +write_dir_dentries(struct wim_dentry *dir, void *_pp) { - u8 **p = _p; - *p = write_dentry(dentry, *p); - return 0; -} + if (dir->d_subdir_offset != 0) { + u8 **pp = _pp; + u8 *p = *pp; + struct wim_dentry *child; -static u8 * -write_dentry_tree_recursive(const struct wim_dentry *parent, u8 *p); + /* write child dentries */ + for_dentry_child(child, dir) + p = write_dentry(child, p); -static int -write_dentry_tree_recursive_cb(struct wim_dentry *dentry, void *_p) -{ - u8 **p = _p; - *p = write_dentry_tree_recursive(dentry, *p); + /* write end of directory entry */ + *(u64*)p = 0; + p += 8; + *pp = p; + } return 0; } -/* Recursive function that writes a dentry tree rooted at @parent, not including - * @parent itself, which has already been written. */ -static u8 * -write_dentry_tree_recursive(const struct wim_dentry *parent, u8 *p) -{ - /* Nothing to do if this dentry has no children. */ - if (parent->subdir_offset == 0) - return p; - - /* Write child dentries and end-of-directory entry. - * - * Note: we need to write all of this dentry's children before - * recursively writing the directory trees rooted at each of the child - * dentries, since the on-disk dentries for a dentry's children are - * always located at consecutive positions in the metadata resource! */ - for_dentry_child(parent, write_dentry_cb, &p); - - /* write end of directory entry */ - p = put_u64(p, 0); - - /* Recurse on children. */ - for_dentry_child(parent, write_dentry_tree_recursive_cb, &p); - return p; -} - -/* Writes a directory tree to the metadata resource. +/* + * Write a directory tree to the metadata resource. + * + * @root: + * The root of a dentry tree on which calculate_subdir_offsets() has been + * called. 
This cannot be NULL; if the dentry tree is empty, the caller is + * expected to first generate a dummy root directory. * - * @root: Root of the dentry tree. - * @p: Pointer to a buffer with enough space for the dentry tree. + * @p: + * Pointer to a buffer with enough space for the dentry tree. This size + * must have been obtained by calculate_subdir_offsets(). * - * Returns pointer to the byte after the last byte we wrote. + * Returns a pointer to the byte following the last written. */ u8 * -write_dentry_tree(const struct wim_dentry *root, u8 *p) +write_dentry_tree(struct wim_dentry *root, u8 *p) { DEBUG("Writing dentry tree."); - wimlib_assert(dentry_is_root(root)); - /* If we're the root dentry, we have no parent that already - * wrote us, so we need to write ourselves. */ + wimlib_assert(root != NULL); + + /* write root dentry and end-of-directory entry following it */ p = write_dentry(root, p); + *(u64*)p = 0; + p += 8; - /* Write end of directory entry after the root dentry just to be safe; - * however the root dentry obviously cannot have any siblings. */ - p = put_u64(p, 0); + /* write the rest of the dentry tree */ + for_dentry_in_tree(root, write_dir_dentries, &p); - /* Recursively write the rest of the dentry tree. */ - return write_dentry_tree_recursive(root, p); + return p; }