X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fdentry.c;h=0a898ff73943e3327b3f3b604c96630080fe9cad;hb=c73468ab9d94f48a801008ea4fb15de6880c44e8;hp=1c93792fd9ed0556f9d0914336c4b386dac2e795;hpb=bd25d812eaf391fea72f4a7970bc67fddf80ac6f;p=wimlib diff --git a/src/dentry.c b/src/dentry.c index 1c93792f..0a898ff7 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -1,232 +1,569 @@ /* * dentry.c * - * A dentry (directory entry) contains the metadata for a file. In the WIM file - * format, the dentries are stored in the "metadata resource" section right - * after the security data. Each image in the WIM file has its own metadata - * resource with its own security data and dentry tree. Dentries in different - * images may share file resources by referring to the same lookup table - * entries. + * In the WIM file format, the dentries are stored in the "metadata resource" + * section right after the security data. Each image in the WIM file has its + * own metadata resource with its own security data and dentry tree. Dentries + * in different images may share file resources by referring to the same lookup + * table entries. */ /* - * - * Copyright (C) 2010 Carl Thijssen - * Copyright (C) 2012 Eric Biggers + * Copyright (C) 2012, 2013 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU Lesser General Public License as published by the Free - * Software Foundation; either version 2.1 of the License, or (at your option) - * any later version. + * terms of the GNU General Public License as published by the Free Software + * Foundation; either version 3 of the License, or (at your option) any later + * version. * * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License for more - * details. + * A PARTICULAR PURPOSE. See the GNU General Public License for more details. * - * You should have received a copy of the GNU Lesser General Public License - * along with wimlib; if not, see http://www.gnu.org/licenses/. + * You should have received a copy of the GNU General Public License along with + * wimlib; if not, see http://www.gnu.org/licenses/. */ -#include "wimlib_internal.h" -#include "dentry.h" -#include "io.h" -#include "timestamp.h" -#include "lookup_table.h" -#include "sha1.h" -#include -#include - +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif -/* Transfers file attributes from a `stat' buffer to a struct dentry. */ -void stbuf_to_dentry(const struct stat *stbuf, struct dentry *dentry) +#include "wimlib.h" +#include "wimlib/case.h" +#include "wimlib/dentry.h" +#include "wimlib/encoding.h" +#include "wimlib/endianness.h" +#include "wimlib/error.h" +#include "wimlib/lookup_table.h" +#include "wimlib/metadata.h" +#include "wimlib/paths.h" +#include "wimlib/resource.h" +#include "wimlib/security.h" +#include "wimlib/sha1.h" +#include "wimlib/timestamp.h" + +#include + +/* On-disk format of a WIM dentry (directory entry), located in the metadata + * resource for a WIM image. */ +struct wim_dentry_on_disk { + + /* Length of this directory entry in bytes, not including any alternate + * data stream entries. Should be a multiple of 8 so that the following + * dentry or alternate data stream entry is aligned on an 8-byte + * boundary. (If not, wimlib will round it up.) It must be at least as + * long as the fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE), + * plus the lengths of the file name and/or short name if present. + * + * It is also possible for this field to be 0. This situation, which is + * undocumented, indicates the end of a list of sibling nodes in a + * directory. 
It also means the real length is 8, because the dentry + * included only the length field, but that takes up 8 bytes. */ + le64 length; + + /* Attributes of the file or directory. This is a bitwise OR of the + * FILE_ATTRIBUTE_* constants and should correspond to the value + * retrieved by GetFileAttributes() on Windows. */ + le32 attributes; + + /* A value that specifies the security descriptor for this file or + * directory. If -1, the file or directory has no security descriptor. + * Otherwise, it is a 0-based index into the WIM image's table of + * security descriptors (see: `struct wim_security_data') */ + sle32 security_id; + + /* Offset, in bytes, from the start of the uncompressed metadata + * resource of this directory's child directory entries, or 0 if this + * directory entry does not correspond to a directory or otherwise does + * not have any children. */ + le64 subdir_offset; + + /* Reserved fields */ + le64 unused_1; + le64 unused_2; + + + /* Creation time, last access time, and last write time, in + * 100-nanosecond intervals since 12:00 a.m UTC January 1, 1601. They + * should correspond to the times gotten by calling GetFileTime() on + * Windows. */ + le64 creation_time; + le64 last_access_time; + le64 last_write_time; + + /* Vaguely, the SHA-1 message digest ("hash") of the file's contents. + * More specifically, this is for the "unnamed data stream" rather than + * any "alternate data streams". This hash value is used to look up the + * corresponding entry in the WIM's stream lookup table to actually find + * the file contents within the WIM. + * + * If the file has no unnamed data stream (e.g. is a directory), then + * this field will be all zeroes. If the unnamed data stream is empty + * (i.e. an "empty file"), then this field is also expected to be all + * zeroes. (It will be if wimlib created the WIM image, at least; + * otherwise it can't be ruled out that the SHA-1 message digest of 0 + * bytes of data is given explicitly.) 
+ * + * If the file has reparse data, then this field will instead specify + * the SHA-1 message digest of the reparse data. If it is somehow + * possible for a file to have both an unnamed data stream and reparse + * data, then this is not handled by wimlib. + * + * As a further special case, if this field is all zeroes but there is + * an alternate data stream entry with no name and a nonzero SHA-1 + * message digest field, then that hash must be used instead of this + * one. In fact, when named data streams are present, some versions of + * Windows PE contain a bug where they only look in the alternate data + * stream entries for the unnamed data stream, not here. + */ + u8 unnamed_stream_hash[SHA1_HASH_SIZE]; + + /* The format of the following data is not yet completely known and they + * do not correspond to Microsoft's documentation. + * + * If this directory entry is for a reparse point (has + * FILE_ATTRIBUTE_REPARSE_POINT set in the attributes field), then the + * version of the following fields containing the reparse tag is valid. + * Furthermore, the field notated as not_rpfixed, as far as I can tell, + * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the + * targets of absolute symbolic links) were *not* done, and otherwise 0. + * + * If this directory entry is not for a reparse point, then the version + * of the following fields containing the hard_link_group_id is valid. + * All MS says about this field is that "If this file is part of a hard + * link set, all the directory entries in the set will share the same + * value in this field.". However, more specifically I have observed + * the following: + * - If the file is part of a hard link set of size 1, then the + * hard_link_group_id should be set to either 0, which is treated + * specially as indicating "not hardlinked", or any unique value. + * - The specific nonzero values used to identify hard link sets do + * not matter, as long as they are unique. 
+ * - However, due to bugs in Microsoft's software, it is actually NOT + * guaranteed that directory entries that share the same hard link + * group ID are actually hard linked to each other. We have to + * handle this by using special code to use distinguishing features + * (which is possible because some information about the underlying + * inode is repeated in each dentry) to split up these fake hard link + * groups into what they actually are supposed to be. + */ + union { + struct { + le32 rp_unknown_1; + le32 reparse_tag; + le16 rp_unknown_2; + le16 not_rpfixed; + } _packed_attribute reparse; + struct { + le32 rp_unknown_1; + le64 hard_link_group_id; + } _packed_attribute nonreparse; + }; + + /* Number of alternate data stream entries that directly follow this + * dentry on-disk. */ + le16 num_alternate_data_streams; + + /* Length of this file's UTF-16LE encoded short name (8.3 DOS-compatible + * name), if present, in bytes, excluding the null terminator. If this + * file has no short name, then this field should be 0. */ + le16 short_name_nbytes; + + /* Length of this file's UTF-16LE encoded "long" name, excluding the + * null terminator. If this file has no long name, then this field + * should be 0. It's expected that only the root dentry has this field + * set to 0. */ + le16 file_name_nbytes; + + /* Followed by variable length file name, in UTF16-LE, if + * file_name_nbytes != 0. Includes null terminator. */ + /*utf16lechar file_name[];*/ + + /* Followed by variable length short name, in UTF16-LE, if + * short_name_nbytes != 0. Includes null terminator. */ + /*utf16lechar short_name[];*/ +} _packed_attribute; + +/* Calculates the unaligned length, in bytes, of an on-disk WIM dentry that has + * a file name and short name that take the specified numbers of bytes. This + * excludes any alternate data stream entries that may follow the dentry. 
*/ +static u64 +dentry_correct_length_unaligned(u16 file_name_nbytes, u16 short_name_nbytes) { - if (S_ISDIR(stbuf->st_mode)) - dentry->attributes = WIM_FILE_ATTRIBUTE_DIRECTORY; - else - dentry->attributes = WIM_FILE_ATTRIBUTE_NORMAL; + u64 length = sizeof(struct wim_dentry_on_disk); + if (file_name_nbytes) + length += file_name_nbytes + 2; + if (short_name_nbytes) + length += short_name_nbytes + 2; + return length; } -/* Transfers file attributes from a struct dentry to a `stat' buffer. */ -void dentry_to_stbuf(const struct dentry *dentry, struct stat *stbuf, - const struct lookup_table *table) +/* Calculates the aligned length, in bytes, of an on-disk WIM dentry, based on + * the file name length and short name length. Note that dentry->length is + * ignored; also, this excludes any alternate data stream entries that may + * follow the dentry. */ +static u64 +dentry_correct_length_aligned(const struct wim_dentry *dentry) { - struct lookup_table_entry *lte; + u64 len; - if (dentry_is_directory(dentry)) - stbuf->st_mode = S_IFDIR | 0755; - else - stbuf->st_mode = S_IFREG | 0644; - - if (table) - lte = lookup_resource(table, dentry->hash); - else - lte = NULL; + len = dentry_correct_length_unaligned(dentry->file_name_nbytes, + dentry->short_name_nbytes); + return (len + 7) & ~7; +} - if (lte) { - stbuf->st_nlink = lte->refcnt; - stbuf->st_size = lte->resource_entry.original_size; - } else { - stbuf->st_nlink = 1; - stbuf->st_size = 0; +/* Sets the name of a WIM dentry from a multibyte string. + * Only use this on dentries not inserted into the tree. Use rename_wim_path() + * to do a real rename. 
*/ +int +dentry_set_name(struct wim_dentry *dentry, const tchar *new_name) +{ + int ret; + ret = get_utf16le_string(new_name, &dentry->file_name, + &dentry->file_name_nbytes); + if (ret == 0) { + /* Clear the short name and recalculate the dentry length */ + if (dentry_has_short_name(dentry)) { + FREE(dentry->short_name); + dentry->short_name = NULL; + dentry->short_name_nbytes = 0; + } } - stbuf->st_uid = getuid(); - stbuf->st_gid = getgid(); - stbuf->st_atime = ms_timestamp_to_unix(dentry->last_access_time); - stbuf->st_mtime = ms_timestamp_to_unix(dentry->last_write_time); - stbuf->st_ctime = ms_timestamp_to_unix(dentry->creation_time); - stbuf->st_blocks = (stbuf->st_size + 511) / 512; + return ret; } -/* Makes all timestamp fields for the dentry be the current time. */ -void dentry_update_all_timestamps(struct dentry *dentry) +/* Returns the total length of a WIM alternate data stream entry on-disk, + * including the stream name, the null terminator, AND the padding after the + * entry to align the next ADS entry or dentry on an 8-byte boundary. */ +static u64 +ads_entry_total_length(const struct wim_ads_entry *entry) { - u64 now = get_timestamp(); - dentry->creation_time = now; - dentry->last_access_time = now; - dentry->last_write_time = now; + u64 len = sizeof(struct wim_ads_entry_on_disk); + if (entry->stream_name_nbytes) + len += entry->stream_name_nbytes + 2; + return (len + 7) & ~7; } -/* - * Calls a function on all directory entries in a directory tree. It is called - * on a parent before its children. +/* + * Determine whether to include a "dummy" stream when writing a WIM dentry: + * + * Some versions of Microsoft's WIM software (the boot driver(s) in WinPE 3.0, + * for example) contain a bug where they assume the first alternate data stream + * (ADS) entry of a dentry with a nonzero ADS count specifies the unnamed + * stream, even if it has a name and the unnamed stream is already specified in + * the hash field of the dentry itself. 
+ * + * wimlib has to work around this behavior by carefully emulating the behavior + * of (most versions of) ImageX/WIMGAPI, which move the unnamed stream reference + * into the alternate stream entries whenever there are named data streams, even + * though there is already a field in the dentry itself for the unnamed stream + * reference, which then goes to waste. */ -int for_dentry_in_tree(struct dentry *root, - int (*visitor)(struct dentry*, void*), void *arg) +static inline bool +inode_needs_dummy_stream(const struct wim_inode *inode) { - int ret; - struct dentry *child; + return (inode->i_num_ads > 0 && + inode->i_num_ads < 0xffff && /* overflow check */ + inode->i_canonical_streams); /* assume the dentry is okay if it + already had an unnamed ADS entry + when it was read in */ +} - ret = visitor(root, arg); +/* Calculate the total number of bytes that will be consumed when a WIM dentry + * is written. This includes base dentry and name fields as well as all + * alternate data stream entries and alignment bytes. */ +u64 +dentry_out_total_length(const struct wim_dentry *dentry) +{ + u64 length = dentry_correct_length_aligned(dentry); + const struct wim_inode *inode = dentry->d_inode; - if (ret != 0) - return ret; + if (inode_needs_dummy_stream(inode)) + length += ads_entry_total_length(&(struct wim_ads_entry){}); - child = root->children; + for (u16 i = 0; i < inode->i_num_ads; i++) + length += ads_entry_total_length(&inode->i_ads_entries[i]); - if (!child) - return 0; + return length; +} + +/* Calculate the aligned, total length of a dentry, including all alternate data + * stream entries. Uses dentry->length. 
*/ +static u64 +dentry_in_total_length(const struct wim_dentry *dentry) +{ + u64 length = dentry->length; + const struct wim_inode *inode = dentry->d_inode; + for (u16 i = 0; i < inode->i_num_ads; i++) + length += ads_entry_total_length(&inode->i_ads_entries[i]); + return (length + 7) & ~7; +} - do { - ret = for_dentry_in_tree(child, visitor, arg); +int +for_dentry_in_rbtree(struct rb_node *root, + int (*visitor)(struct wim_dentry *, void *), + void *arg) +{ + int ret; + struct rb_node *node = root; + LIST_HEAD(stack); + while (1) { + if (node) { + list_add(&rbnode_dentry(node)->tmp_list, &stack); + node = node->rb_left; + } else { + struct list_head *next; + struct wim_dentry *dentry; + + next = stack.next; + if (next == &stack) + return 0; + dentry = container_of(next, struct wim_dentry, tmp_list); + list_del(next); + ret = visitor(dentry, arg); + if (ret != 0) + return ret; + node = dentry->rb_node.rb_right; + } + } +} + +static int +for_dentry_tree_in_rbtree_depth(struct rb_node *node, + int (*visitor)(struct wim_dentry*, void*), + void *arg) +{ + int ret; + if (node) { + ret = for_dentry_tree_in_rbtree_depth(node->rb_left, + visitor, arg); + if (ret != 0) + return ret; + ret = for_dentry_tree_in_rbtree_depth(node->rb_right, + visitor, arg); + if (ret != 0) + return ret; + ret = for_dentry_in_tree_depth(rbnode_dentry(node), visitor, arg); if (ret != 0) return ret; - child = child->next; - } while (child != root->children); + } return 0; } -/* - * Like for_dentry_in_tree(), but the visitor function is always called on a - * dentry's children before on itself. 
- */ -int for_dentry_in_tree_depth(struct dentry *root, - int (*visitor)(struct dentry*, void*), void *arg) +static int +for_dentry_tree_in_rbtree(struct rb_node *node, + int (*visitor)(struct wim_dentry*, void*), + void *arg) { int ret; - struct dentry *child; - struct dentry *next; - - child = root->children; - if (child) { - do { - next = child->next; - ret = for_dentry_in_tree_depth(child, visitor, arg); - if (ret != 0) - return ret; - child = next; - } while (child != root->children); + if (node) { + ret = for_dentry_tree_in_rbtree(node->rb_left, visitor, arg); + if (ret) + return ret; + ret = for_dentry_in_tree(rbnode_dentry(node), visitor, arg); + if (ret) + return ret; + ret = for_dentry_tree_in_rbtree(node->rb_right, visitor, arg); + if (ret) + return ret; } - return visitor(root, arg); + return 0; } -/* - * Calculate the full path of @dentry, based on its parent's full path and on - * its UTF-8 file name. - */ -int calculate_dentry_full_path(struct dentry *dentry, void *ignore) +/* + * Iterate over all children of @dentry, calling the function @visitor, passing + * it a child dentry and the extra argument @arg. + * + * Note: this function iterates over ALL child dentries, even those with the + * same case-insensitive name. + * + * Note: this function clobbers the tmp_list field of the child dentries. */ +int +for_dentry_child(const struct wim_dentry *dentry, + int (*visitor)(struct wim_dentry *, void *), + void *arg) +{ + return for_dentry_in_rbtree(dentry->d_inode->i_children.rb_node, + visitor, + arg); +} + +/* Calls a function on all directory entries in a WIM dentry tree. Logically, + * this is a pre-order traversal (the function is called on a parent dentry + * before its children), but sibling dentries will be visited in order as well. 
+ * */ +int +for_dentry_in_tree(struct wim_dentry *root, + int (*visitor)(struct wim_dentry*, void*), void *arg) +{ + int ret; + + if (root == NULL) + return 0; + ret = (*visitor)(root, arg); + if (ret) + return ret; + return for_dentry_tree_in_rbtree(root->d_inode->i_children.rb_node, + visitor, + arg); +} + +/* Like for_dentry_in_tree(), but the visitor function is always called on a + * dentry's children before on itself. */ +int +for_dentry_in_tree_depth(struct wim_dentry *root, + int (*visitor)(struct wim_dentry*, void*), void *arg) { - char *full_path; - u32 full_path_len; + int ret; + + if (root == NULL) + return 0; + ret = for_dentry_tree_in_rbtree_depth(root->d_inode->i_children.rb_node, + visitor, arg); + if (ret) + return ret; + return (*visitor)(root, arg); +} + +/* Calculate the full path of @dentry. The full path of its parent must have + * already been calculated, or it must be the root dentry. */ +int +calculate_dentry_full_path(struct wim_dentry *dentry) +{ + tchar *full_path; + u32 full_path_nbytes; + int ret; + + if (dentry->_full_path) + return 0; + if (dentry_is_root(dentry)) { - full_path = MALLOC(2); - if (!full_path) - goto oom; - full_path[0] = '/'; - full_path[1] = '\0'; - full_path_len = 1; + static const tchar _root_path[] = {WIM_PATH_SEPARATOR, T('\0')}; + full_path = TSTRDUP(_root_path); + if (full_path == NULL) + return WIMLIB_ERR_NOMEM; + full_path_nbytes = 1 * sizeof(tchar); } else { - char *parent_full_path; - u32 parent_full_path_len; - const struct dentry *parent = dentry->parent; + struct wim_dentry *parent; + tchar *parent_full_path; + u32 parent_full_path_nbytes; + size_t filename_nbytes; + parent = dentry->parent; if (dentry_is_root(parent)) { - parent_full_path = ""; - parent_full_path_len = 0; + parent_full_path = T(""); + parent_full_path_nbytes = 0; } else { - parent_full_path = parent->full_path_utf8; - parent_full_path_len = parent->full_path_utf8_len; + if (parent->_full_path == NULL) { + ret = 
calculate_dentry_full_path(parent); + if (ret) + return ret; + } + parent_full_path = parent->_full_path; + parent_full_path_nbytes = parent->full_path_nbytes; } - full_path_len = parent_full_path_len + 1 + - dentry->file_name_utf8_len; - full_path = MALLOC(full_path_len + 1); - if (!full_path) - goto oom; - - memcpy(full_path, parent_full_path, parent_full_path_len); - full_path[parent_full_path_len] = '/'; - memcpy(full_path + parent_full_path_len + 1, - dentry->file_name_utf8, - dentry->file_name_utf8_len); - full_path[full_path_len] = '\0'; - } - FREE(dentry->full_path_utf8); - dentry->full_path_utf8 = full_path; - dentry->full_path_utf8_len = full_path_len; + /* Append this dentry's name as a tchar string to the full path + * of the parent followed by the path separator */ + #if TCHAR_IS_UTF16LE + filename_nbytes = dentry->file_name_nbytes; + #else + { + int ret = utf16le_to_tstr_nbytes(dentry->file_name, + dentry->file_name_nbytes, + &filename_nbytes); + if (ret) + return ret; + } + #endif + + full_path_nbytes = parent_full_path_nbytes + sizeof(tchar) + + filename_nbytes; + full_path = MALLOC(full_path_nbytes + sizeof(tchar)); + if (full_path == NULL) + return WIMLIB_ERR_NOMEM; + memcpy(full_path, parent_full_path, parent_full_path_nbytes); + full_path[parent_full_path_nbytes / sizeof(tchar)] = WIM_PATH_SEPARATOR; + #if TCHAR_IS_UTF16LE + memcpy(&full_path[parent_full_path_nbytes / sizeof(tchar) + 1], + dentry->file_name, + filename_nbytes + sizeof(tchar)); + #else + utf16le_to_tstr_buf(dentry->file_name, + dentry->file_name_nbytes, + &full_path[parent_full_path_nbytes / + sizeof(tchar) + 1]); + #endif + } + dentry->_full_path = full_path; + dentry->full_path_nbytes= full_path_nbytes; return 0; -oom: - ERROR("Out of memory while calculating dentry full path"); - return WIMLIB_ERR_NOMEM; } -/* - * Recursively calculates the subdir offsets for a directory tree. 
+static int +do_calculate_dentry_full_path(struct wim_dentry *dentry, void *_ignore) +{ + return calculate_dentry_full_path(dentry); +} + +int +calculate_dentry_tree_full_paths(struct wim_dentry *root) +{ + return for_dentry_in_tree(root, do_calculate_dentry_full_path, NULL); +} + +tchar * +dentry_full_path(struct wim_dentry *dentry) +{ + calculate_dentry_full_path(dentry); + return dentry->_full_path; +} + +static int +increment_subdir_offset(struct wim_dentry *dentry, void *subdir_offset_p) +{ + *(u64*)subdir_offset_p += dentry_out_total_length(dentry); + return 0; +} + +static int +call_calculate_subdir_offsets(struct wim_dentry *dentry, void *subdir_offset_p) +{ + calculate_subdir_offsets(dentry, subdir_offset_p); + return 0; +} + +/* + * Recursively calculates the subdir offsets for a directory tree. * * @dentry: The root of the directory tree. * @subdir_offset_p: The current subdirectory offset; i.e., the subdirectory - * offset for @dentry. + * offset for @dentry. */ -void calculate_subdir_offsets(struct dentry *dentry, u64 *subdir_offset_p) +void +calculate_subdir_offsets(struct wim_dentry *dentry, u64 *subdir_offset_p) { - struct dentry *child; + struct rb_node *node; - child = dentry->children; dentry->subdir_offset = *subdir_offset_p; - if (child) { - + node = dentry->d_inode->i_children.rb_node; + if (node) { /* Advance the subdir offset by the amount of space the children * of this dentry take up. */ - do { - *subdir_offset_p += child->length; - child = child->next; - } while (child != dentry->children); + for_dentry_in_rbtree(node, increment_subdir_offset, subdir_offset_p); /* End-of-directory dentry on disk. */ *subdir_offset_p += 8; /* Recursively call calculate_subdir_offsets() on all the * children. 
*/ - do { - calculate_subdir_offsets(child, subdir_offset_p); - child = child->next; - } while (child != dentry->children); + for_dentry_in_rbtree(node, call_calculate_subdir_offsets, subdir_offset_p); } else { /* On disk, childless directories have a valid subdir_offset * that points to an 8-byte end-of-directory dentry. Regular - * files have a subdir_offset of 0. */ + * files or reparse points have a subdir_offset of 0. */ if (dentry_is_directory(dentry)) *subdir_offset_p += 8; else @@ -234,688 +571,1405 @@ void calculate_subdir_offsets(struct dentry *dentry, u64 *subdir_offset_p) } } +static int +dentry_compare_names_case_insensitive(const struct wim_dentry *d1, + const struct wim_dentry *d2) +{ + return cmp_utf16le_strings(d1->file_name, + d1->file_name_nbytes / 2, + d2->file_name, + d2->file_name_nbytes / 2, + true); +} -/* Returns the child of @dentry that has the file name @name. - * Returns NULL if no child has the name. */ -struct dentry *get_dentry_child_with_name(const struct dentry *dentry, - const char *name) +static int +dentry_compare_names_case_sensitive(const struct wim_dentry *d1, + const struct wim_dentry *d2) { - struct dentry *child; - size_t name_len; - - child = dentry->children; - if (child) { - name_len = strlen(name); - do { - if (dentry_has_name(child, name, name_len)) - return child; - child = child->next; - } while (child != dentry->children); + return cmp_utf16le_strings(d1->file_name, + d1->file_name_nbytes / 2, + d2->file_name, + d2->file_name_nbytes / 2, + false); +} + +/* Default case sensitivity behavior for searches with + * WIMLIB_CASE_PLATFORM_DEFAULT specified. This can be modified by + * wimlib_global_init(). */ +bool default_ignore_case = +#ifdef __WIN32__ + true +#else + false +#endif +; + +/* Given a UTF-16LE filename and a directory, look up the dentry for the file. + * Return it if found, otherwise NULL. This is case-sensitive on UNIX and + * case-insensitive on Windows. 
*/ +struct wim_dentry * +get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry, + const utf16lechar *name, + size_t name_nbytes, + CASE_SENSITIVITY_TYPE case_ctype) +{ + struct rb_node *node; + + bool ignore_case = will_ignore_case(case_ctype); + + if (ignore_case) + node = dentry->d_inode->i_children_case_insensitive.rb_node; + else + node = dentry->d_inode->i_children.rb_node; + + struct wim_dentry *child; + while (node) { + if (ignore_case) + child = rb_entry(node, struct wim_dentry, rb_node_case_insensitive); + else + child = rb_entry(node, struct wim_dentry, rb_node); + + int result = cmp_utf16le_strings(name, + name_nbytes / 2, + child->file_name, + child->file_name_nbytes / 2, + ignore_case); + if (result < 0) { + node = node->rb_left; + } else if (result > 0) { + node = node->rb_right; + } else if (!ignore_case || + list_empty(&child->case_insensitive_conflict_list)) { + return child; + } else { + /* Multiple dentries have the same case-insensitive + * name, and a case-insensitive lookup is being + * performed. Choose the dentry with the same + * case-sensitive name, if one exists; otherwise print a + * warning and choose one arbitrarily. */ + struct wim_dentry *alt = child; + size_t num_alts = 0; + + do { + num_alts++; + if (0 == cmp_utf16le_strings(name, + name_nbytes / 2, + alt->file_name, + alt->file_name_nbytes / 2, + false)) + return alt; + alt = list_entry(alt->case_insensitive_conflict_list.next, + struct wim_dentry, + case_insensitive_conflict_list); + } while (alt != child); + + WARNING("Result of case-insensitive lookup is ambiguous\n" + " (returning \"%"TS"\" of %zu " + "possible files, including \"%"TS"\")", + dentry_full_path(child), + num_alts, + dentry_full_path(list_entry(child->case_insensitive_conflict_list.next, + struct wim_dentry, + case_insensitive_conflict_list))); + return child; + } } return NULL; } -/* Retrieves the dentry that has the UTF-8 @path relative to the dentry - * @cur_dir. 
Returns NULL if no dentry having the path is found. */ -static struct dentry *get_dentry_relative_path(struct dentry *cur_dir, const char *path) +/* Returns the child of @dentry that has the file name @name. Returns NULL if + * no child has the name. */ +struct wim_dentry * +get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name, + CASE_SENSITIVITY_TYPE case_type) { - struct dentry *child; - size_t base_len; - const char *new_path; +#if TCHAR_IS_UTF16LE + return get_dentry_child_with_utf16le_name(dentry, name, + tstrlen(name) * sizeof(tchar), + case_type); +#else + utf16lechar *utf16le_name; + size_t utf16le_name_nbytes; + int ret; + struct wim_dentry *child; + + ret = tstr_to_utf16le(name, tstrlen(name) * sizeof(tchar), + &utf16le_name, &utf16le_name_nbytes); + if (ret) { + child = NULL; + } else { + child = get_dentry_child_with_utf16le_name(dentry, + utf16le_name, + utf16le_name_nbytes, + case_type); + FREE(utf16le_name); + } + return child; +#endif +} + +static struct wim_dentry * +get_dentry_utf16le(WIMStruct *wim, const utf16lechar *path, + CASE_SENSITIVITY_TYPE case_type) +{ + struct wim_dentry *cur_dentry; + const utf16lechar *name_start, *name_end; + + /* Start with the root directory of the image. Note: this will be NULL + * if an image has been added directly with wimlib_add_empty_image() but + * no files have been added yet; in that case we fail with ENOENT. 
*/ + cur_dentry = wim_root_dentry(wim); + + name_start = path; + for (;;) { + if (cur_dentry == NULL) { + errno = ENOENT; + return NULL; + } + + if (*name_start && !dentry_is_directory(cur_dentry)) { + errno = ENOTDIR; + return NULL; + } + + while (*name_start == cpu_to_le16(WIM_PATH_SEPARATOR)) + name_start++; - if (*path == '\0') - return cur_dir; + if (!*name_start) + return cur_dentry; - child = cur_dir->children; - if (child) { - new_path = path_next_part(path, &base_len); + name_end = name_start; do { - if (dentry_has_name(child, path, base_len)) - return get_dentry_relative_path(child, new_path); - child = child->next; - } while (child != cur_dir->children); + ++name_end; + } while (*name_end != cpu_to_le16(WIM_PATH_SEPARATOR) && *name_end); + + cur_dentry = get_dentry_child_with_utf16le_name(cur_dentry, + name_start, + (u8*)name_end - (u8*)name_start, + case_type); + name_start = name_end; } - return NULL; } -/* Returns the dentry corresponding to the UTF-8 @path, or NULL if there is no - * such dentry. */ -struct dentry *get_dentry(WIMStruct *w, const char *path) +/* + * WIM path lookup: translate a path in the currently selected WIM image to the + * corresponding dentry, if it exists. + * + * @wim + * The WIMStruct for the WIM. The search takes place in the currently + * selected image. + * + * @path + * The path to look up, given relative to the root of the WIM image. + * Characters with value WIM_PATH_SEPARATOR are taken to be path + * separators. Leading path separators are ignored, whereas one or more + * trailing path separators cause the path to only match a directory. + * + * @case_type + * The case-sensitivity behavior of this function, as one of the following + * constants: + * + * - WIMLIB_CASE_SENSITIVE: Perform the search case sensitively. This means + * that names must match exactly. + * + * - WIMLIB_CASE_INSENSITIVE: Perform the search case insensitively. 
This + * means that names are considered to match if they are equal when + * transformed to upper case. If a path component matches multiple names + * case-insensitively, the name that matches the path component + * case-sensitively is chosen, if existent; otherwise one + * case-insensitively matching name is chosen arbitrarily. + * + * - WIMLIB_CASE_PLATFORM_DEFAULT: Perform either case-sensitive or + * case-insensitive search, depending on the value of the global variable + * default_ignore_case. + * + * In any case, no Unicode normalization is done before comparing strings. + * + * Returns a pointer to the dentry that is the result of the lookup, or NULL if + * no such dentry exists. If NULL is returned, errno is set to one of the + * following values: + * + * ENOTDIR if one of the path components used as a directory existed but + * was not, in fact, a directory. + * + * ENOENT otherwise. + * + * Additional notes: + * + * - This function does not consider a reparse point to be a directory, even + * if it has FILE_ATTRIBUTE_DIRECTORY set. + * + * - This function does not dereference symbolic links or junction points + * when performing the search. + * + * - Since this function ignores leading slashes, the empty path is valid and + * names the root directory of the WIM image. + * + * - An image added with wimlib_add_empty_image() does not have a root + * directory yet, and this function will fail with ENOENT for any path on + * such an image. 
+ */ +struct wim_dentry * +get_dentry(WIMStruct *wim, const tchar *path, CASE_SENSITIVITY_TYPE case_type) { - struct dentry *root = wim_root_dentry(w); - while (*path == '/') - path++; - return get_dentry_relative_path(root, path); +#if TCHAR_IS_UTF16LE + return get_dentry_utf16le(wim, path, case_type); +#else + utf16lechar *path_utf16le; + size_t path_utf16le_nbytes; + int ret; + struct wim_dentry *dentry; + + ret = tstr_to_utf16le(path, tstrlen(path) * sizeof(tchar), + &path_utf16le, &path_utf16le_nbytes); + if (ret) + return NULL; + dentry = get_dentry_utf16le(wim, path_utf16le, case_type); + FREE(path_utf16le); + return dentry; +#endif } -/* Returns the parent directory for the @path. */ -struct dentry *get_parent_dentry(WIMStruct *w, const char *path) +/* Takes in a path of length @len in @buf, and transforms it into a string for + * the path of its parent directory. */ +static void +to_parent_name(tchar *buf, size_t len) { - size_t path_len = strlen(path); - char buf[path_len + 1]; + ssize_t i = (ssize_t)len - 1; + while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR) + i--; + while (i >= 0 && buf[i] != WIM_PATH_SEPARATOR) + i--; + while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR) + i--; + buf[i + 1] = T('\0'); +} - memcpy(buf, path, path_len + 1); +/* Similar to get_dentry(), but returns the dentry named by @path with the last + * component stripped off. + * + * Note: The returned dentry is NOT guaranteed to be a directory. */ +struct wim_dentry * +get_parent_dentry(WIMStruct *wim, const tchar *path, + CASE_SENSITIVITY_TYPE case_type) +{ + size_t path_len = tstrlen(path); + tchar buf[path_len + 1]; + tmemcpy(buf, path, path_len + 1); to_parent_name(buf, path_len); + return get_dentry(wim, buf, case_type); +} - return get_dentry(w, buf); +#ifdef WITH_FUSE +/* Finds the dentry, lookup table entry, and stream index for a WIM file stream, + * given a path name. + * + * Currently, lookups of this type are only needed if FUSE is enabled. 
*/ +int +wim_pathname_to_stream(WIMStruct *wim, + const tchar *path, + int lookup_flags, + struct wim_dentry **dentry_ret, + struct wim_lookup_table_entry **lte_ret, + u16 *stream_idx_ret) +{ + struct wim_dentry *dentry; + struct wim_lookup_table_entry *lte; + u16 stream_idx; + const tchar *stream_name = NULL; + struct wim_inode *inode; + tchar *p = NULL; + + if (lookup_flags & LOOKUP_FLAG_ADS_OK) { + stream_name = path_stream_name(path); + if (stream_name) { + p = (tchar*)stream_name - 1; + *p = T('\0'); + } + } + + dentry = get_dentry(wim, path, WIMLIB_CASE_SENSITIVE); + if (p) + *p = T(':'); + if (!dentry) + return -errno; + + inode = dentry->d_inode; + + if (!inode->i_resolved) + if (inode_resolve_streams(inode, wim->lookup_table, false)) + return -EIO; + + if (!(lookup_flags & LOOKUP_FLAG_DIRECTORY_OK) + && inode_is_directory(inode)) + return -EISDIR; + + if (stream_name) { + struct wim_ads_entry *ads_entry; + u16 ads_idx; + ads_entry = inode_get_ads_entry(inode, stream_name, + &ads_idx); + if (ads_entry) { + stream_idx = ads_idx + 1; + lte = ads_entry->lte; + goto out; + } else { + return -ENOENT; + } + } else { + lte = inode_unnamed_stream_resolved(inode, &stream_idx); + } +out: + if (dentry_ret) + *dentry_ret = dentry; + if (lte_ret) + *lte_ret = lte; + if (stream_idx_ret) + *stream_idx_ret = stream_idx; + return 0; } +#endif /* WITH_FUSE */ /* Prints the full path of a dentry. */ -int print_dentry_full_path(struct dentry *dentry, void *ignore) +int +print_dentry_full_path(struct wim_dentry *dentry, void *_ignore) { - if (dentry->full_path_utf8) - puts(dentry->full_path_utf8); + int ret = calculate_dentry_full_path(dentry); + if (ret) + return ret; + tprintf(T("%"TS"\n"), dentry->_full_path); return 0; } +/* We want to be able to show the names of the file attribute flags that are + * set. 
*/ struct file_attr_flag { u32 flag; - const char *name; + const tchar *name; }; struct file_attr_flag file_attr_flags[] = { - {WIM_FILE_ATTRIBUTE_READONLY, "READONLY"}, - {WIM_FILE_ATTRIBUTE_HIDDEN, "HIDDEN"}, - {WIM_FILE_ATTRIBUTE_SYSTEM, "SYSTEM"}, - {WIM_FILE_ATTRIBUTE_DIRECTORY, "DIRECTORY"}, - {WIM_FILE_ATTRIBUTE_ARCHIVE, "ARCHIVE"}, - {WIM_FILE_ATTRIBUTE_DEVICE, "DEVICE"}, - {WIM_FILE_ATTRIBUTE_NORMAL, "NORMAL"}, - {WIM_FILE_ATTRIBUTE_TEMPORARY, "TEMPORARY"}, - {WIM_FILE_ATTRIBUTE_SPARSE_FILE, "SPARSE_FILE"}, - {WIM_FILE_ATTRIBUTE_REPARSE_POINT, "REPARSE_POINT"}, - {WIM_FILE_ATTRIBUTE_COMPRESSED, "COMPRESSED"}, - {WIM_FILE_ATTRIBUTE_OFFLINE, "OFFLINE"}, - {WIM_FILE_ATTRIBUTE_NOT_CONTENT_INDEXED,"NOT_CONTENT_INDEXED"}, - {WIM_FILE_ATTRIBUTE_ENCRYPTED, "ENCRYPTED"}, - {WIM_FILE_ATTRIBUTE_VIRTUAL, "VIRTUAL"}, + {FILE_ATTRIBUTE_READONLY, T("READONLY")}, + {FILE_ATTRIBUTE_HIDDEN, T("HIDDEN")}, + {FILE_ATTRIBUTE_SYSTEM, T("SYSTEM")}, + {FILE_ATTRIBUTE_DIRECTORY, T("DIRECTORY")}, + {FILE_ATTRIBUTE_ARCHIVE, T("ARCHIVE")}, + {FILE_ATTRIBUTE_DEVICE, T("DEVICE")}, + {FILE_ATTRIBUTE_NORMAL, T("NORMAL")}, + {FILE_ATTRIBUTE_TEMPORARY, T("TEMPORARY")}, + {FILE_ATTRIBUTE_SPARSE_FILE, T("SPARSE_FILE")}, + {FILE_ATTRIBUTE_REPARSE_POINT, T("REPARSE_POINT")}, + {FILE_ATTRIBUTE_COMPRESSED, T("COMPRESSED")}, + {FILE_ATTRIBUTE_OFFLINE, T("OFFLINE")}, + {FILE_ATTRIBUTE_NOT_CONTENT_INDEXED,T("NOT_CONTENT_INDEXED")}, + {FILE_ATTRIBUTE_ENCRYPTED, T("ENCRYPTED")}, + {FILE_ATTRIBUTE_VIRTUAL, T("VIRTUAL")}, }; -/* Prints a directory entry. @lookup_table is a pointer to the lookup table, or - * NULL if the resource entry for the dentry is not to be printed. */ -int print_dentry(struct dentry *dentry, void *lookup_table) +/* Prints a directory entry. @lookup_table is a pointer to the lookup table, if + * available. If the dentry is unresolved and the lookup table is NULL, the + * lookup table entries will not be printed. Otherwise, they will be. 
*/ +int +print_dentry(struct wim_dentry *dentry, void *lookup_table) { - struct lookup_table_entry *lte; - unsigned i; - - printf("[DENTRY]\n"); - printf("Length = %"PRIu64"\n", dentry->length); - printf("Attributes = 0x%x\n", dentry->attributes); - for (i = 0; i < ARRAY_LEN(file_attr_flags); i++) - if (file_attr_flags[i].flag & dentry->attributes) - printf(" WIM_FILE_ATTRIBUTE_%s is set\n", + const u8 *hash; + struct wim_lookup_table_entry *lte; + const struct wim_inode *inode = dentry->d_inode; + tchar buf[50]; + + tprintf(T("[DENTRY]\n")); + tprintf(T("Length = %"PRIu64"\n"), dentry->length); + tprintf(T("Attributes = 0x%x\n"), inode->i_attributes); + for (size_t i = 0; i < ARRAY_LEN(file_attr_flags); i++) + if (file_attr_flags[i].flag & inode->i_attributes) + tprintf(T(" FILE_ATTRIBUTE_%"TS" is set\n"), file_attr_flags[i].name); -#ifdef ENABLE_SECURITY_DATA - printf("Security ID = %d\n", dentry->security_id); -#endif - printf("Subdir offset = %"PRIu64"\n", dentry->subdir_offset); - /*printf("Unused1 = %"PRIu64"\n", dentry->unused1);*/ - /*printf("Unused2 = %"PRIu64"\n", dentry->unused2);*/ - printf("Creation Time = %"PRIu64"\n", dentry->creation_time); - printf("Last Access Time = %"PRIu64"\n", dentry->last_access_time); - printf("Last Write Time = %"PRIu64"\n", dentry->last_write_time); - printf("Creation Time = 0x%"PRIx64"\n", dentry->creation_time); - printf("Hash = "); - print_hash(dentry->hash); - putchar('\n'); - /*printf("Reparse Tag = %u\n", dentry->reparse_tag);*/ - printf("Hard Link Group = %"PRIu64"\n", dentry->hard_link); - /*printf("Number of Streams = %hu\n", dentry->streams);*/ - printf("Filename = \""); - print_string(dentry->file_name, dentry->file_name_len); - puts("\""); - printf("Filename Length = %hu\n", dentry->file_name_len); - printf("Filename (UTF-8) = \"%s\"\n", dentry->file_name_utf8); - printf("Filename (UTF-8) Length = %hu\n", dentry->file_name_utf8_len); - printf("Short Name = \""); - print_string(dentry->short_name, 
dentry->short_name_len); - puts("\""); - printf("Short Name Length = %hu\n", dentry->short_name_len); - printf("Full Path (UTF-8) = \"%s\"\n", dentry->full_path_utf8); - if (lookup_table) { - lte = lookup_resource(lookup_table, dentry->hash); - if (lte) - print_lookup_table_entry(lte, NULL); - else - putchar('\n'); + tprintf(T("Security ID = %d\n"), inode->i_security_id); + tprintf(T("Subdir offset = %"PRIu64"\n"), dentry->subdir_offset); + + wim_timestamp_to_str(inode->i_creation_time, buf, sizeof(buf)); + tprintf(T("Creation Time = %"TS"\n"), buf); + + wim_timestamp_to_str(inode->i_last_access_time, buf, sizeof(buf)); + tprintf(T("Last Access Time = %"TS"\n"), buf); + + wim_timestamp_to_str(inode->i_last_write_time, buf, sizeof(buf)); + tprintf(T("Last Write Time = %"TS"\n"), buf); + + if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { + tprintf(T("Reparse Tag = 0x%"PRIx32"\n"), inode->i_reparse_tag); + tprintf(T("Reparse Point Flags = 0x%"PRIx16"\n"), + inode->i_not_rpfixed); + tprintf(T("Reparse Point Unknown 2 = 0x%"PRIx32"\n"), + inode->i_rp_unknown_2); + } + tprintf(T("Reparse Point Unknown 1 = 0x%"PRIx32"\n"), + inode->i_rp_unknown_1); + tprintf(T("Hard Link Group = 0x%"PRIx64"\n"), inode->i_ino); + tprintf(T("Hard Link Group Size = %"PRIu32"\n"), inode->i_nlink); + tprintf(T("Number of Alternate Data Streams = %hu\n"), inode->i_num_ads); + if (dentry_has_long_name(dentry)) + wimlib_printf(T("Filename = \"%"WS"\"\n"), dentry->file_name); + if (dentry_has_short_name(dentry)) + wimlib_printf(T("Short Name \"%"WS"\"\n"), dentry->short_name); + if (dentry->_full_path) + tprintf(T("Full Path = \"%"TS"\"\n"), dentry->_full_path); + + lte = inode_stream_lte(dentry->d_inode, 0, lookup_table); + if (lte) { + print_lookup_table_entry(lte, stdout); } else { - putchar('\n'); + hash = inode_stream_hash(inode, 0); + if (hash) { + tprintf(T("Hash = 0x")); + print_hash(hash, stdout); + tputchar(T('\n')); + tputchar(T('\n')); + } + } + for (u16 i = 0; i < 
inode->i_num_ads; i++) { + tprintf(T("[Alternate Stream Entry %u]\n"), i); + wimlib_printf(T("Name = \"%"WS"\"\n"), + inode->i_ads_entries[i].stream_name); + tprintf(T("Name Length (UTF16 bytes) = %hu\n"), + inode->i_ads_entries[i].stream_name_nbytes); + hash = inode_stream_hash(inode, i + 1); + if (hash) { + tprintf(T("Hash = 0x")); + print_hash(hash, stdout); + tputchar(T('\n')); + } + print_lookup_table_entry(inode_stream_lte(inode, i + 1, lookup_table), + stdout); } return 0; } -static inline void dentry_common_init(struct dentry *dentry) +/* Initializations done on every `struct wim_dentry'. */ +static void +dentry_common_init(struct wim_dentry *dentry) { - memset(dentry, 0, sizeof(struct dentry)); - dentry->refcnt = 1; + memset(dentry, 0, sizeof(struct wim_dentry)); } -/* - * Creates an unlinked directory entry. - * - * @name: The base name of the new dentry. - * @return: A pointer to the new dentry, or NULL if out of memory. - */ -struct dentry *new_dentry(const char *name) +/* Creates an unlinked directory entry. 
*/ +int +new_dentry(const tchar *name, struct wim_dentry **dentry_ret) { - struct dentry *dentry; - - dentry = MALLOC(sizeof(struct dentry)); - if (!dentry) - return NULL; + struct wim_dentry *dentry; + int ret; + + dentry = MALLOC(sizeof(struct wim_dentry)); + if (dentry == NULL) + return WIMLIB_ERR_NOMEM; dentry_common_init(dentry); - if (change_dentry_name(dentry, name) != 0) { - FREE(dentry); - return NULL; + if (*name) { + ret = dentry_set_name(dentry, name); + if (ret) { + FREE(dentry); + ERROR("Failed to set name on new dentry with name \"%"TS"\"", + name); + return ret; + } } - - dentry_update_all_timestamps(dentry); - dentry->next = dentry; - dentry->prev = dentry; dentry->parent = dentry; - return dentry; + *dentry_ret = dentry; + return 0; } +static int +_new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret, + bool timeless) +{ + struct wim_dentry *dentry; + int ret; + + ret = new_dentry(name, &dentry); + if (ret) + return ret; + + if (timeless) + dentry->d_inode = new_timeless_inode(); + else + dentry->d_inode = new_inode(); + if (dentry->d_inode == NULL) { + free_dentry(dentry); + return WIMLIB_ERR_NOMEM; + } -void free_dentry(struct dentry *dentry) + inode_add_dentry(dentry, dentry->d_inode); + *dentry_ret = dentry; + return 0; +} + +int +new_dentry_with_timeless_inode(const tchar *name, struct wim_dentry **dentry_ret) { - FREE(dentry->file_name); - FREE(dentry->file_name_utf8); - FREE(dentry->short_name); - FREE(dentry->full_path_utf8); - FREE(dentry); + return _new_dentry_with_inode(name, dentry_ret, true); } -/* Arguments for do_free_dentry(). */ -struct free_dentry_args { - struct lookup_table *lookup_table; - bool lt_decrement_refcnt; -}; +int +new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret) +{ + return _new_dentry_with_inode(name, dentry_ret, false); +} -/* - * This function is passed as an argument to for_dentry_in_tree_depth() in order - * to free a directory tree. 
__args is a pointer to a `struct free_dentry_args'. - */ -static int do_free_dentry(struct dentry *dentry, void *__args) +int +new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret) +{ + int ret; + struct wim_dentry *dentry; + + DEBUG("Creating filler directory \"%"TS"\"", name); + ret = new_dentry_with_inode(name, &dentry); + if (ret) + return ret; + /* Leave the inode number as 0; this is allowed for non + * hard-linked files. */ + dentry->d_inode->i_resolved = 1; + dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY; + *dentry_ret = dentry; + return 0; +} + +static int +dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore) { - struct free_dentry_args *args = (struct free_dentry_args*)__args; + dentry->d_inode->i_visited = 0; + return 0; +} - if (args->lt_decrement_refcnt && !dentry_is_directory(dentry)) { - lookup_table_decrement_refcnt(args->lookup_table, - dentry->hash); +void +dentry_tree_clear_inode_visited(struct wim_dentry *root) +{ + for_dentry_in_tree(root, dentry_clear_inode_visited, NULL); +} + +/* Frees a WIM dentry. + * + * The corresponding inode (if any) is freed only if its link count is + * decremented to 0. */ +void +free_dentry(struct wim_dentry *dentry) +{ + if (dentry) { + FREE(dentry->file_name); + FREE(dentry->short_name); + FREE(dentry->_full_path); + if (dentry->d_inode) + put_inode(dentry->d_inode); + FREE(dentry); } +} - wimlib_assert(dentry->refcnt >= 1); - if (--dentry->refcnt == 0) - free_dentry(dentry); +/* This function is passed as an argument to for_dentry_in_tree_depth() in order + * to free a directory tree. 
*/ +static int +do_free_dentry(struct wim_dentry *dentry, void *_lookup_table) +{ + struct wim_lookup_table *lookup_table = _lookup_table; + + if (lookup_table) { + struct wim_inode *inode = dentry->d_inode; + for (unsigned i = 0; i <= inode->i_num_ads; i++) { + struct wim_lookup_table_entry *lte; + + lte = inode_stream_lte(inode, i, lookup_table); + if (lte) + lte_decrement_refcnt(lte, lookup_table); + } + } + free_dentry(dentry); return 0; } -/* +/* * Unlinks and frees a dentry tree. * - * @root: The root of the tree. - * @lookup_table: The lookup table for dentries. - * @decrement_refcnt: True if the dentries in the tree are to have their - * reference counts in the lookup table decremented. + * @root: + * The root of the tree. + * + * @lookup_table: + * The lookup table for dentries. If non-NULL, the reference counts in the + * lookup table for the lookup table entries corresponding to the dentries + * will be decremented. */ -void free_dentry_tree(struct dentry *root, struct lookup_table *lookup_table, - bool lt_decrement_refcnt) +void +free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table) { - if (!root || !root->parent) - return; - - struct free_dentry_args args; - args.lookup_table = lookup_table; - args.lt_decrement_refcnt = lt_decrement_refcnt; - for_dentry_in_tree_depth(root, do_free_dentry, &args); + for_dentry_in_tree_depth(root, do_free_dentry, lookup_table); } -int increment_dentry_refcnt(struct dentry *dentry, void *ignore) +/* Insert a dentry into the case insensitive index for a directory. + * + * This is a red-black tree, but when multiple dentries share the same + * case-insensitive name, only one is inserted into the tree itself; the rest + * are connected in a list. 
+ */ +static struct wim_dentry * +dentry_add_child_case_insensitive(struct wim_dentry *parent, + struct wim_dentry *child) { - dentry->refcnt++; - return 0; + struct rb_root *root; + struct rb_node **new; + struct rb_node *rb_parent; + + root = &parent->d_inode->i_children_case_insensitive; + new = &root->rb_node; + rb_parent = NULL; + while (*new) { + struct wim_dentry *this = container_of(*new, struct wim_dentry, + rb_node_case_insensitive); + int result = dentry_compare_names_case_insensitive(child, this); + + rb_parent = *new; + + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return this; + } + rb_link_node(&child->rb_node_case_insensitive, rb_parent, new); + rb_insert_color(&child->rb_node_case_insensitive, root); + return NULL; } -/* +/* * Links a dentry into the directory tree. * - * @dentry: The dentry to link. - * @parent: The dentry that will be the parent of @dentry. + * @parent: The dentry that will be the parent of @child. + * @child: The dentry to link. + * + * Returns NULL if successful. If @parent already contains a dentry with the + * same case-sensitive name as @child, the pointer to this duplicate dentry is + * returned. */ -void link_dentry(struct dentry *dentry, struct dentry *parent) -{ - dentry->parent = parent; - if (parent->children) { - /* Not an only child; link to siblings. */ - dentry->next = parent->children; - dentry->prev = parent->children->prev; - dentry->next->prev = dentry; - dentry->prev->next = dentry; - } else { - /* Only child; link to parent. 
*/ - parent->children = dentry; - dentry->next = dentry; - dentry->prev = dentry; +struct wim_dentry * +dentry_add_child(struct wim_dentry * restrict parent, + struct wim_dentry * restrict child) +{ + struct rb_root *root; + struct rb_node **new; + struct rb_node *rb_parent; + + wimlib_assert(dentry_is_directory(parent)); + wimlib_assert(parent != child); + + /* Case sensitive child dentry index */ + root = &parent->d_inode->i_children; + new = &root->rb_node; + rb_parent = NULL; + while (*new) { + struct wim_dentry *this = rbnode_dentry(*new); + int result = dentry_compare_names_case_sensitive(child, this); + + rb_parent = *new; + + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return this; + } + child->parent = parent; + rb_link_node(&child->rb_node, rb_parent, new); + rb_insert_color(&child->rb_node, root); + + /* Case insensitive child dentry index */ + { + struct wim_dentry *existing; + existing = dentry_add_child_case_insensitive(parent, child); + if (existing) { + list_add(&child->case_insensitive_conflict_list, + &existing->case_insensitive_conflict_list); + child->rb_node_case_insensitive.__rb_parent_color = 0; + } else { + INIT_LIST_HEAD(&child->case_insensitive_conflict_list); + } } + return NULL; } -/* Unlink a dentry from the directory tree. */ -void unlink_dentry(struct dentry *dentry) +/* Unlink a WIM dentry from the directory entry tree. 
*/ +void +unlink_dentry(struct wim_dentry *dentry) { - if (dentry_is_root(dentry)) + struct wim_dentry *parent = dentry->parent; + + if (parent == dentry) return; - if (dentry_is_only_child(dentry)) { - dentry->parent->children = NULL; - } else { - if (dentry_is_first_sibling(dentry)) - dentry->parent->children = dentry->next; - dentry->next->prev = dentry->prev; - dentry->prev->next = dentry->next; + rb_erase(&dentry->rb_node, &parent->d_inode->i_children); + + if (dentry->rb_node_case_insensitive.__rb_parent_color) { + /* This dentry was in the case-insensitive red-black tree. */ + rb_erase(&dentry->rb_node_case_insensitive, + &parent->d_inode->i_children_case_insensitive); + if (!list_empty(&dentry->case_insensitive_conflict_list)) { + /* Make a different case-insensitively-the-same dentry + * be the "representative" in the red-black tree. */ + struct list_head *next; + struct wim_dentry *other; + struct wim_dentry *existing; + + next = dentry->case_insensitive_conflict_list.next; + other = list_entry(next, struct wim_dentry, case_insensitive_conflict_list); + existing = dentry_add_child_case_insensitive(parent, other); + wimlib_assert(existing == NULL); + } } + list_del(&dentry->case_insensitive_conflict_list); } - -/* Recalculates the length of @dentry based on its file name length and short - * name length. */ -static inline void recalculate_dentry_size(struct dentry *dentry) +static int +free_dentry_full_path(struct wim_dentry *dentry, void *_ignore) { - dentry->length = WIM_DENTRY_DISK_SIZE + dentry->file_name_len + - 2 + dentry->short_name_len; - /* Must be multiple of 8. */ - dentry->length += (8 - dentry->length % 8) % 8; + FREE(dentry->_full_path); + dentry->_full_path = NULL; + return 0; } -/* Changes the name of a dentry to @new_name. Only changes the file_name and - * file_name_utf8 fields; does not change the short_name, short_name_utf8, or - * full_path_utf8 fields. Also recalculates its length. 
*/ -int change_dentry_name(struct dentry *dentry, const char *new_name) +/* Rename a file or directory in the WIM. */ +int +rename_wim_path(WIMStruct *wim, const tchar *from, const tchar *to, + CASE_SENSITIVITY_TYPE case_type) { - size_t utf8_len; - size_t utf16_len; - - FREE(dentry->file_name); + struct wim_dentry *src; + struct wim_dentry *dst; + struct wim_dentry *parent_of_dst; + int ret; - utf8_len = strlen(new_name); + /* This rename() implementation currently only supports actual files + * (not alternate data streams) */ - dentry->file_name = utf8_to_utf16(new_name, utf8_len, &utf16_len); + src = get_dentry(wim, from, case_type); + if (!src) + return -errno; - if (!dentry->file_name) - return WIMLIB_ERR_NOMEM; + dst = get_dentry(wim, to, case_type); - FREE(dentry->file_name_utf8); - dentry->file_name_utf8 = MALLOC(utf8_len + 1); - if (!dentry->file_name_utf8) { - FREE(dentry->file_name); - dentry->file_name = NULL; - return WIMLIB_ERR_NOMEM; - } - - dentry->file_name_len = utf16_len; - dentry->file_name_utf8_len = utf8_len; - memcpy(dentry->file_name_utf8, new_name, utf8_len + 1); - recalculate_dentry_size(dentry); - return 0; -} + if (dst) { + /* Destination file exists */ -/* Parameters for calculate_dentry_statistics(). */ -struct image_statistics { - struct lookup_table *lookup_table; - u64 *dir_count; - u64 *file_count; - u64 *total_bytes; - u64 *hard_link_bytes; -}; + if (src == dst) /* Same file */ + return 0; -static int calculate_dentry_statistics(struct dentry *dentry, void *arg) -{ - struct image_statistics *stats; - struct lookup_table_entry *lte; - - stats = arg; - lte = lookup_resource(stats->lookup_table, dentry->hash); + if (!dentry_is_directory(src)) { + /* Cannot rename non-directory to directory. 
*/ + if (dentry_is_directory(dst)) + return -EISDIR; + } else { + /* Cannot rename directory to a non-directory or a non-empty + * directory */ + if (!dentry_is_directory(dst)) + return -ENOTDIR; + if (dentry_has_children(dst)) + return -ENOTEMPTY; + } + parent_of_dst = dst->parent; + } else { + /* Destination does not exist */ + parent_of_dst = get_parent_dentry(wim, to, case_type); + if (!parent_of_dst) + return -errno; - if (dentry_is_directory(dentry) && !dentry_is_root(dentry)) - ++*stats->dir_count; - else - ++*stats->file_count; + if (!dentry_is_directory(parent_of_dst)) + return -ENOTDIR; + } - if (lte) { - u64 size = lte->resource_entry.original_size; - *stats->total_bytes += size; - if (++lte->out_refcnt == 1) - *stats->hard_link_bytes += size; + ret = dentry_set_name(src, path_basename(to)); + if (ret) + return -ENOMEM; + if (dst) { + unlink_dentry(dst); + free_dentry_tree(dst, wim->lookup_table); } + unlink_dentry(src); + dentry_add_child(parent_of_dst, src); + if (src->_full_path) + for_dentry_in_tree(src, free_dentry_full_path, NULL); return 0; } -void calculate_dir_tree_statistics(struct dentry *root, struct lookup_table *table, - u64 *dir_count_ret, u64 *file_count_ret, - u64 *total_bytes_ret, - u64 *hard_link_bytes_ret) -{ - struct image_statistics stats; - *dir_count_ret = 0; - *file_count_ret = 0; - *total_bytes_ret = 0; - *hard_link_bytes_ret = 0; - stats.lookup_table = table; - stats.dir_count = dir_count_ret; - stats.file_count = file_count_ret; - stats.total_bytes = total_bytes_ret; - stats.hard_link_bytes = hard_link_bytes_ret; - for_lookup_table_entry(table, zero_out_refcnts, NULL); - for_dentry_in_tree(root, calculate_dentry_statistics, &stats); -} - -/* - * Reads a directory entry from the metadata resource. 
- */ -int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, - u64 offset, struct dentry *dentry) +/* Reads a WIM directory entry, including all alternate data stream entries that + * follow it, from the WIM image's metadata resource. */ +static int +read_dentry(const u8 * restrict buf, size_t buf_len, + u64 offset, struct wim_dentry **dentry_ret) { + u64 length; const u8 *p; + const struct wim_dentry_on_disk *disk_dentry; + struct wim_dentry *dentry; + struct wim_inode *inode; + u16 short_name_nbytes; + u16 file_name_nbytes; u64 calculated_size; - char *file_name; - char *file_name_utf8; - char *short_name; - u16 short_name_len; - u16 file_name_len; - size_t file_name_utf8_len; + int ret; - dentry_common_init(dentry); + BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE); + + /* Before reading the whole dentry, we need to read just the length. + * This is because a dentry of length 8 (that is, just the length field) + * terminates the list of sibling directory entries. */ - /*Make sure the dentry really fits into the metadata resource.*/ - if (offset + 8 > metadata_resource_len) { + /* Check for buffer overrun. */ + if (unlikely(offset + sizeof(u64) > buf_len || + offset + sizeof(u64) < offset)) + { ERROR("Directory entry starting at %"PRIu64" ends past the " - "end of the metadata resource (size %"PRIu64")", - offset, metadata_resource_len); - return WIMLIB_ERR_INVALID_DENTRY; + "end of the metadata resource (size %zu)", + offset, buf_len); + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; } - /* Before reading the whole entry, we need to read just the length. - * This is because an entry of length 8 (that is, just the length field) - * terminates the list of sibling directory entries. */ + /* Get pointer to the dentry data. 
*/ + p = &buf[offset]; + disk_dentry = (const struct wim_dentry_on_disk*)p; - p = get_u64(&metadata_resource[offset], &dentry->length); + if (unlikely((uintptr_t)p & 7)) + WARNING("WIM dentry is not 8-byte aligned"); - /* A zero length field (really a length of 8, since that's how big the - * directory entry is...) indicates that this is the end of directory - * dentry. We do not read it into memory as an actual dentry, so just - * return true in that case. */ - if (dentry->length == 0) - return 0; + /* Get dentry length. */ + length = le64_to_cpu(disk_dentry->length); - if (offset + dentry->length >= metadata_resource_len) { - ERROR("Directory entry at offset %"PRIu64" and with size " - "%"PRIu64" ends past the end of the metadata resource " - "(size %"PRIu64")", - offset, dentry->length, metadata_resource_len); - return WIMLIB_ERR_INVALID_DENTRY; + /* Check for end-of-directory. */ + if (length <= 8) { + *dentry_ret = NULL; + return 0; } - /* If it is a recognized length, read the rest of the directory entry. - * Note: The root directory entry has no name, and its length does not - * include the short name length field. */ - if (dentry->length < WIM_DENTRY_DISK_SIZE) { + /* Validate dentry length. */ + if (unlikely(length < sizeof(struct wim_dentry_on_disk))) { ERROR("Directory entry has invalid length of %"PRIu64" bytes", - dentry->length); - return WIMLIB_ERR_INVALID_DENTRY; + length); + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; } - p = get_u32(p, &dentry->attributes); -#ifdef ENABLE_SECURITY_DATA - p = get_u32(p, (u32*)&dentry->security_id); -#else - p += sizeof(u32); -#endif - p = get_u64(p, &dentry->subdir_offset); - - /* 2 unused fields */ - p += 2 * sizeof(u64); + /* Check for buffer overrun. 
*/ + if (unlikely(offset + length > buf_len || + offset + length < offset)) + { + ERROR("Directory entry at offset %"PRIu64" and with size " + "%"PRIu64" ends past the end of the metadata resource " + "(size %zu)", offset, length, buf_len); + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; + } - p = get_u64(p, &dentry->creation_time); - p = get_u64(p, &dentry->last_access_time); - p = get_u64(p, &dentry->last_write_time); + /* Allocate new dentry structure, along with a preliminary inode. */ + ret = new_dentry_with_timeless_inode(T(""), &dentry); + if (ret) + return ret; - p = get_bytes(p, WIM_HASH_SIZE, dentry->hash); - - /* Currently ignoring reparse_tag. */ - p += sizeof(u32); + dentry->length = length; + inode = dentry->d_inode; + + /* Read more fields: some into the dentry, and some into the inode. */ + inode->i_attributes = le32_to_cpu(disk_dentry->attributes); + inode->i_security_id = le32_to_cpu(disk_dentry->security_id); + dentry->subdir_offset = le64_to_cpu(disk_dentry->subdir_offset); + dentry->d_unused_1 = le64_to_cpu(disk_dentry->unused_1); + dentry->d_unused_2 = le64_to_cpu(disk_dentry->unused_2); + inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time); + inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time); + inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time); + copy_hash(inode->i_hash, disk_dentry->unnamed_stream_hash); + + /* I don't know what's going on here. It seems like M$ screwed up the + * reparse points, then put the fields in the same place and didn't + * document it. So we have some fields we read for reparse points, and + * some fields in the same place for non-reparse-points. 
*/ + if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { + inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->reparse.rp_unknown_1); + inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag); + inode->i_rp_unknown_2 = le16_to_cpu(disk_dentry->reparse.rp_unknown_2); + inode->i_not_rpfixed = le16_to_cpu(disk_dentry->reparse.not_rpfixed); + /* Leave inode->i_ino at 0. Note that this means the WIM file + * cannot archive hard-linked reparse points. Such a thing + * doesn't really make sense anyway, although I believe it's + * theoretically possible to have them on NTFS. */ + } else { + inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->nonreparse.rp_unknown_1); + inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id); + } + inode->i_num_ads = le16_to_cpu(disk_dentry->num_alternate_data_streams); - /* The reparse_reserved field does not actually exist. */ + /* Now onto reading the names. There are two of them: the (long) file + * name, and the short name. */ - p = get_u64(p, &dentry->hard_link); - - /* Currently ignoring streams. */ - p += sizeof(u16); + short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes); + file_name_nbytes = le16_to_cpu(disk_dentry->file_name_nbytes); - p = get_u16(p, &short_name_len); - p = get_u16(p, &file_name_len); + if (unlikely((short_name_nbytes & 1) | (file_name_nbytes & 1))) { + ERROR("Dentry name is not valid UTF-16 (odd number of bytes)!"); + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; + goto err_free_dentry; + } - calculated_size = WIM_DENTRY_DISK_SIZE + file_name_len + 2 + - short_name_len; + /* We now know the length of the file name and short name. Make sure + * the length of the dentry is large enough to actually hold them. + * + * The calculated length here is unaligned to allow for the possibility + * that the dentry->length names an unaligned length, although this + * would be unexpected. 
*/ + calculated_size = dentry_correct_length_unaligned(file_name_nbytes, + short_name_nbytes); - if (dentry->length < calculated_size) { + if (unlikely(dentry->length < calculated_size)) { ERROR("Unexpected end of directory entry! (Expected " - "%"PRIu64" bytes, got %"PRIu64" bytes. " - "short_name_len = %hu, file_name_len = %hu)", - calculated_size, dentry->length, - short_name_len, file_name_len); - return WIMLIB_ERR_INVALID_DENTRY; - } - - /* Read the filename. */ - file_name = MALLOC(file_name_len); - if (!file_name) { - ERROR("Failed to allocate %hu bytes for dentry file name", - file_name_len); - return WIMLIB_ERR_NOMEM; + "at least %"PRIu64" bytes, got %"PRIu64" bytes.)", + calculated_size, dentry->length); + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; + goto err_free_dentry; } - p = get_bytes(p, file_name_len, file_name); - /* Convert filename to UTF-8. */ - file_name_utf8 = utf16_to_utf8(file_name, file_name_len, - &file_name_utf8_len); + /* Advance p to point past the base dentry, to the first name. */ + p += sizeof(struct wim_dentry_on_disk); - if (!file_name_utf8) { - ERROR("Failed to allocate memory to convert UTF-16 " - "filename (%hu bytes) to UTF-8", file_name_len); - goto out_free_file_name; + /* Read the filename if present. Note: if the filename is empty, there + * is no null terminator following it. */ + if (file_name_nbytes) { + dentry->file_name = MALLOC(file_name_nbytes + 2); + if (dentry->file_name == NULL) { + ret = WIMLIB_ERR_NOMEM; + goto err_free_dentry; + } + dentry->file_name_nbytes = file_name_nbytes; + memcpy(dentry->file_name, p, file_name_nbytes); + p += file_name_nbytes + 2; + dentry->file_name[file_name_nbytes / 2] = cpu_to_le16(0); } - /* Undocumented padding between file name and short name. This probably - * is supposed to be a terminating null character. */ - p += 2; - - /* Read the short filename. 
*/ - short_name = MALLOC(short_name_len); - if (!short_name) { - ERROR("Failed to allocate %hu bytes for short filename", - short_name_len); - goto out_free_file_name_utf8; + /* Read the short filename if present. Note: if there is no short + * filename, there is no null terminator following it. */ + if (short_name_nbytes) { + dentry->short_name = MALLOC(short_name_nbytes + 2); + if (dentry->short_name == NULL) { + ret = WIMLIB_ERR_NOMEM; + goto err_free_dentry; + } + dentry->short_name_nbytes = short_name_nbytes; + memcpy(dentry->short_name, p, short_name_nbytes); + p += short_name_nbytes + 2; + dentry->short_name[short_name_nbytes / 2] = cpu_to_le16(0); } - get_bytes(p, short_name_len, short_name); + /* Align the dentry length. */ + dentry->length = (dentry->length + 7) & ~7; + + /* Read the alternate data streams, if present. inode->i_num_ads tells + * us how many they are, and they will directly follow the dentry in the + * metadata resource buffer. + * + * Note that each alternate data stream entry begins on an 8-byte + * aligned boundary, and the alternate data stream entries seem to NOT + * be included in the dentry->length field for some reason. 
*/ + if (unlikely(inode->i_num_ads != 0)) { + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; + if (offset + dentry->length > buf_len || + (ret = read_ads_entries(&buf[offset + dentry->length], + inode, + buf_len - offset - dentry->length))) + { + ERROR("Failed to read alternate data stream " + "entries of WIM dentry \"%"WS"\"", + dentry->file_name); + goto err_free_dentry; + } + } - dentry->file_name = file_name; - dentry->file_name_utf8 = file_name_utf8; - dentry->short_name = short_name; - dentry->file_name_len = file_name_len; - dentry->file_name_utf8_len = file_name_utf8_len; - dentry->short_name_len = short_name_len; + *dentry_ret = dentry; return 0; -out_free_file_name_utf8: - FREE(dentry->file_name_utf8); -out_free_file_name: - FREE(dentry->file_name); - return WIMLIB_ERR_NOMEM; + +err_free_dentry: + free_dentry(dentry); + return ret; } -/* - * Writes a dentry to an output buffer. - * - * @dentry: The dentry structure. - * @p: The memory location to write the data to. - * @return: True on success, false on failure. 
- */ -static u8 *write_dentry(const struct dentry *dentry, u8 *p) +static const tchar * +dentry_get_file_type_string(const struct wim_dentry *dentry) { - u8 *orig_p = p; - memset(p, 0, dentry->length); - p = put_u64(p, dentry->length); - p = put_u32(p, dentry->attributes); -#ifdef ENABLE_SECURITY_DATA - p = put_u32(p, dentry->security_id); -#else - p = put_u32(p, (u32)(-1)); -#endif - p = put_u64(p, dentry->subdir_offset); - p = put_u64(p, 0); /* unused1 */ - p = put_u64(p, 0); /* unused2 */ - p = put_u64(p, dentry->creation_time); - p = put_u64(p, dentry->last_access_time); - p = put_u64(p, dentry->last_write_time); - if (!is_empty_file_hash(dentry->hash)) - memcpy(p, dentry->hash, WIM_HASH_SIZE); + const struct wim_inode *inode = dentry->d_inode; + if (inode_is_directory(inode)) + return T("directory"); + else if (inode_is_symlink(inode)) + return T("symbolic link"); else - DEBUG("zero hash for %s\n", dentry->file_name_utf8); - p += WIM_HASH_SIZE; - p = put_u32(p, 0); /* reparse_tag */ - p = put_u64(p, dentry->hard_link); - p = put_u16(p, 0); /*streams */ - p = put_u16(p, dentry->short_name_len); - p = put_u16(p, dentry->file_name_len); - p = put_bytes(p, dentry->file_name_len, (u8*)dentry->file_name); - p = put_u16(p, 0); /* filename padding, 2 bytes. */ - p = put_bytes(p, dentry->short_name_len, (u8*)dentry->short_name); - return orig_p + dentry->length; -} - -/* Recursive function that writes a dentry tree rooted at @tree, not including - * @tree itself, which has already been written, except in the case of the root - * dentry, which is written right away, along with an end-of-directory entry. */ -u8 *write_dentry_tree(const struct dentry *tree, u8 *p) -{ - const struct dentry *child; - - if (dentry_is_root(tree)) { - p = write_dentry(tree, p); - - /* write end of directory entry */ - p = put_u64(p, 0); - } else { - /* Nothing to do for a regular file. 
*/ - if (dentry_is_regular_file(tree)) - return p; + return T("file"); +} + +static bool +dentry_is_dot_or_dotdot(const struct wim_dentry *dentry) +{ + if (dentry->file_name_nbytes <= 4) { + if (dentry->file_name_nbytes == 4) { + if (dentry->file_name[0] == cpu_to_le16('.') && + dentry->file_name[1] == cpu_to_le16('.')) + return true; + } else if (dentry->file_name_nbytes == 2) { + if (dentry->file_name[0] == cpu_to_le16('.')) + return true; + } } + return false; +} - /* Write child dentries and end-of-directory entry. */ - child = tree->children; - if (child) { - do { - p = write_dentry(child, p); - child = child->next; - } while (child != tree->children); +static int +read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len, + struct wim_dentry * restrict dir) +{ + u64 cur_offset = dir->subdir_offset; + + /* Check for cyclic directory structure, which would cause infinite + * recursion if not handled. */ + for (struct wim_dentry *d = dir->parent; + !dentry_is_root(d); d = d->parent) + { + if (unlikely(d->subdir_offset == cur_offset)) { + ERROR("Cyclic directory structure detected: children " + "of \"%"TS"\" coincide with children of \"%"TS"\"", + dentry_full_path(dir), dentry_full_path(d)); + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; + } } - /* write end of directory entry */ - p = put_u64(p, 0); + for (;;) { + struct wim_dentry *child; + struct wim_dentry *duplicate; + int ret; - /* Recurse on children. */ - if (child) { - do { - p = write_dentry_tree(child, p); - child = child->next; - } while (child != tree->children); + /* Read next child of @dir. */ + ret = read_dentry(buf, buf_len, cur_offset, &child); + if (ret) + return ret; + + /* Check for end of directory. */ + if (child == NULL) + return 0; + + /* Advance to the offset of the next child. 
Note: We need to + * advance by the TOTAL length of the dentry, not by the length + * child->length, which although it does take into account the + * padding, it DOES NOT take into account alternate stream + * entries. */ + cur_offset += dentry_in_total_length(child); + + /* All dentries except the root should be named. */ + if (unlikely(!dentry_has_long_name(child))) { + WARNING("Ignoring unnamed dentry in " + "directory \"%"TS"\"", dentry_full_path(dir)); + free_dentry(child); + continue; + } + + /* Don't allow files named "." or "..". */ + if (unlikely(dentry_is_dot_or_dotdot(child))) { + WARNING("Ignoring file named \".\" or \"..\"; " + "potentially malicious archive!!!"); + free_dentry(child); + continue; + } + + /* Link the child into the directory. */ + duplicate = dentry_add_child(dir, child); + if (unlikely(duplicate)) { + /* We already found a dentry with this same + * case-sensitive long name. Only keep the first one. + */ + const tchar *child_type, *duplicate_type; + child_type = dentry_get_file_type_string(child); + duplicate_type = dentry_get_file_type_string(duplicate); + WARNING("Ignoring duplicate %"TS" \"%"TS"\" " + "(the WIM image already contains a %"TS" " + "at that path with the exact same name)", + child_type, dentry_full_path(duplicate), + duplicate_type); + free_dentry(child); + continue; + } + + /* If this child is a directory that itself has children, call + * this procedure recursively. */ + if (child->subdir_offset != 0) { + if (likely(dentry_is_directory(child))) { + ret = read_dentry_tree_recursive(buf, + buf_len, + child); + if (ret) + return ret; + } else { + WARNING("Ignoring children of " + "non-directory file \"%"TS"\"", + dentry_full_path(child)); + } + } } - return p; } -/* Reads the children of a dentry, and all their children, ..., etc. from the - * metadata resource and into the dentry tree. +/* + * Read a tree of dentries (directory entries) from a WIM metadata resource. 
+ * + * @buf: + * Buffer containing an uncompressed WIM metadata resource. + * + * @buf_len: + * Length of the uncompressed metadata resource, in bytes. * - * @metadata_resource: An array that contains the uncompressed metadata - * resource for the WIM file. - * @metadata_resource_len: The length of @metadata_resource. - * @dentry: A pointer to a struct dentry that is the root of the directory - * tree and has already been read from the metadata resource. It - * does not need to be the real root because this procedure is - * called recursively. + * @root_offset + * Offset in the metadata resource of the root of the dentry tree. * - * @return: Zero on success, nonzero on failure. + * @root_ret: + * On success, either NULL or a pointer to the root dentry is written to + * this location. The former case only occurs in the unexpected case that + * the tree began with an end-of-directory entry. + * + * Return values: + * WIMLIB_ERR_SUCCESS (0) + * WIMLIB_ERR_INVALID_METADATA_RESOURCE + * WIMLIB_ERR_NOMEM */ -int read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, - struct dentry *dentry) -{ - u64 cur_offset = dentry->subdir_offset; - struct dentry *prev_child = NULL; - struct dentry *first_child = NULL; - struct dentry *child; - struct dentry cur_child; +int +read_dentry_tree(const u8 *buf, size_t buf_len, + u64 root_offset, struct wim_dentry **root_ret) +{ int ret; + struct wim_dentry *root; - /* If @dentry is a regular file, nothing more needs to be done for this - * branch. */ - if (cur_offset == 0) - return 0; + DEBUG("Reading dentry tree (root_offset=%"PRIu64")", root_offset); - /* Find and read all the children of @dentry. */ - while (1) { - - /* Read next child of @dentry into @cur_child. */ - ret = read_dentry(metadata_resource, metadata_resource_len, - cur_offset, &cur_child); - if (ret != 0) - break; + ret = read_dentry(buf, buf_len, root_offset, &root); + if (ret) + return ret; - /* Check for end of directory. 
*/ - if (cur_child.length == 0) { - ret = 0; - break; + if (likely(root != NULL)) { + if (unlikely(dentry_has_long_name(root) || + dentry_has_short_name(root))) + { + WARNING("The root directory has a nonempty name; " + "removing it."); + FREE(root->file_name); + FREE(root->short_name); + root->file_name = NULL; + root->short_name = NULL; + root->file_name_nbytes = 0; + root->short_name_nbytes = 0; } - /* Not end of directory. Allocate this child permanently and - * link it to the parent and previous child. */ - child = MALLOC(sizeof(struct dentry)); - if (!child) { - ERROR("Failed to allocate %zu bytes for new dentry", - sizeof(struct dentry)); - ret = WIMLIB_ERR_NOMEM; - break; + if (unlikely(!dentry_is_directory(root))) { + ERROR("The root of the WIM image is not a directory!"); + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; + goto err_free_dentry_tree; } - memcpy(child, &cur_child, sizeof(struct dentry)); - if (prev_child) { - prev_child->next = child; - child->prev = prev_child; - } else { - first_child = child; + if (likely(root->subdir_offset != 0)) { + ret = read_dentry_tree_recursive(buf, buf_len, root); + if (ret) + goto err_free_dentry_tree; } + } else { + WARNING("The metadata resource has no directory entries; " + "treating as an empty image."); + } + *root_ret = root; + return 0; - child->parent = dentry; - prev_child = child; +err_free_dentry_tree: + free_dentry_tree(root, NULL); + return ret; +} - /* If there are children of this child, call this procedure - * recursively. */ - if (child->subdir_offset != 0) { - ret = read_dentry_tree(metadata_resource, - metadata_resource_len, child); - if (ret != 0) - break; - } +/* + * Writes a WIM alternate data stream (ADS) entry to an output buffer. + * + * @ads_entry: The ADS entry structure. + * @hash: The hash field to use (instead of the one in the ADS entry). + * @p: The memory location to write the data to. + * + * Returns a pointer to the byte after the last byte written. 
+ */ +static u8 * +write_ads_entry(const struct wim_ads_entry *ads_entry, + const u8 *hash, u8 * restrict p) +{ + struct wim_ads_entry_on_disk *disk_ads_entry = + (struct wim_ads_entry_on_disk*)p; + u8 *orig_p = p; + + disk_ads_entry->reserved = cpu_to_le64(ads_entry->reserved); + copy_hash(disk_ads_entry->hash, hash); + disk_ads_entry->stream_name_nbytes = cpu_to_le16(ads_entry->stream_name_nbytes); + p += sizeof(struct wim_ads_entry_on_disk); + if (ads_entry->stream_name_nbytes) { + p = mempcpy(p, ads_entry->stream_name, + ads_entry->stream_name_nbytes + 2); + } + /* Align to 8-byte boundary */ + while ((uintptr_t)p & 7) + *p++ = 0; + disk_ads_entry->length = cpu_to_le64(p - orig_p); + return p; +} - /* Advance to the offset of the next child. */ - cur_offset += child->length; +/* + * Writes a WIM dentry to an output buffer. + * + * @dentry: The dentry structure. + * @p: The memory location to write the data to. + * + * Returns the pointer to the byte after the last byte we wrote as part of the + * dentry, including any alternate data stream entries. 
+ */ +static u8 * +write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) +{ + const struct wim_inode *inode; + struct wim_dentry_on_disk *disk_dentry; + const u8 *orig_p; + const u8 *hash; + bool use_dummy_stream; + u16 num_ads; + + wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */ + orig_p = p; + + inode = dentry->d_inode; + use_dummy_stream = inode_needs_dummy_stream(inode); + disk_dentry = (struct wim_dentry_on_disk*)p; + + disk_dentry->attributes = cpu_to_le32(inode->i_attributes); + disk_dentry->security_id = cpu_to_le32(inode->i_security_id); + disk_dentry->subdir_offset = cpu_to_le64(dentry->subdir_offset); + disk_dentry->unused_1 = cpu_to_le64(dentry->d_unused_1); + disk_dentry->unused_2 = cpu_to_le64(dentry->d_unused_2); + disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time); + disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time); + disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time); + if (use_dummy_stream) + hash = zero_hash; + else + hash = inode_stream_hash(inode, 0); + copy_hash(disk_dentry->unnamed_stream_hash, hash); + if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { + disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1); + disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag); + disk_dentry->reparse.rp_unknown_2 = cpu_to_le16(inode->i_rp_unknown_2); + disk_dentry->reparse.not_rpfixed = cpu_to_le16(inode->i_not_rpfixed); + } else { + disk_dentry->nonreparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1); + disk_dentry->nonreparse.hard_link_group_id = + cpu_to_le64((inode->i_nlink == 1) ? 
0 : inode->i_ino); + } + num_ads = inode->i_num_ads; + if (use_dummy_stream) + num_ads++; + disk_dentry->num_alternate_data_streams = cpu_to_le16(num_ads); + disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes); + disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes); + p += sizeof(struct wim_dentry_on_disk); + + wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry)); + + if (dentry_has_long_name(dentry)) + p = mempcpy(p, dentry->file_name, dentry->file_name_nbytes + 2); + + if (dentry_has_short_name(dentry)) + p = mempcpy(p, dentry->short_name, dentry->short_name_nbytes + 2); + + /* Align to 8-byte boundary */ + while ((uintptr_t)p & 7) + *p++ = 0; + + /* We calculate the correct length of the dentry ourselves because the + * dentry->length field may been set to an unexpected value from when we + * read the dentry in (for example, there may have been unknown data + * appended to the end of the dentry...). Furthermore, the dentry may + * have been renamed, thus changing its needed length. */ + disk_dentry->length = cpu_to_le64(p - orig_p); + + if (use_dummy_stream) { + hash = inode_unnamed_stream_hash(inode); + p = write_ads_entry(&(struct wim_ads_entry){}, hash, p); } - /* Link last child to first one, and set parent's - * children pointer to the first child. */ - if (prev_child) { - prev_child->next = first_child; - first_child->prev = prev_child; + /* Write the alternate data streams entries, if any. 
*/ + for (u16 i = 0; i < inode->i_num_ads; i++) { + hash = inode_stream_hash(inode, i + 1); + p = write_ads_entry(&inode->i_ads_entries[i], hash, p); } - dentry->children = first_child; - return ret; + + return p; +} + +static int +write_dentry_cb(struct wim_dentry *dentry, void *_p) +{ + u8 **p = _p; + *p = write_dentry(dentry, *p); + return 0; +} + +static u8 * +write_dentry_tree_recursive(const struct wim_dentry *parent, u8 *p); + +static int +write_dentry_tree_recursive_cb(struct wim_dentry *dentry, void *_p) +{ + u8 **p = _p; + *p = write_dentry_tree_recursive(dentry, *p); + return 0; +} + +/* Recursive function that writes a dentry tree rooted at @parent, not including + * @parent itself, which has already been written. */ +static u8 * +write_dentry_tree_recursive(const struct wim_dentry *parent, u8 *p) +{ + /* Nothing to do if this dentry has no children. */ + if (parent->subdir_offset == 0) + return p; + + /* Write child dentries and end-of-directory entry. + * + * Note: we need to write all of this dentry's children before + * recursively writing the directory trees rooted at each of the child + * dentries, since the on-disk dentries for a dentry's children are + * always located at consecutive positions in the metadata resource! */ + for_dentry_child(parent, write_dentry_cb, &p); + + /* write end of directory entry */ + *(le64*)p = cpu_to_le64(0); + p += 8; + + /* Recurse on children. */ + for_dentry_child(parent, write_dentry_tree_recursive_cb, &p); + return p; +} + +/* Writes a directory tree to the metadata resource. + * + * @root: Root of the dentry tree. + * @p: Pointer to a buffer with enough space for the dentry tree. + * + * Returns pointer to the byte after the last byte we wrote. 
+ */ +u8 * +write_dentry_tree(const struct wim_dentry * restrict root, u8 * restrict p) +{ + DEBUG("Writing dentry tree."); + wimlib_assert(dentry_is_root(root)); + + /* If we're the root dentry, we have no parent that already + * wrote us, so we need to write ourselves. */ + p = write_dentry(root, p); + + /* Write end of directory entry after the root dentry just to be safe; + * however the root dentry obviously cannot have any siblings. */ + *(le64*)p = cpu_to_le64(0); + p += 8; + + /* Recursively write the rest of the dentry tree. */ + return write_dentry_tree_recursive(root, p); }