X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fdentry.c;h=e87bfefc0b46046ecdef6622e24e9f7f5933e191;hp=559d7e8ba71c5fca359c750a5cf94f86a0db3fb7;hb=d7aa64b64fc9836418293781b2b093f090414b6e;hpb=6cca349b45e66a2b2b82aa5dcd269a4bf61c50db diff --git a/src/dentry.c b/src/dentry.c index 559d7e8b..e87bfefc 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -1,16 +1,15 @@ /* * dentry.c * - * A dentry (directory entry) contains the metadata for a file. In the WIM file - * format, the dentries are stored in the "metadata resource" section right - * after the security data. Each image in the WIM file has its own metadata - * resource with its own security data and dentry tree. Dentries in different - * images may share file resources by referring to the same lookup table - * entries. + * In the WIM file format, the dentries are stored in the "metadata resource" + * section right after the security data. Each image in the WIM file has its + * own metadata resource with its own security data and dentry tree. Dentries + * in different images may share file resources by referring to the same lookup + * table entries. */ /* - * Copyright (C) 2012 Eric Biggers + * Copyright (C) 2012, 2013 Eric Biggers * * This file is part of wimlib, a library for working with WIM files. * @@ -27,30 +26,207 @@ * wimlib; if not, see http://www.gnu.org/licenses/. */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "wimlib.h" +#include "wimlib/dentry.h" +#include "wimlib/encoding.h" +#include "wimlib/endianness.h" +#include "wimlib/error.h" +#include "wimlib/lookup_table.h" +#include "wimlib/metadata.h" +#include "wimlib/paths.h" +#include "wimlib/resource.h" +#include "wimlib/security.h" +#include "wimlib/sha1.h" +#include "wimlib/timestamp.h" + #include -#include -#include -#include -#include "dentry.h" -#include "io.h" -#include "lookup_table.h" -#include "sha1.h" -#include "timestamp.h" -#include "wimlib_internal.h" +/* WIM alternate data stream entry (on-disk format) */ +struct wim_ads_entry_on_disk { + /* Length of the entry, in bytes. This apparently includes all + * fixed-length fields, plus the stream name and null terminator if + * present, and the padding up to an 8 byte boundary. wimlib is a + * little less strict when reading the entries, and only requires that + * the number of bytes from this field is at least as large as the size + * of the fixed length fields and stream name without null terminator. + * */ + le64 length; + + le64 reserved; + + /* SHA1 message digest of the uncompressed stream; or, alternatively, + * can be all zeroes if the stream has zero length. */ + u8 hash[SHA1_HASH_SIZE]; + + /* Length of the stream name, in bytes. 0 if the stream is unnamed. */ + le16 stream_name_nbytes; + + /* Stream name in UTF-16LE. It is @stream_name_nbytes bytes long, + * excluding the the null terminator. There is a null terminator + * character if @stream_name_nbytes != 0; i.e., if this stream is named. + * */ + utf16lechar stream_name[]; +} _packed_attribute; + +#define WIM_ADS_ENTRY_DISK_SIZE 38 + +/* On-disk format of a WIM dentry (directory entry), located in the metadata + * resource for a WIM image. */ +struct wim_dentry_on_disk { + + /* Length of this directory entry in bytes, not including any alternate + * data stream entries. Should be a multiple of 8 so that the following + * dentry or alternate data stream entry is aligned on an 8-byte + * boundary. (If not, wimlib will round it up.) It must be at least as + * long as the fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE), + * plus the lengths of the file name and/or short name if present. + * + * It is also possible for this field to be 0. This situation, which is + * undocumented, indicates the end of a list of sibling nodes in a + * directory. It also means the real length is 8, because the dentry + * included only the length field, but that takes up 8 bytes. */ + le64 length; + + /* Attributes of the file or directory. This is a bitwise OR of the + * FILE_ATTRIBUTE_* constants and should correspond to the value + * retrieved by GetFileAttributes() on Windows. */ + le32 attributes; + + /* A value that specifies the security descriptor for this file or + * directory. If -1, the file or directory has no security descriptor. + * Otherwise, it is a 0-based index into the WIM image's table of + * security descriptors (see: `struct wim_security_data') */ + sle32 security_id; + + /* Offset, in bytes, from the start of the uncompressed metadata + * resource of this directory's child directory entries, or 0 if this + * directory entry does not correspond to a directory or otherwise does + * not have any children. */ + le64 subdir_offset; + + /* Reserved fields */ + le64 unused_1; + le64 unused_2; + + + /* Creation time, last access time, and last write time, in + * 100-nanosecond intervals since 12:00 a.m UTC January 1, 1601. They + * should correspond to the times gotten by calling GetFileTime() on + * Windows. */ + le64 creation_time; + le64 last_access_time; + le64 last_write_time; + + /* Vaguely, the SHA-1 message digest ("hash") of the file's contents. + * More specifically, this is for the "unnamed data stream" rather than + * any "alternate data streams". This hash value is used to look up the + * corresponding entry in the WIM's stream lookup table to actually find + * the file contents within the WIM. + * + * If the file has no unnamed data stream (e.g. is a directory), then + * this field will be all zeroes. If the unnamed data stream is empty + * (i.e. an "empty file"), then this field is also expected to be all + * zeroes. (It will be if wimlib created the WIM image, at least; + * otherwise it can't be ruled out that the SHA-1 message digest of 0 + * bytes of data is given explicitly.) + * + * If the file has reparse data, then this field will instead specify + * the SHA-1 message digest of the reparse data. If it is somehow + * possible for a file to have both an unnamed data stream and reparse + * data, then this is not handled by wimlib. + * + * As a further special case, if this field is all zeroes but there is + * an alternate data stream entry with no name and a nonzero SHA-1 + * message digest field, then that hash must be used instead of this + * one. In fact, when named data streams are present, some versions of + * Windows PE contain a bug where they only look in the alternate data + * stream entries for the unnamed data stream, not here. + */ + u8 unnamed_stream_hash[SHA1_HASH_SIZE]; + /* The format of the following data is not yet completely known and they + * do not correspond to Microsoft's documentation. + * + * If this directory entry is for a reparse point (has + * FILE_ATTRIBUTE_REPARSE_POINT set in the attributes field), then the + * version of the following fields containing the reparse tag is valid. + * Furthermore, the field notated as not_rpfixed, as far as I can tell, + * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the + * targets of absolute symbolic links) were *not* done, and otherwise 0. + * + * If this directory entry is not for a reparse point, then the version + * of the following fields containing the hard_link_group_id is valid. + * All MS says about this field is that "If this file is part of a hard + * link set, all the directory entries in the set will share the same + * value in this field.". However, more specifically I have observed + * the following: + * - If the file is part of a hard link set of size 1, then the + * hard_link_group_id should be set to either 0, which is treated + * specially as indicating "not hardlinked", or any unique value. + * - The specific nonzero values used to identity hard link sets do + * not matter, as long as they are unique. + * - However, due to bugs in Microsoft's software, it is actually NOT + * guaranteed that directory entries that share the same hard link + * group ID are actually hard linked to each either. We have to + * handle this by using special code to use distinguishing features + * (which is possible because some information about the underlying + * inode is repeated in each dentry) to split up these fake hard link + * groups into what they actually are supposed to be. + */ + union { + struct { + le32 rp_unknown_1; + le32 reparse_tag; + le16 rp_unknown_2; + le16 not_rpfixed; + } _packed_attribute reparse; + struct { + le32 rp_unknown_1; + le64 hard_link_group_id; + } _packed_attribute nonreparse; + }; + + /* Number of alternate data stream entries that directly follow this + * dentry on-disk. */ + le16 num_alternate_data_streams; + + /* Length of this file's UTF-16LE encoded short name (8.3 DOS-compatible + * name), if present, in bytes, excluding the null terminator. If this + * file has no short name, then this field should be 0. */ + le16 short_name_nbytes; + + /* Length of this file's UTF-16LE encoded "long" name, excluding the + * null terminator. If this file has no short name, then this field + * should be 0. It's expected that only the root dentry has this field + * set to 0. */ + le16 file_name_nbytes; + + /* Followed by variable length file name, in UTF16-LE, if + * file_name_nbytes != 0. Includes null terminator. */ + /*utf16lechar file_name[];*/ + + /* Followed by variable length short name, in UTF16-LE, if + * short_name_nbytes != 0. Includes null terminator. */ + /*utf16lechar short_name[];*/ +} _packed_attribute; + +#define WIM_DENTRY_DISK_SIZE 102 /* Calculates the unaligned length, in bytes, of an on-disk WIM dentry that has * a file name and short name that take the specified numbers of bytes. This * excludes any alternate data stream entries that may follow the dentry. */ -static u64 __dentry_correct_length_unaligned(u16 file_name_len, - u16 short_name_len) -{ - u64 length = WIM_DENTRY_DISK_SIZE; - if (file_name_len) - length += file_name_len + 2; - if (short_name_len) - length += short_name_len + 2; +static u64 +dentry_correct_length_unaligned(u16 file_name_nbytes, u16 short_name_nbytes) +{ + u64 length = sizeof(struct wim_dentry_on_disk); + if (file_name_nbytes) + length += file_name_nbytes + 2; + if (short_name_nbytes) + length += short_name_nbytes + 2; return length; } @@ -58,220 +234,157 @@ static u64 __dentry_correct_length_unaligned(u16 file_name_len, * the file name length and short name length. Note that dentry->length is * ignored; also, this excludes any alternate data stream entries that may * follow the dentry. */ -static u64 dentry_correct_length_unaligned(const struct dentry *dentry) -{ - return __dentry_correct_length_unaligned(dentry->file_name_len, - dentry->short_name_len); -} - -/* Return the "correct" value to write in the length field of a WIM dentry, - * based on the file name length and short name length. */ -static u64 dentry_correct_length(const struct dentry *dentry) -{ - return (dentry_correct_length_unaligned(dentry) + 7) & ~7; -} - -/* Return %true iff @dentry has the UTF-8 file name @name that has length - * @name_len bytes. */ -static bool dentry_has_name(const struct dentry *dentry, const char *name, - size_t name_len) +static u64 +dentry_correct_length_aligned(const struct wim_dentry *dentry) { - if (dentry->file_name_utf8_len != name_len) - return false; - return memcmp(dentry->file_name_utf8, name, name_len) == 0; -} + u64 len; -/* Return %true iff the alternate data stream entry @entry has the UTF-8 stream - * name @name that has length @name_len bytes. */ -static inline bool ads_entry_has_name(const struct ads_entry *entry, - const char *name, size_t name_len) -{ - if (entry->stream_name_utf8_len != name_len) - return false; - return memcmp(entry->stream_name_utf8, name, name_len) == 0; + len = dentry_correct_length_unaligned(dentry->file_name_nbytes, + dentry->short_name_nbytes); + return (len + 7) & ~7; } -/* Duplicates a UTF-8 name into UTF-8 and UTF-16 strings and returns the strings - * and their lengths in the pointer arguments */ -int get_names(char **name_utf16_ret, char **name_utf8_ret, - u16 *name_utf16_len_ret, u16 *name_utf8_len_ret, - const char *name) +/* Duplicates a string of system-dependent encoding into a UTF-16LE string and + * returns the string and its length, in bytes, in the pointer arguments. Frees + * any existing string at the return location before overwriting it. */ +static int +get_utf16le_name(const tchar *name, utf16lechar **name_utf16le_ret, + u16 *name_utf16le_nbytes_ret) { - size_t utf8_len; - size_t utf16_len; - char *name_utf16, *name_utf8; - - utf8_len = strlen(name); - - name_utf16 = utf8_to_utf16(name, utf8_len, &utf16_len); - - if (!name_utf16) + utf16lechar *name_utf16le; + size_t name_utf16le_nbytes; + int ret; +#if TCHAR_IS_UTF16LE + name_utf16le_nbytes = tstrlen(name) * sizeof(utf16lechar); + name_utf16le = MALLOC(name_utf16le_nbytes + sizeof(utf16lechar)); + if (name_utf16le == NULL) return WIMLIB_ERR_NOMEM; + memcpy(name_utf16le, name, name_utf16le_nbytes + sizeof(utf16lechar)); + ret = 0; +#else - name_utf8 = MALLOC(utf8_len + 1); - if (!name_utf8) { - FREE(name_utf8); - return WIMLIB_ERR_NOMEM; + ret = tstr_to_utf16le(name, tstrlen(name), &name_utf16le, + &name_utf16le_nbytes); + if (ret == 0) { + if (name_utf16le_nbytes > 0xffff) { + FREE(name_utf16le); + ERROR("Multibyte string \"%"TS"\" is too long!", name); + ret = WIMLIB_ERR_INVALID_UTF8_STRING; + } } - memcpy(name_utf8, name, utf8_len + 1); - FREE(*name_utf8_ret); - FREE(*name_utf16_ret); - *name_utf8_ret = name_utf8; - *name_utf16_ret = name_utf16; - *name_utf8_len_ret = utf8_len; - *name_utf16_len_ret = utf16_len; - return 0; +#endif + if (ret == 0) { + FREE(*name_utf16le_ret); + *name_utf16le_ret = name_utf16le; + *name_utf16le_nbytes_ret = name_utf16le_nbytes; + } + return ret; } -/* Changes the name of a dentry to @new_name. Only changes the file_name and - * file_name_utf8 fields; does not change the short_name, short_name_utf8, or - * full_path_utf8 fields. Also recalculates its length. */ -static int change_dentry_name(struct dentry *dentry, const char *new_name) +/* Sets the name of a WIM dentry from a multibyte string. */ +int +set_dentry_name(struct wim_dentry *dentry, const tchar *new_name) { int ret; - - ret = get_names(&dentry->file_name, &dentry->file_name_utf8, - &dentry->file_name_len, &dentry->file_name_utf8_len, - new_name); - FREE(dentry->short_name); - dentry->short_name_len = 0; - if (ret == 0) - dentry->length = dentry_correct_length(dentry); + ret = get_utf16le_name(new_name, &dentry->file_name, + &dentry->file_name_nbytes); + if (ret == 0) { + /* Clear the short name and recalculate the dentry length */ + if (dentry_has_short_name(dentry)) { + FREE(dentry->short_name); + dentry->short_name = NULL; + dentry->short_name_nbytes = 0; + } + } return ret; } -/* - * Changes the name of an alternate data stream */ -static int change_ads_name(struct ads_entry *entry, const char *new_name) -{ - return get_names(&entry->stream_name, &entry->stream_name_utf8, - &entry->stream_name_len, - &entry->stream_name_utf8_len, - new_name); -} - /* Returns the total length of a WIM alternate data stream entry on-disk, * including the stream name, the null terminator, AND the padding after the - * entry to align the next one (or the next dentry) on an 8-byte boundary. */ -static u64 ads_entry_total_length(const struct ads_entry *entry) + * entry to align the next ADS entry or dentry on an 8-byte boundary. */ +static u64 +ads_entry_total_length(const struct wim_ads_entry *entry) { - u64 len = WIM_ADS_ENTRY_DISK_SIZE; - if (entry->stream_name_len) - len += entry->stream_name_len + 2; + u64 len = sizeof(struct wim_ads_entry_on_disk); + if (entry->stream_name_nbytes) + len += entry->stream_name_nbytes + 2; return (len + 7) & ~7; } - -static u64 __dentry_total_length(const struct dentry *dentry, u64 length) -{ - const struct inode *inode = dentry->d_inode; - for (u16 i = 0; i < inode->num_ads; i++) - length += ads_entry_total_length(&inode->ads_entries[i]); - return (length + 7) & ~7; -} - -/* Calculate the aligned *total* length of an on-disk WIM dentry. This includes - * all alternate data streams. */ -u64 dentry_correct_total_length(const struct dentry *dentry) -{ - return __dentry_total_length(dentry, - dentry_correct_length_unaligned(dentry)); -} - -/* Like dentry_correct_total_length(), but use the existing dentry->length field - * instead of calculating its "correct" value. */ -static u64 dentry_total_length(const struct dentry *dentry) +/* + * Determine whether to include a "dummy" stream when writing a WIM dentry: + * + * Some versions of Microsoft's WIM software (the boot driver(s) in WinPE 3.0, + * for example) contain a bug where they assume the first alternate data stream + * (ADS) entry of a dentry with a nonzero ADS count specifies the unnamed + * stream, even if it has a name and the unnamed stream is already specified in + * the hash field of the dentry itself. + * + * wimlib has to work around this behavior by carefully emulating the behavior + * of (most versions of) ImageX/WIMGAPI, which move the unnamed stream reference + * into the alternate stream entries whenever there are named data streams, even + * though there is already a field in the dentry itself for the unnamed stream + * reference, which then goes to waste. + */ +static inline bool inode_needs_dummy_stream(const struct wim_inode *inode) { - return __dentry_total_length(dentry, dentry->length); + return (inode->i_num_ads > 0 && + inode->i_num_ads < 0xffff && /* overflow check */ + inode->i_canonical_streams); /* assume the dentry is okay if it + already had an unnamed ADS entry + when it was read in */ } -/* Transfers file attributes from a `stat' buffer to a WIM "inode". */ -void stbuf_to_inode(const struct stat *stbuf, struct inode *inode) +/* Calculate the total number of bytes that will be consumed when a WIM dentry + * is written. This includes base dentry and name fields as well as all + * alternate data stream entries and alignment bytes. */ +u64 +dentry_out_total_length(const struct wim_dentry *dentry) { - if (S_ISLNK(stbuf->st_mode)) { - inode->attributes = FILE_ATTRIBUTE_REPARSE_POINT; - inode->reparse_tag = WIM_IO_REPARSE_TAG_SYMLINK; - } else if (S_ISDIR(stbuf->st_mode)) { - inode->attributes = FILE_ATTRIBUTE_DIRECTORY; - } else { - inode->attributes = FILE_ATTRIBUTE_NORMAL; - } - if (sizeof(ino_t) >= 8) - inode->ino = (u64)stbuf->st_ino; - else - inode->ino = (u64)stbuf->st_ino | - ((u64)stbuf->st_dev << ((sizeof(ino_t) * 8) & 63)); - /* Set timestamps */ - inode->creation_time = timespec_to_wim_timestamp(&stbuf->st_mtim); - inode->last_write_time = timespec_to_wim_timestamp(&stbuf->st_mtim); - inode->last_access_time = timespec_to_wim_timestamp(&stbuf->st_atim); -} + u64 length = dentry_correct_length_aligned(dentry); + const struct wim_inode *inode = dentry->d_inode; -#ifdef WITH_FUSE -/* Transfers file attributes from a struct inode to a `stat' buffer. - * - * The lookup table entry tells us which stream in the inode we are statting. - * For a named data stream, everything returned is the same as the unnamed data - * stream except possibly the size and block count. */ -int inode_to_stbuf(const struct inode *inode, struct lookup_table_entry *lte, - struct stat *stbuf) -{ - if (inode_is_symlink(inode)) - stbuf->st_mode = S_IFLNK | 0777; - else if (inode_is_directory(inode)) - stbuf->st_mode = S_IFDIR | 0755; - else - stbuf->st_mode = S_IFREG | 0755; + if (inode_needs_dummy_stream(inode)) + length += ads_entry_total_length(&(struct wim_ads_entry){}); - stbuf->st_ino = (ino_t)inode->ino; - stbuf->st_nlink = inode->link_count; - stbuf->st_uid = getuid(); - stbuf->st_gid = getgid(); + for (u16 i = 0; i < inode->i_num_ads; i++) + length += ads_entry_total_length(&inode->i_ads_entries[i]); - if (lte) { - if (lte->resource_location == RESOURCE_IN_STAGING_FILE) { - wimlib_assert(lte->staging_file_name); - struct stat native_stat; - if (stat(lte->staging_file_name, &native_stat) != 0) { - DEBUG("Failed to stat `%s': %m", - lte->staging_file_name); - return -errno; - } - stbuf->st_size = native_stat.st_size; - } else { - stbuf->st_size = wim_resource_size(lte); - } - } else { - stbuf->st_size = 0; - } + return length; +} - stbuf->st_atime = wim_timestamp_to_unix(inode->last_access_time); - stbuf->st_mtime = wim_timestamp_to_unix(inode->last_write_time); - stbuf->st_ctime = wim_timestamp_to_unix(inode->creation_time); - stbuf->st_blocks = (stbuf->st_size + 511) / 512; - return 0; +/* Calculate the aligned, total length of a dentry, including all alternate data + * stream entries. Uses dentry->length. */ +static u64 +dentry_in_total_length(const struct wim_dentry *dentry) +{ + u64 length = dentry->length; + const struct wim_inode *inode = dentry->d_inode; + for (u16 i = 0; i < inode->i_num_ads; i++) + length += ads_entry_total_length(&inode->i_ads_entries[i]); + return (length + 7) & ~7; } -#endif -int for_dentry_in_rbtree(struct rb_node *root, - int (*visitor)(struct dentry *, void *), - void *arg) +int +for_dentry_in_rbtree(struct rb_node *root, + int (*visitor)(struct wim_dentry *, void *), + void *arg) { int ret; struct rb_node *node = root; LIST_HEAD(stack); - while (true) { + while (1) { if (node) { list_add(&rbnode_dentry(node)->tmp_list, &stack); node = node->rb_left; } else { struct list_head *next; - struct dentry *dentry; + struct wim_dentry *dentry; next = stack.next; if (next == &stack) return 0; - dentry = container_of(next, struct dentry, tmp_list); + dentry = container_of(next, struct wim_dentry, tmp_list); list_del(next); ret = visitor(dentry, arg); if (ret != 0) @@ -281,9 +394,10 @@ int for_dentry_in_rbtree(struct rb_node *root, } } -static int for_dentry_tree_in_rbtree_depth(struct rb_node *node, - int (*visitor)(struct dentry*, void*), - void *arg) +static int +for_dentry_tree_in_rbtree_depth(struct rb_node *node, + int (*visitor)(struct wim_dentry*, void*), + void *arg) { int ret; if (node) { @@ -302,306 +416,184 @@ static int for_dentry_tree_in_rbtree_depth(struct rb_node *node, return 0; } -/*#define RECURSIVE_FOR_DENTRY_IN_TREE*/ - -#ifdef RECURSIVE_FOR_DENTRY_IN_TREE -static int for_dentry_tree_in_rbtree(struct rb_node *node, - int (*visitor)(struct dentry*, void*), - void *arg) +static int +for_dentry_tree_in_rbtree(struct rb_node *node, + int (*visitor)(struct wim_dentry*, void*), + void *arg) { int ret; if (node) { ret = for_dentry_tree_in_rbtree(node->rb_left, visitor, arg); - if (ret != 0) + if (ret) return ret; ret = for_dentry_in_tree(rbnode_dentry(node), visitor, arg); - if (ret != 0) + if (ret) return ret; ret = for_dentry_tree_in_rbtree(node->rb_right, visitor, arg); - if (ret != 0) + if (ret) return ret; } return 0; } -#endif /* - * Calls a function on all directory entries in a WIM dentry tree. Logically, + * Iterate over all children of @dentry, calling the function @visitor, passing + * it a child dentry and the extra argument @arg. + * + * Note: this function iterates over ALL child dentries, even those with the + * same case-insensitive name. + * + * Note: this function clobbers the tmp_list field of the child dentries. */ +int +for_dentry_child(const struct wim_dentry *dentry, + int (*visitor)(struct wim_dentry *, void *), + void *arg) +{ + return for_dentry_in_rbtree(dentry->d_inode->i_children.rb_node, + visitor, + arg); +} + +/* Calls a function on all directory entries in a WIM dentry tree. Logically, * this is a pre-order traversal (the function is called on a parent dentry * before its children), but sibling dentries will be visited in order as well. - * - * In reality, the data structures are more complicated than the above might - * suggest because there is a separate red-black tree for each dentry that - * contains its direct children. - */ -int for_dentry_in_tree(struct dentry *root, - int (*visitor)(struct dentry*, void*), void *arg) + * */ +int +for_dentry_in_tree(struct wim_dentry *root, + int (*visitor)(struct wim_dentry*, void*), void *arg) { -#ifdef RECURSIVE_FOR_DENTRY_IN_TREE - int ret = visitor(root, arg); - if (ret != 0) - return ret; - return for_dentry_tree_in_rbtree(root->d_inode->children.rb_node, visitor, arg); -#else int ret; - struct list_head main_stack; - struct list_head sibling_stack; - struct list_head *sibling_stack_bottom; - struct dentry *main_dentry; - struct rb_node *node; - struct list_head *next_sibling; - struct dentry *dentry; - ret = visitor(root, arg); - if (ret != 0) + if (root == NULL) + return 0; + ret = (*visitor)(root, arg); + if (ret) return ret; - - main_dentry = root; - sibling_stack_bottom = &sibling_stack; - INIT_LIST_HEAD(&main_stack); - INIT_LIST_HEAD(&sibling_stack); - - list_add(&root->tmp_list, &main_stack); - node = root->d_inode->children.rb_node; - - while (1) { - // Prepare for non-recursive in-order traversal of the red-black - // tree of this dentry's children - - while (node) { - // Push this node to the sibling stack and examine the - // left neighbor, if any - list_add(&rbnode_dentry(node)->tmp_list, &sibling_stack); - node = node->rb_left; - } - - next_sibling = sibling_stack.next; - if (next_sibling == sibling_stack_bottom) { - // Done with all siblings. Pop the main dentry to move - // back up one level. - main_dentry = container_of(main_stack.next, - struct dentry, - tmp_list); - list_del(&main_dentry->tmp_list); - - if (main_dentry == root) - goto out; - - // Restore sibling stack bottom from the previous level - sibling_stack_bottom = (void*)main_dentry->parent; - - // Restore the just-popped main dentry's parent - main_dentry->parent = container_of(main_stack.next, - struct dentry, - tmp_list); - - // The next sibling to traverse in the previous level, - // in the in-order traversal of the red-black tree, is - // the one to the right. - node = main_dentry->rb_node.rb_right; - } else { - // The sibling stack is not empty, so there are more to - // go! - - // Pop a sibling from the stack. - list_del(next_sibling); - dentry = container_of(next_sibling, struct dentry, tmp_list); - - // Visit the sibling. - ret = visitor(dentry, arg); - if (ret != 0) { - // Failed. Restore parent pointers for the - // dentries in the main stack - list_for_each_entry(dentry, &main_stack, tmp_list) { - dentry->parent = container_of(dentry->tmp_list.next, - struct dentry, - tmp_list); - } - goto out; - } - - // We'd like to recursively visit the dentry tree rooted - // at this sibling. To do this, add it to the main - // stack, save the bottom of this level's sibling stack - // in the dentry->parent field, re-set the bottom of the - // sibling stack to be its current height, and set - // main_dentry to the sibling so it becomes the parent - // dentry in the next iteration through the outer loop. - if (inode_has_children(dentry->d_inode)) { - list_add(&dentry->tmp_list, &main_stack); - dentry->parent = (void*)sibling_stack_bottom; - sibling_stack_bottom = sibling_stack.next; - - main_dentry = dentry; - node = main_dentry->d_inode->children.rb_node; - } else { - node = dentry->rb_node.rb_right; - } - } - } -out: - root->parent = root; - return ret; -#endif + return for_dentry_tree_in_rbtree(root->d_inode->i_children.rb_node, + visitor, + arg); } -/* - * Like for_dentry_in_tree(), but the visitor function is always called on a - * dentry's children before on itself. - */ -int for_dentry_in_tree_depth(struct dentry *root, - int (*visitor)(struct dentry*, void*), void *arg) +/* Like for_dentry_in_tree(), but the visitor function is always called on a + * dentry's children before on itself. */ +int +for_dentry_in_tree_depth(struct wim_dentry *root, + int (*visitor)(struct wim_dentry*, void*), void *arg) { -#if 1 int ret; - ret = for_dentry_tree_in_rbtree_depth(root->d_inode->children.rb_node, + + if (root == NULL) + return 0; + ret = for_dentry_tree_in_rbtree_depth(root->d_inode->i_children.rb_node, visitor, arg); - if (ret != 0) + if (ret) return ret; - return visitor(root, arg); + return (*visitor)(root, arg); +} -#else +/* Calculate the full path of @dentry. The full path of its parent must have + * already been calculated, or it must be the root dentry. */ +int +calculate_dentry_full_path(struct wim_dentry *dentry) +{ + tchar *full_path; + u32 full_path_nbytes; int ret; - struct list_head main_stack; - struct list_head sibling_stack; - struct list_head *sibling_stack_bottom; - struct dentry *main_dentry; - struct rb_node *node; - struct list_head *next_sibling; - struct dentry *dentry; - main_dentry = root; - sibling_stack_bottom = &sibling_stack; - INIT_LIST_HEAD(&main_stack); - INIT_LIST_HEAD(&sibling_stack); - - list_add(&main_dentry->tmp_list, &main_stack); + if (dentry->_full_path) + return 0; - while (1) { - node = main_dentry->d_inode->children.rb_node; + if (dentry_is_root(dentry)) { + static const tchar _root_path[] = {WIM_PATH_SEPARATOR, T('\0')}; + full_path = TSTRDUP(_root_path); + if (full_path == NULL) + return WIMLIB_ERR_NOMEM; + full_path_nbytes = 1 * sizeof(tchar); + } else { + struct wim_dentry *parent; + tchar *parent_full_path; + u32 parent_full_path_nbytes; + size_t filename_nbytes; - while (1) { - if (node->rb_left) { - list_add(&rbnode_dentry(node)->tmp_list, &sibling_stack); - node = node->rb_left; - continue; - } - if (node->rb_right) { - list_add(&rbnode_dentry(node)->tmp_list, &sibling_stack); - node = node->rb_right; - continue; + parent = dentry->parent; + if (dentry_is_root(parent)) { + parent_full_path = T(""); + parent_full_path_nbytes = 0; + } else { + if (parent->_full_path == NULL) { + ret = calculate_dentry_full_path(parent); + if (ret) + return ret; } - list_add(&rbnode_dentry(node)->tmp_list, &sibling_stack); + parent_full_path = parent->_full_path; + parent_full_path_nbytes = parent->full_path_nbytes; } - pop_sibling: - next_sibling = sibling_stack.next; - if (next_sibling == sibling_stack_bottom) { - main_dentry = container_of(main_stack.next, - struct dentry, - tmp_list); - list_del(&main_dentry->tmp_list); - - - sibling_stack_bottom = (void*)main_dentry->parent; - - if (main_dentry == root) { - main_dentry->parent = main_dentry; - ret = visitor(dentry, arg); - return ret; - } else { - main_dentry->parent = container_of(main_stack.next, - struct dentry, - tmp_list); - } - - ret = visitor(main_dentry, arg); - - if (ret != 0) { - list_del(&root->tmp_list); - list_for_each_entry(dentry, &main_stack, tmp_list) { - dentry->parent = container_of(dentry->tmp_list.next, - struct dentry, - tmp_list); - } - root->parent = root; + /* Append this dentry's name as a tchar string to the full path + * of the parent followed by the path separator */ + #if TCHAR_IS_UTF16LE + filename_nbytes = dentry->file_name_nbytes; + #else + { + int ret = utf16le_to_tstr_nbytes(dentry->file_name, + dentry->file_name_nbytes, + &filename_nbytes); + if (ret) return ret; - } - goto pop_sibling; - } else { - - list_del(next_sibling); - dentry = container_of(next_sibling, struct dentry, tmp_list); - - - list_add(&dentry->tmp_list, &main_stack); - dentry->parent = (void*)sibling_stack_bottom; - sibling_stack_bottom = sibling_stack.next; - - main_dentry = dentry; } + #endif + + full_path_nbytes = parent_full_path_nbytes + sizeof(tchar) + + filename_nbytes; + full_path = MALLOC(full_path_nbytes + sizeof(tchar)); + if (full_path == NULL) + return WIMLIB_ERR_NOMEM; + memcpy(full_path, parent_full_path, parent_full_path_nbytes); + full_path[parent_full_path_nbytes / sizeof(tchar)] = WIM_PATH_SEPARATOR; + #if TCHAR_IS_UTF16LE + memcpy(&full_path[parent_full_path_nbytes / sizeof(tchar) + 1], + dentry->file_name, + filename_nbytes + sizeof(tchar)); + #else + utf16le_to_tstr_buf(dentry->file_name, + dentry->file_name_nbytes, + &full_path[parent_full_path_nbytes / + sizeof(tchar) + 1]); + #endif } -#endif + dentry->_full_path = full_path; + dentry->full_path_nbytes= full_path_nbytes; + return 0; } -/* - * Calculate the full path of @dentry, based on its parent's full path and on - * its UTF-8 file name. - */ -int calculate_dentry_full_path(struct dentry *dentry, void *ignore) +static int +do_calculate_dentry_full_path(struct wim_dentry *dentry, void *_ignore) { - char *full_path; - u32 full_path_len; - if (dentry_is_root(dentry)) { - full_path = MALLOC(2); - if (!full_path) - goto oom; - full_path[0] = '/'; - full_path[1] = '\0'; - full_path_len = 1; - } else { - char *parent_full_path; - u32 parent_full_path_len; - const struct dentry *parent = dentry->parent; + return calculate_dentry_full_path(dentry); +} - if (dentry_is_root(parent)) { - parent_full_path = ""; - parent_full_path_len = 0; - } else { - parent_full_path = parent->full_path_utf8; - parent_full_path_len = parent->full_path_utf8_len; - } +int +calculate_dentry_tree_full_paths(struct wim_dentry *root) +{ + return for_dentry_in_tree(root, do_calculate_dentry_full_path, NULL); +} - full_path_len = parent_full_path_len + 1 + - dentry->file_name_utf8_len; - full_path = MALLOC(full_path_len + 1); - if (!full_path) - goto oom; - - memcpy(full_path, parent_full_path, parent_full_path_len); - full_path[parent_full_path_len] = '/'; - memcpy(full_path + parent_full_path_len + 1, - dentry->file_name_utf8, - dentry->file_name_utf8_len); - full_path[full_path_len] = '\0'; - } - FREE(dentry->full_path_utf8); - dentry->full_path_utf8 = full_path; - dentry->full_path_utf8_len = full_path_len; - return 0; -oom: - ERROR("Out of memory while calculating dentry full path"); - return WIMLIB_ERR_NOMEM; +tchar * +dentry_full_path(struct wim_dentry *dentry) +{ + calculate_dentry_full_path(dentry); + return dentry->_full_path; } -static int increment_subdir_offset(struct dentry *dentry, void *subdir_offset_p) +static int +increment_subdir_offset(struct wim_dentry *dentry, void *subdir_offset_p) { - *(u64*)subdir_offset_p += dentry_correct_total_length(dentry); + *(u64*)subdir_offset_p += dentry_out_total_length(dentry); return 0; } -static int call_calculate_subdir_offsets(struct dentry *dentry, - void *subdir_offset_p) +static int +call_calculate_subdir_offsets(struct wim_dentry *dentry, void *subdir_offset_p) { calculate_subdir_offsets(dentry, subdir_offset_p); return 0; @@ -614,12 +606,13 @@ static int call_calculate_subdir_offsets(struct dentry *dentry, * @subdir_offset_p: The current subdirectory offset; i.e., the subdirectory * offset for @dentry. */ -void calculate_subdir_offsets(struct dentry *dentry, u64 *subdir_offset_p) +void +calculate_subdir_offsets(struct wim_dentry *dentry, u64 *subdir_offset_p) { struct rb_node *node; dentry->subdir_offset = *subdir_offset_p; - node = dentry->d_inode->children.rb_node; + node = dentry->d_inode->i_children.rb_node; if (node) { /* Advance the subdir offset by the amount of space the children * of this dentry take up. */ @@ -642,117 +635,330 @@ void calculate_subdir_offsets(struct dentry *dentry, u64 *subdir_offset_p) } } -static int compare_names(const char *name_1, u16 len_1, - const char *name_2, u16 len_2) +static int +dentry_compare_names_case_insensitive(const struct wim_dentry *d1, + const struct wim_dentry *d2) { - int result = strncasecmp(name_1, name_2, min(len_1, len_2)); - if (result) { - return result; - } else { - return (int)len_1 - (int)len_2; - } + return cmp_utf16le_strings(d1->file_name, + d1->file_name_nbytes / 2, + d2->file_name, + d2->file_name_nbytes / 2, + true); +} + +static int +dentry_compare_names_case_sensitive(const struct wim_dentry *d1, + const struct wim_dentry *d2) +{ + return cmp_utf16le_strings(d1->file_name, + d1->file_name_nbytes / 2, + d2->file_name, + d2->file_name_nbytes / 2, + false); } -static int dentry_compare_names(const struct dentry *d1, const struct dentry *d2) +/* Return %true iff the alternate data stream entry @entry has the UTF-16LE + * stream name @name that has length @name_nbytes bytes. */ +static inline bool +ads_entry_has_name(const struct wim_ads_entry *entry, + const utf16lechar *name, size_t name_nbytes, + bool ignore_case) { - return compare_names(d1->file_name_utf8, d1->file_name_utf8_len, - d2->file_name_utf8, d2->file_name_utf8_len); + return 0 == cmp_utf16le_strings(name, + name_nbytes / 2, + entry->stream_name, + entry->stream_name_nbytes / 2, + ignore_case); } +/* Default case sensitivity behavior for searches with + * WIMLIB_CASE_PLATFORM_DEFAULT specified. This can be modified by + * wimlib_global_init(). */ +bool default_ignore_case = +#ifdef __WIN32__ + true +#else + false +#endif +; -static struct dentry * -get_rbtree_child_with_name(const struct rb_node *node, - const char *name, size_t name_len) +static bool +will_ignore_case(CASE_SENSITIVITY_TYPE case_type) { - do { - struct dentry *child = rbnode_dentry(node); - int result = compare_names(name, name_len, - child->file_name_utf8, - child->file_name_utf8_len); - if (result < 0) + if (case_type == WIMLIB_CASE_SENSITIVE) + return false; + if (case_type == WIMLIB_CASE_INSENSITIVE) + return true; + + return default_ignore_case; +} + + +/* Given a UTF-16LE filename and a directory, look up the dentry for the file. + * Return it if found, otherwise NULL. This is case-sensitive on UNIX and + * case-insensitive on Windows. */ +struct wim_dentry * +get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry, + const utf16lechar *name, + size_t name_nbytes, + CASE_SENSITIVITY_TYPE case_ctype) +{ + struct rb_node *node; + + bool ignore_case = will_ignore_case(case_ctype); + + if (ignore_case) + node = dentry->d_inode->i_children_case_insensitive.rb_node; + else + node = dentry->d_inode->i_children.rb_node; + + struct wim_dentry *child; + while (node) { + if (ignore_case) + child = rb_entry(node, struct wim_dentry, rb_node_case_insensitive); + else + child = rb_entry(node, struct wim_dentry, rb_node); + + int result = cmp_utf16le_strings(name, + name_nbytes / 2, + child->file_name, + child->file_name_nbytes / 2, + ignore_case); + if (result < 0) { node = node->rb_left; - else if (result > 0) + } else if (result > 0) { node = node->rb_right; - else + } else if (!ignore_case || + list_empty(&child->case_insensitive_conflict_list)) { + return child; + } else { + /* Multiple dentries have the same case-insensitive + * name, and a case-insensitive lookup is being + * performed. Choose the dentry with the same + * case-sensitive name, if one exists; otherwise print a + * warning and choose one arbitrarily. */ + struct wim_dentry *alt = child; + size_t num_alts = 0; + + do { + num_alts++; + if (0 == cmp_utf16le_strings(name, + name_nbytes / 2, + alt->file_name, + alt->file_name_nbytes / 2, + false)) + return alt; + alt = list_entry(alt->case_insensitive_conflict_list.next, + struct wim_dentry, + case_insensitive_conflict_list); + } while (alt != child); + + WARNING("Result of case-insensitive lookup is ambiguous\n" + " (returning \"%"TS"\" of %zu " + "possible files, including \"%"TS"\")", + dentry_full_path(child), + num_alts, + dentry_full_path(list_entry(child->case_insensitive_conflict_list.next, + struct wim_dentry, + case_insensitive_conflict_list))); return child; - } while (node); + } + } return NULL; } -/* Returns the child of @dentry that has the file name @name. - * Returns NULL if no child has the name. */ -struct dentry *get_dentry_child_with_name(const struct dentry *dentry, - const char *name) +/* Returns the child of @dentry that has the file name @name. Returns NULL if + * no child has the name. */ +struct wim_dentry * +get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name, + CASE_SENSITIVITY_TYPE case_type) { - struct rb_node *node = dentry->d_inode->children.rb_node; - if (node) - return get_rbtree_child_with_name(node, name, strlen(name)); - else - return NULL; +#if TCHAR_IS_UTF16LE + return get_dentry_child_with_utf16le_name(dentry, name, + tstrlen(name) * sizeof(tchar), + case_type); +#else + utf16lechar *utf16le_name; + size_t utf16le_name_nbytes; + int ret; + struct wim_dentry *child; + + ret = tstr_to_utf16le(name, tstrlen(name) * sizeof(tchar), + &utf16le_name, &utf16le_name_nbytes); + if (ret) { + child = NULL; + } else { + child = get_dentry_child_with_utf16le_name(dentry, + utf16le_name, + utf16le_name_nbytes, + case_type); + FREE(utf16le_name); + } + return child; +#endif } -/* Retrieves the dentry that has the UTF-8 @path relative to the dentry - * @cur_dentry. Returns NULL if no dentry having the path is found. */ -static struct dentry *get_dentry_relative_path(struct dentry *cur_dentry, - const char *path) +static struct wim_dentry * +get_dentry_utf16le(WIMStruct *wim, const utf16lechar *path, + CASE_SENSITIVITY_TYPE case_type) { - if (*path == '\0') - return cur_dentry; + struct wim_dentry *cur_dentry; + const utf16lechar *name_start, *name_end; - struct rb_node *node = cur_dentry->d_inode->children.rb_node; - if (node) { - struct dentry *child; - size_t base_len; - const char *new_path; + /* Start with the root directory of the image. Note: this will be NULL + * if an image has been added directly with wimlib_add_empty_image() but + * no files have been added yet; in that case we fail with ENOENT. */ + cur_dentry = wim_root_dentry(wim); + + name_start = path; + for (;;) { + if (cur_dentry == NULL) { + errno = ENOENT; + return NULL; + } + + if (*name_start && !dentry_is_directory(cur_dentry)) { + errno = ENOTDIR; + return NULL; + } + + while (*name_start == cpu_to_le16(WIM_PATH_SEPARATOR)) + name_start++; - new_path = path_next_part(path, &base_len); + if (!*name_start) + return cur_dentry; - child = get_rbtree_child_with_name(node, path, base_len); - if (child) - return get_dentry_relative_path(child, new_path); + name_end = name_start; + do { + ++name_end; + } while (*name_end != cpu_to_le16(WIM_PATH_SEPARATOR) && *name_end); + + cur_dentry = get_dentry_child_with_utf16le_name(cur_dentry, + name_start, + (u8*)name_end - (u8*)name_start, + case_type); + name_start = name_end; } - return NULL; } -/* Returns the dentry corresponding to the UTF-8 @path, or NULL if there is no - * such dentry. */ -struct dentry *get_dentry(WIMStruct *w, const char *path) +/* + * WIM path lookup: translate a path in the currently selected WIM image to the + * corresponding dentry, if it exists. + * + * @wim + * The WIMStruct for the WIM. The search takes place in the currently + * selected image. + * + * @path + * The path to look up, given relative to the root of the WIM image. + * Characters with value WIM_PATH_SEPARATOR are taken to be path + * separators. Leading path separators are ignored, whereas one or more + * trailing path separators cause the path to only match a directory. + * + * @case_type + * The case-sensitivity behavior of this function, as one of the following + * constants: + * + * - WIMLIB_CASE_SENSITIVE: Perform the search case sensitively. This means + * that names must match exactly. + * + * - WIMLIB_CASE_INSENSITIVE: Perform the search case insensitively. This + * means that names are considered to match if they are equal when + * transformed to upper case. If a path component matches multiple names + * case-insensitively, the name that matches the path component + * case-sensitively is chosen, if existent; otherwise one + * case-insensitively matching name is chosen arbitrarily. + * + * - WIMLIB_CASE_PLATFORM_DEFAULT: Perform either case-sensitive or + * case-insensitive search, depending on the value of the global variable + * default_ignore_case. + * + * In any case, no Unicode normalization is done before comparing strings. + * + * Returns a pointer to the dentry that is the result of the lookup, or NULL if + * no such dentry exists. If NULL is returned, errno is set to one of the + * following values: + * + * ENOTDIR if one of the path components used as a directory existed but + * was not, in fact, a directory. + * + * ENOENT otherwise. + * + * Additional notes: + * + * - This function does not consider a reparse point to be a directory, even + * if it has FILE_ATTRIBUTE_DIRECTORY set. + * + * - This function does not dereference symbolic links or junction points + * when performing the search. + * + * - Since this function ignores leading slashes, the empty path is valid and + * names the root directory of the WIM image. + * + * - An image added with wimlib_add_empty_image() does not have a root + * directory yet, and this function will fail with ENOENT for any path on + * such an image. + */ +struct wim_dentry * +get_dentry(WIMStruct *wim, const tchar *path, CASE_SENSITIVITY_TYPE case_type) { - struct dentry *root = wim_root_dentry(w); - while (*path == '/') - path++; - return get_dentry_relative_path(root, path); -} +#if TCHAR_IS_UTF16LE + return get_dentry_utf16le(wim, path, case_type); +#else + utf16lechar *path_utf16le; + size_t path_utf16le_nbytes; + int ret; + struct wim_dentry *dentry; -struct inode *wim_pathname_to_inode(WIMStruct *w, const char *path) -{ - struct dentry *dentry; - dentry = get_dentry(w, path); - if (dentry) - return dentry->d_inode; - else + ret = tstr_to_utf16le(path, tstrlen(path) * sizeof(tchar), + &path_utf16le, &path_utf16le_nbytes); + if (ret) return NULL; + dentry = get_dentry_utf16le(wim, path_utf16le, case_type); + FREE(path_utf16le); + return dentry; +#endif } -/* Returns the dentry that corresponds to the parent directory of @path, or NULL - * if the dentry is not found. */ -struct dentry *get_parent_dentry(WIMStruct *w, const char *path) +/* Takes in a path of length @len in @buf, and transforms it into a string for + * the path of its parent directory. */ +static void +to_parent_name(tchar *buf, size_t len) { - size_t path_len = strlen(path); - char buf[path_len + 1]; + ssize_t i = (ssize_t)len - 1; + while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR) + i--; + while (i >= 0 && buf[i] != WIM_PATH_SEPARATOR) + i--; + while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR) + i--; + buf[i + 1] = T('\0'); +} - memcpy(buf, path, path_len + 1); +/* Similar to get_dentry(), but returns the dentry named by @path with the last + * component stripped off. + * + * Note: The returned dentry is NOT guaranteed to be a directory. */ +struct wim_dentry * +get_parent_dentry(WIMStruct *wim, const tchar *path, + CASE_SENSITIVITY_TYPE case_type) +{ + size_t path_len = tstrlen(path); + tchar buf[path_len + 1]; + tmemcpy(buf, path, path_len + 1); to_parent_name(buf, path_len); - - return get_dentry(w, buf); + return get_dentry(wim, buf, case_type); } /* Prints the full path of a dentry. */ -int print_dentry_full_path(struct dentry *dentry, void *ignore) +int +print_dentry_full_path(struct wim_dentry *dentry, void *_ignore) { - if (dentry->full_path_utf8) - puts(dentry->full_path_utf8); + int ret = calculate_dentry_full_path(dentry); + if (ret) + return ret; + tprintf(T("%"TS"\n"), dentry->_full_path); return 0; } @@ -760,248 +966,291 @@ int print_dentry_full_path(struct dentry *dentry, void *ignore) * set. */ struct file_attr_flag { u32 flag; - const char *name; + const tchar *name; }; struct file_attr_flag file_attr_flags[] = { - {FILE_ATTRIBUTE_READONLY, "READONLY"}, - {FILE_ATTRIBUTE_HIDDEN, "HIDDEN"}, - {FILE_ATTRIBUTE_SYSTEM, "SYSTEM"}, - {FILE_ATTRIBUTE_DIRECTORY, "DIRECTORY"}, - {FILE_ATTRIBUTE_ARCHIVE, "ARCHIVE"}, - {FILE_ATTRIBUTE_DEVICE, "DEVICE"}, - {FILE_ATTRIBUTE_NORMAL, "NORMAL"}, - {FILE_ATTRIBUTE_TEMPORARY, "TEMPORARY"}, - {FILE_ATTRIBUTE_SPARSE_FILE, "SPARSE_FILE"}, - {FILE_ATTRIBUTE_REPARSE_POINT, "REPARSE_POINT"}, - {FILE_ATTRIBUTE_COMPRESSED, "COMPRESSED"}, - {FILE_ATTRIBUTE_OFFLINE, "OFFLINE"}, - {FILE_ATTRIBUTE_NOT_CONTENT_INDEXED,"NOT_CONTENT_INDEXED"}, - {FILE_ATTRIBUTE_ENCRYPTED, "ENCRYPTED"}, - {FILE_ATTRIBUTE_VIRTUAL, "VIRTUAL"}, + {FILE_ATTRIBUTE_READONLY, T("READONLY")}, + {FILE_ATTRIBUTE_HIDDEN, T("HIDDEN")}, + {FILE_ATTRIBUTE_SYSTEM, T("SYSTEM")}, + {FILE_ATTRIBUTE_DIRECTORY, T("DIRECTORY")}, + {FILE_ATTRIBUTE_ARCHIVE, T("ARCHIVE")}, + {FILE_ATTRIBUTE_DEVICE, T("DEVICE")}, + {FILE_ATTRIBUTE_NORMAL, T("NORMAL")}, + {FILE_ATTRIBUTE_TEMPORARY, T("TEMPORARY")}, + {FILE_ATTRIBUTE_SPARSE_FILE, T("SPARSE_FILE")}, + {FILE_ATTRIBUTE_REPARSE_POINT, T("REPARSE_POINT")}, + {FILE_ATTRIBUTE_COMPRESSED, T("COMPRESSED")}, + {FILE_ATTRIBUTE_OFFLINE, T("OFFLINE")}, + {FILE_ATTRIBUTE_NOT_CONTENT_INDEXED,T("NOT_CONTENT_INDEXED")}, + {FILE_ATTRIBUTE_ENCRYPTED, T("ENCRYPTED")}, + {FILE_ATTRIBUTE_VIRTUAL, T("VIRTUAL")}, }; /* Prints a directory entry. @lookup_table is a pointer to the lookup table, if * available. If the dentry is unresolved and the lookup table is NULL, the * lookup table entries will not be printed. Otherwise, they will be. */ -int print_dentry(struct dentry *dentry, void *lookup_table) +int +print_dentry(struct wim_dentry *dentry, void *lookup_table) { const u8 *hash; - struct lookup_table_entry *lte; - const struct inode *inode = dentry->d_inode; - time_t time; - char *p; - - printf("[DENTRY]\n"); - printf("Length = %"PRIu64"\n", dentry->length); - printf("Attributes = 0x%x\n", inode->attributes); - for (unsigned i = 0; i < ARRAY_LEN(file_attr_flags); i++) - if (file_attr_flags[i].flag & inode->attributes) - printf(" FILE_ATTRIBUTE_%s is set\n", + struct wim_lookup_table_entry *lte; + const struct wim_inode *inode = dentry->d_inode; + tchar buf[50]; + + tprintf(T("[DENTRY]\n")); + tprintf(T("Length = %"PRIu64"\n"), dentry->length); + tprintf(T("Attributes = 0x%x\n"), inode->i_attributes); + for (size_t i = 0; i < ARRAY_LEN(file_attr_flags); i++) + if (file_attr_flags[i].flag & inode->i_attributes) + tprintf(T(" FILE_ATTRIBUTE_%"TS" is set\n"), file_attr_flags[i].name); - printf("Security ID = %d\n", inode->security_id); - printf("Subdir offset = %"PRIu64"\n", dentry->subdir_offset); - - /* Translate the timestamps into something readable */ - time = wim_timestamp_to_unix(inode->creation_time); - p = asctime(gmtime(&time)); - *(strrchr(p, '\n')) = '\0'; - printf("Creation Time = %s UTC\n", p); - - time = wim_timestamp_to_unix(inode->last_access_time); - p = asctime(gmtime(&time)); - *(strrchr(p, '\n')) = '\0'; - printf("Last Access Time = %s UTC\n", p); - - time = wim_timestamp_to_unix(inode->last_write_time); - p = asctime(gmtime(&time)); - *(strrchr(p, '\n')) = '\0'; - printf("Last Write Time = %s UTC\n", p); - - printf("Reparse Tag = 0x%"PRIx32"\n", inode->reparse_tag); - printf("Hard Link Group = 0x%"PRIx64"\n", inode->ino); - printf("Hard Link Group Size = %"PRIu32"\n", inode->link_count); - printf("Number of Alternate Data Streams = %hu\n", inode->num_ads); - printf("Filename = \""); - print_string(dentry->file_name, dentry->file_name_len); - puts("\""); - printf("Filename Length = %hu\n", dentry->file_name_len); - printf("Filename (UTF-8) = \"%s\"\n", dentry->file_name_utf8); - printf("Filename (UTF-8) Length = %hu\n", dentry->file_name_utf8_len); - printf("Short Name = \""); - print_string(dentry->short_name, dentry->short_name_len); - puts("\""); - printf("Short Name Length = %hu\n", dentry->short_name_len); - printf("Full Path (UTF-8) = \"%s\"\n", dentry->full_path_utf8); + tprintf(T("Security ID = %d\n"), inode->i_security_id); + tprintf(T("Subdir offset = %"PRIu64"\n"), dentry->subdir_offset); + + wim_timestamp_to_str(inode->i_creation_time, buf, sizeof(buf)); + tprintf(T("Creation Time = %"TS"\n"), buf); + + wim_timestamp_to_str(inode->i_last_access_time, buf, sizeof(buf)); + tprintf(T("Last Access Time = %"TS"\n"), buf); + + wim_timestamp_to_str(inode->i_last_write_time, buf, sizeof(buf)); + tprintf(T("Last Write Time = %"TS"\n"), buf); + + if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { + tprintf(T("Reparse Tag = 0x%"PRIx32"\n"), inode->i_reparse_tag); + tprintf(T("Reparse Point Flags = 0x%"PRIx16"\n"), + inode->i_not_rpfixed); + tprintf(T("Reparse Point Unknown 2 = 0x%"PRIx32"\n"), + inode->i_rp_unknown_2); + } + tprintf(T("Reparse Point Unknown 1 = 0x%"PRIx32"\n"), + inode->i_rp_unknown_1); + tprintf(T("Hard Link Group = 0x%"PRIx64"\n"), inode->i_ino); + tprintf(T("Hard Link Group Size = %"PRIu32"\n"), inode->i_nlink); + tprintf(T("Number of Alternate Data Streams = %hu\n"), inode->i_num_ads); + if (dentry_has_long_name(dentry)) + wimlib_printf(T("Filename = \"%"WS"\"\n"), dentry->file_name); + if (dentry_has_short_name(dentry)) + wimlib_printf(T("Short Name \"%"WS"\"\n"), dentry->short_name); + if (dentry->_full_path) + tprintf(T("Full Path = \"%"TS"\"\n"), dentry->_full_path); + lte = inode_stream_lte(dentry->d_inode, 0, lookup_table); if (lte) { - print_lookup_table_entry(lte); + print_lookup_table_entry(lte, stdout); } else { hash = inode_stream_hash(inode, 0); if (hash) { - printf("Hash = 0x"); - print_hash(hash); - putchar('\n'); - putchar('\n'); + tprintf(T("Hash = 0x")); + print_hash(hash, stdout); + tputchar(T('\n')); + tputchar(T('\n')); } } - for (u16 i = 0; i < inode->num_ads; i++) { - printf("[Alternate Stream Entry %u]\n", i); - printf("Name = \"%s\"\n", inode->ads_entries[i].stream_name_utf8); - printf("Name Length (UTF-16) = %u\n", - inode->ads_entries[i].stream_name_len); + for (u16 i = 0; i < inode->i_num_ads; i++) { + tprintf(T("[Alternate Stream Entry %u]\n"), i); + wimlib_printf(T("Name = \"%"WS"\"\n"), + inode->i_ads_entries[i].stream_name); + tprintf(T("Name Length (UTF16 bytes) = %hu\n"), + inode->i_ads_entries[i].stream_name_nbytes); hash = inode_stream_hash(inode, i + 1); if (hash) { - printf("Hash = 0x"); - print_hash(hash); - putchar('\n'); + tprintf(T("Hash = 0x")); + print_hash(hash, stdout); + tputchar(T('\n')); } - print_lookup_table_entry(inode_stream_lte(inode, i + 1, - lookup_table)); + print_lookup_table_entry(inode_stream_lte(inode, i + 1, lookup_table), + stdout); } return 0; } -/* Initializations done on every `struct dentry'. */ -static void dentry_common_init(struct dentry *dentry) +/* Initializations done on every `struct wim_dentry'. */ +static void +dentry_common_init(struct wim_dentry *dentry) { - memset(dentry, 0, sizeof(struct dentry)); - dentry->refcnt = 1; + memset(dentry, 0, sizeof(struct wim_dentry)); } -static struct inode *new_timeless_inode() +struct wim_inode * +new_timeless_inode(void) { - struct inode *inode = CALLOC(1, sizeof(struct inode)); + struct wim_inode *inode = CALLOC(1, sizeof(struct wim_inode)); if (inode) { - inode->security_id = -1; - inode->link_count = 1; - #ifdef WITH_FUSE - inode->next_stream_id = 1; - if (pthread_mutex_init(&inode->i_mutex, NULL) != 0) { - ERROR_WITH_ERRNO("Error initializing mutex"); - FREE(inode); - return NULL; - } - #endif - INIT_LIST_HEAD(&inode->dentry_list); + inode->i_security_id = -1; + inode->i_nlink = 1; + inode->i_next_stream_id = 1; + inode->i_not_rpfixed = 1; + inode->i_canonical_streams = 1; + INIT_LIST_HEAD(&inode->i_list); + INIT_LIST_HEAD(&inode->i_dentry); } return inode; } -static struct inode *new_inode() +static struct wim_inode * +new_inode(void) { - struct inode *inode = new_timeless_inode(); + struct wim_inode *inode = new_timeless_inode(); if (inode) { u64 now = get_wim_timestamp(); - inode->creation_time = now; - inode->last_access_time = now; - inode->last_write_time = now; + inode->i_creation_time = now; + inode->i_last_access_time = now; + inode->i_last_write_time = now; } return inode; } -/* - * Creates an unlinked directory entry. - * - * @name: The UTF-8 filename of the new dentry. - * - * Returns a pointer to the new dentry, or NULL if out of memory. - */ -struct dentry *new_dentry(const char *name) +/* Creates an unlinked directory entry. */ +int +new_dentry(const tchar *name, struct wim_dentry **dentry_ret) { - struct dentry *dentry; + struct wim_dentry *dentry; + int ret; - dentry = MALLOC(sizeof(struct dentry)); - if (!dentry) - goto err; + dentry = MALLOC(sizeof(struct wim_dentry)); + if (dentry == NULL) + return WIMLIB_ERR_NOMEM; dentry_common_init(dentry); - if (change_dentry_name(dentry, name) != 0) - goto err; - - dentry->parent = dentry; - - return dentry; -err: - FREE(dentry); - ERROR("Failed to allocate new dentry"); - return NULL; + ret = set_dentry_name(dentry, name); + if (ret == 0) { + dentry->parent = dentry; + *dentry_ret = dentry; + } else { + FREE(dentry); + ERROR("Failed to set name on new dentry with name \"%"TS"\"", + name); + } + return ret; } -static struct dentry *__new_dentry_with_inode(const char *name, bool timeless) +static int +_new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret, + bool timeless) { - struct dentry *dentry; - dentry = new_dentry(name); - if (dentry) { - if (timeless) - dentry->d_inode = new_timeless_inode(); - else - dentry->d_inode = new_inode(); - if (dentry->d_inode) { - inode_add_dentry(dentry, dentry->d_inode); - } else { - free_dentry(dentry); - dentry = NULL; - } + struct wim_dentry *dentry; + int ret; + + ret = new_dentry(name, &dentry); + if (ret) + return ret; + + if (timeless) + dentry->d_inode = new_timeless_inode(); + else + dentry->d_inode = new_inode(); + if (dentry->d_inode == NULL) { + free_dentry(dentry); + return WIMLIB_ERR_NOMEM; } - return dentry; + + inode_add_dentry(dentry, dentry->d_inode); + *dentry_ret = dentry; + return 0; } -struct dentry *new_dentry_with_timeless_inode(const char *name) +int +new_dentry_with_timeless_inode(const tchar *name, struct wim_dentry **dentry_ret) { - return __new_dentry_with_inode(name, true); + return _new_dentry_with_inode(name, dentry_ret, true); } -struct dentry *new_dentry_with_inode(const char *name) +int +new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret) { - return __new_dentry_with_inode(name, false); + return _new_dentry_with_inode(name, dentry_ret, false); } +int +new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret) +{ + int ret; + struct wim_dentry *dentry; + + DEBUG("Creating filler directory \"%"TS"\"", name); + ret = new_dentry_with_inode(name, &dentry); + if (ret) + return ret; + /* Leave the inode number as 0; this is allowed for non + * hard-linked files. */ + dentry->d_inode->i_resolved = 1; + dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY; + *dentry_ret = dentry; + return 0; +} + +static int +dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore) +{ + dentry->d_inode->i_visited = 0; + return 0; +} -static int init_ads_entry(struct ads_entry *ads_entry, const char *name) +void +dentry_tree_clear_inode_visited(struct wim_dentry *root) +{ + for_dentry_in_tree(root, dentry_clear_inode_visited, NULL); +} + +static int +init_ads_entry(struct wim_ads_entry *ads_entry, const void *name, + size_t name_nbytes, bool is_utf16le) { int ret = 0; memset(ads_entry, 0, sizeof(*ads_entry)); - if (name && *name) - ret = change_ads_name(ads_entry, name); + + if (is_utf16le) { + utf16lechar *p = MALLOC(name_nbytes + sizeof(utf16lechar)); + if (p == NULL) + return WIMLIB_ERR_NOMEM; + memcpy(p, name, name_nbytes); + p[name_nbytes / 2] = cpu_to_le16(0); + ads_entry->stream_name = p; + ads_entry->stream_name_nbytes = name_nbytes; + } else { + if (name && *(const tchar*)name != T('\0')) { + ret = get_utf16le_name(name, &ads_entry->stream_name, + &ads_entry->stream_name_nbytes); + } + } return ret; } -static void destroy_ads_entry(struct ads_entry *ads_entry) +static void +destroy_ads_entry(struct wim_ads_entry *ads_entry) { FREE(ads_entry->stream_name); - FREE(ads_entry->stream_name_utf8); } - /* Frees an inode. */ -void free_inode(struct inode *inode) +void +free_inode(struct wim_inode *inode) { if (inode) { - if (inode->ads_entries) { - for (u16 i = 0; i < inode->num_ads; i++) - destroy_ads_entry(&inode->ads_entries[i]); - FREE(inode->ads_entries); + if (inode->i_ads_entries) { + for (u16 i = 0; i < inode->i_num_ads; i++) + destroy_ads_entry(&inode->i_ads_entries[i]); + FREE(inode->i_ads_entries); } - #ifdef WITH_FUSE - wimlib_assert(inode->num_opened_fds == 0); - FREE(inode->fds); - pthread_mutex_destroy(&inode->i_mutex); - #endif - FREE(inode->extracted_file); + /* HACK: This may instead delete the inode from i_list, but the + * hlist_del() behaves the same as list_del(). */ + if (!hlist_unhashed(&inode->i_hlist)) + hlist_del(&inode->i_hlist); FREE(inode); } } /* Decrements link count on an inode and frees it if the link count reaches 0. * */ -static void put_inode(struct inode *inode) +static void +put_inode(struct wim_inode *inode) { - wimlib_assert(inode); - wimlib_assert(inode->link_count); - if (--inode->link_count == 0) { + wimlib_assert(inode->i_nlink != 0); + if (--inode->i_nlink == 0) { #ifdef WITH_FUSE - if (inode->num_opened_fds == 0) + if (inode->i_num_opened_fds == 0) #endif { free_inode(inode); @@ -1011,93 +1260,124 @@ static void put_inode(struct inode *inode) /* Frees a WIM dentry. * - * The inode is freed only if its link count is decremented to 0. + * The corresponding inode (if any) is freed only if its link count is + * decremented to 0. */ -void free_dentry(struct dentry *dentry) -{ - wimlib_assert(dentry != NULL); - FREE(dentry->file_name); - FREE(dentry->file_name_utf8); - FREE(dentry->short_name); - FREE(dentry->full_path_utf8); - if (dentry->d_inode) - put_inode(dentry->d_inode); - FREE(dentry); -} - -void put_dentry(struct dentry *dentry) +void +free_dentry(struct wim_dentry *dentry) { - wimlib_assert(dentry != NULL); - wimlib_assert(dentry->refcnt != 0); - - if (--dentry->refcnt == 0) - free_dentry(dentry); + if (dentry) { + FREE(dentry->file_name); + FREE(dentry->short_name); + FREE(dentry->_full_path); + if (dentry->d_inode) + put_inode(dentry->d_inode); + FREE(dentry); + } } -/* - * This function is passed as an argument to for_dentry_in_tree_depth() in order - * to free a directory tree. __args is a pointer to a `struct free_dentry_args'. - */ -static int do_free_dentry(struct dentry *dentry, void *__lookup_table) +/* This function is passed as an argument to for_dentry_in_tree_depth() in order + * to free a directory tree. */ +static int +do_free_dentry(struct wim_dentry *dentry, void *_lookup_table) { - struct lookup_table *lookup_table = __lookup_table; - unsigned i; + struct wim_lookup_table *lookup_table = _lookup_table; if (lookup_table) { - struct lookup_table_entry *lte; - struct inode *inode = dentry->d_inode; - wimlib_assert(inode->link_count); - for (i = 0; i <= inode->num_ads; i++) { + struct wim_inode *inode = dentry->d_inode; + for (unsigned i = 0; i <= inode->i_num_ads; i++) { + struct wim_lookup_table_entry *lte; + lte = inode_stream_lte(inode, i, lookup_table); if (lte) lte_decrement_refcnt(lte, lookup_table); } } - - put_dentry(dentry); + free_dentry(dentry); return 0; } /* * Unlinks and frees a dentry tree. * - * @root: The root of the tree. - * @lookup_table: The lookup table for dentries. If non-NULL, the - * reference counts in the lookup table for the lookup - * table entries corresponding to the dentries will be - * decremented. + * @root: + * The root of the tree. + * + * @lookup_table: + * The lookup table for dentries. If non-NULL, the reference counts in the + * lookup table for the lookup table entries corresponding to the dentries + * will be decremented. */ -void free_dentry_tree(struct dentry *root, struct lookup_table *lookup_table) +void +free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table) { - if (!root || !root->parent) - return; for_dentry_in_tree_depth(root, do_free_dentry, lookup_table); } -int increment_dentry_refcnt(struct dentry *dentry, void *ignore) +/* Insert a dentry into the case insensitive index for a directory. + * + * This is a red-black tree, but when multiple dentries share the same + * case-insensitive name, only one is inserted into the tree itself; the rest + * are connected in a list. + */ +static struct wim_dentry * +dentry_add_child_case_insensitive(struct wim_dentry *parent, + struct wim_dentry *child) { - dentry->refcnt++; - return 0; + struct rb_root *root; + struct rb_node **new; + struct rb_node *rb_parent; + + root = &parent->d_inode->i_children_case_insensitive; + new = &root->rb_node; + rb_parent = NULL; + while (*new) { + struct wim_dentry *this = container_of(*new, struct wim_dentry, + rb_node_case_insensitive); + int result = dentry_compare_names_case_insensitive(child, this); + + rb_parent = *new; + + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return this; + } + rb_link_node(&child->rb_node_case_insensitive, rb_parent, new); + rb_insert_color(&child->rb_node_case_insensitive, root); + return NULL; } /* * Links a dentry into the directory tree. * - * @dentry: The dentry to link. - * @parent: The dentry that will be the parent of @dentry. + * @parent: The dentry that will be the parent of @child. + * @child: The dentry to link. + * + * Returns NULL if successful. If @parent already contains a dentry with the + * same case-sensitive name as @child, the pointer to this duplicate dentry is + * returned. */ -bool dentry_add_child(struct dentry * restrict parent, - struct dentry * restrict child) +struct wim_dentry * +dentry_add_child(struct wim_dentry * restrict parent, + struct wim_dentry * restrict child) { - wimlib_assert(dentry_is_directory(parent)); + struct rb_root *root; + struct rb_node **new; + struct rb_node *rb_parent; - struct rb_root *root = &parent->d_inode->children; - struct rb_node **new = &(root->rb_node); - struct rb_node *rb_parent = NULL; + wimlib_assert(dentry_is_directory(parent)); + wimlib_assert(parent != child); + /* Case sensitive child dentry index */ + root = &parent->d_inode->i_children; + new = &root->rb_node; + rb_parent = NULL; while (*new) { - struct dentry *this = rbnode_dentry(*new); - int result = dentry_compare_names(child, this); + struct wim_dentry *this = rbnode_dentry(*new); + int result = dentry_compare_names_case_sensitive(child, this); rb_parent = *new; @@ -1106,242 +1386,334 @@ bool dentry_add_child(struct dentry * restrict parent, else if (result > 0) new = &((*new)->rb_right); else - return false; + return this; } child->parent = parent; rb_link_node(&child->rb_node, rb_parent, new); rb_insert_color(&child->rb_node, root); - return true; + + /* Case insensitive child dentry index */ + { + struct wim_dentry *existing; + existing = dentry_add_child_case_insensitive(parent, child); + if (existing) { + list_add(&child->case_insensitive_conflict_list, + &existing->case_insensitive_conflict_list); + child->rb_node_case_insensitive.__rb_parent_color = 0; + } else { + INIT_LIST_HEAD(&child->case_insensitive_conflict_list); + } + } + return NULL; } -#ifdef WITH_FUSE -/* - * Unlink a dentry from the directory tree. - * - * Note: This merely removes it from the in-memory tree structure. - */ -void unlink_dentry(struct dentry *dentry) +/* Unlink a WIM dentry from the directory entry tree. */ +void +unlink_dentry(struct wim_dentry *dentry) { - struct dentry *parent = dentry->parent; + struct wim_dentry *parent = dentry->parent; + if (parent == dentry) return; - rb_erase(&dentry->rb_node, &parent->d_inode->children); + rb_erase(&dentry->rb_node, &parent->d_inode->i_children); + + if (dentry->rb_node_case_insensitive.__rb_parent_color) { + /* This dentry was in the case-insensitive red-black tree. */ + rb_erase(&dentry->rb_node_case_insensitive, + &parent->d_inode->i_children_case_insensitive); + if (!list_empty(&dentry->case_insensitive_conflict_list)) { + /* Make a different case-insensitively-the-same dentry + * be the "representative" in the red-black tree. */ + struct list_head *next; + struct wim_dentry *other; + struct wim_dentry *existing; + + next = dentry->case_insensitive_conflict_list.next; + other = list_entry(next, struct wim_dentry, case_insensitive_conflict_list); + existing = dentry_add_child_case_insensitive(parent, other); + wimlib_assert(existing == NULL); + } + } + list_del(&dentry->case_insensitive_conflict_list); } -#endif -static inline struct dentry *inode_first_dentry(struct inode *inode) +static int +free_dentry_full_path(struct wim_dentry *dentry, void *_ignore) { - wimlib_assert(inode->dentry_list.next != &inode->dentry_list); - return container_of(inode->dentry_list.next, struct dentry, - inode_dentry_list); + FREE(dentry->_full_path); + dentry->_full_path = NULL; + return 0; } -static int verify_inode(struct inode *inode, const WIMStruct *w) +/* Rename a file or directory in the WIM. */ +int +rename_wim_path(WIMStruct *wim, const tchar *from, const tchar *to, + CASE_SENSITIVITY_TYPE case_type) { - const struct lookup_table *table = w->lookup_table; - const struct wim_security_data *sd = wim_const_security_data(w); - const struct dentry *first_dentry = inode_first_dentry(inode); - int ret = WIMLIB_ERR_INVALID_DENTRY; - - /* Check the security ID */ - if (inode->security_id < -1) { - ERROR("Dentry `%s' has an invalid security ID (%d)", - first_dentry->full_path_utf8, inode->security_id); - goto out; - } - if (inode->security_id >= sd->num_entries) { - ERROR("Dentry `%s' has an invalid security ID (%d) " - "(there are only %u entries in the security table)", - first_dentry->full_path_utf8, inode->security_id, - sd->num_entries); - goto out; - } + struct wim_dentry *src; + struct wim_dentry *dst; + struct wim_dentry *parent_of_dst; + int ret; - /* Check that lookup table entries for all the resources exist, except - * if the SHA1 message digest is all 0's, which indicates there is - * intentionally no resource there. */ - if (w->hdr.total_parts == 1) { - for (unsigned i = 0; i <= inode->num_ads; i++) { - struct lookup_table_entry *lte; - const u8 *hash; - hash = inode_stream_hash_unresolved(inode, i); - lte = __lookup_resource(table, hash); - if (!lte && !is_zero_hash(hash)) { - ERROR("Could not find lookup table entry for stream " - "%u of dentry `%s'", i, first_dentry->full_path_utf8); - goto out; - } - if (lte && (lte->real_refcnt += inode->link_count) > lte->refcnt) - { - #ifdef ENABLE_ERROR_MESSAGES - WARNING("The following lookup table entry " - "has a reference count of %u, but", - lte->refcnt); - WARNING("We found %u references to it", - lte->real_refcnt); - WARNING("(One dentry referencing it is at `%s')", - first_dentry->full_path_utf8); - - print_lookup_table_entry(lte); - #endif - /* Guess what! install.wim for Windows 8 - * contains a stream with 2 dentries referencing - * it, but the lookup table entry has reference - * count of 1. So we will need to handle this - * case and not just make it be an error... I'm - * just setting the reference count to the - * number of references we found. - * (Unfortunately, even after doing this, the - * reference count could be too low if it's also - * referenced in other WIM images) */ - - #if 1 - lte->refcnt = lte->real_refcnt; - WARNING("Fixing reference count"); - #else - goto out; - #endif - } - } - } + /* This rename() implementation currently only supports actual files + * (not alternate data streams) */ - /* Make sure there is only one un-named stream. */ - unsigned num_unnamed_streams = 0; - for (unsigned i = 0; i <= inode->num_ads; i++) { - const u8 *hash; - hash = inode_stream_hash_unresolved(inode, i); - if (!inode_stream_name_len(inode, i) && !is_zero_hash(hash)) - num_unnamed_streams++; - } - if (num_unnamed_streams > 1) { - ERROR("Dentry `%s' has multiple (%u) un-named streams", - first_dentry->full_path_utf8, num_unnamed_streams); - goto out; - } - inode->verified = true; - ret = 0; -out: - return ret; -} + src = get_dentry(wim, from, case_type); + if (!src) + return -errno; -/* Run some miscellaneous verifications on a WIM dentry */ -int verify_dentry(struct dentry *dentry, void *wim) -{ - int ret; + dst = get_dentry(wim, to, case_type); - if (!dentry->d_inode->verified) { - ret = verify_inode(dentry->d_inode, wim); - if (ret != 0) - return ret; - } + if (dst) { + /* Destination file exists */ - /* Cannot have a short name but no long name */ - if (dentry->short_name_len && !dentry->file_name_len) { - ERROR("Dentry `%s' has a short name but no long name", - dentry->full_path_utf8); - return WIMLIB_ERR_INVALID_DENTRY; - } + if (src == dst) /* Same file */ + return 0; - /* Make sure root dentry is unnamed */ - if (dentry_is_root(dentry)) { - if (dentry->file_name_len) { - ERROR("The root dentry is named `%s', but it must " - "be unnamed", dentry->file_name_utf8); - return WIMLIB_ERR_INVALID_DENTRY; + if (!dentry_is_directory(src)) { + /* Cannot rename non-directory to directory. */ + if (dentry_is_directory(dst)) + return -EISDIR; + } else { + /* Cannot rename directory to a non-directory or a non-empty + * directory */ + if (!dentry_is_directory(dst)) + return -ENOTDIR; + if (dentry_has_children(dst)) + return -ENOTEMPTY; } - } + parent_of_dst = dst->parent; + } else { + /* Destination does not exist */ + parent_of_dst = get_parent_dentry(wim, to, case_type); + if (!parent_of_dst) + return -errno; -#if 0 - /* Check timestamps */ - if (inode->last_access_time < inode->creation_time || - inode->last_write_time < inode->creation_time) { - WARNING("Dentry `%s' was created after it was last accessed or " - "written to", dentry->full_path_utf8); + if (!dentry_is_directory(parent_of_dst)) + return -ENOTDIR; } -#endif + ret = set_dentry_name(src, path_basename(to)); + if (ret) + return -ENOMEM; + if (dst) { + unlink_dentry(dst); + free_dentry_tree(dst, wim->lookup_table); + } + unlink_dentry(src); + dentry_add_child(parent_of_dst, src); + if (src->_full_path) + for_dentry_in_tree(src, free_dentry_full_path, NULL); return 0; } - -#ifdef WITH_FUSE -/* Returns the alternate data stream entry belonging to @inode that has the - * stream name @stream_name. */ -struct ads_entry *inode_get_ads_entry(struct inode *inode, - const char *stream_name, - u16 *idx_ret) +/* + * Returns the alternate data stream entry belonging to @inode that has the + * stream name @stream_name, or NULL if the inode has no alternate data stream + * with that name. + * + * If @p stream_name is the empty string, NULL is returned --- that is, this + * function will not return "unnamed" alternate data stream entries. + */ +struct wim_ads_entry * +inode_get_ads_entry(struct wim_inode *inode, const tchar *stream_name, + u16 *idx_ret) { - size_t stream_name_len; - if (!stream_name) + if (inode->i_num_ads == 0) { return NULL; - if (inode->num_ads) { - u16 i = 0; - stream_name_len = strlen(stream_name); + } else { + size_t stream_name_utf16le_nbytes; + u16 i; + struct wim_ads_entry *result; + + if (stream_name[0] == T('\0')) + return NULL; + + #if TCHAR_IS_UTF16LE + const utf16lechar *stream_name_utf16le; + + stream_name_utf16le = stream_name; + stream_name_utf16le_nbytes = tstrlen(stream_name) * sizeof(tchar); + #else + utf16lechar *stream_name_utf16le; + + { + int ret = tstr_to_utf16le(stream_name, + tstrlen(stream_name) * + sizeof(tchar), + &stream_name_utf16le, + &stream_name_utf16le_nbytes); + if (ret) + return NULL; + } + #endif + i = 0; + result = NULL; do { - if (ads_entry_has_name(&inode->ads_entries[i], - stream_name, stream_name_len)) + if (ads_entry_has_name(&inode->i_ads_entries[i], + stream_name_utf16le, + stream_name_utf16le_nbytes, + default_ignore_case)) { if (idx_ret) *idx_ret = i; - return &inode->ads_entries[i]; + result = &inode->i_ads_entries[i]; + break; } - } while (++i != inode->num_ads); + } while (++i != inode->i_num_ads); + #if !TCHAR_IS_UTF16LE + FREE(stream_name_utf16le); + #endif + return result; } - return NULL; } -#endif -#if defined(WITH_FUSE) || defined(WITH_NTFS_3G) -/* - * Add an alternate stream entry to an inode and return a pointer to it, or NULL - * if memory could not be allocated. - */ -struct ads_entry *inode_add_ads(struct inode *inode, const char *stream_name) +static struct wim_ads_entry * +do_inode_add_ads(struct wim_inode *inode, const void *stream_name, + size_t stream_name_nbytes, bool is_utf16le) { u16 num_ads; - struct ads_entry *ads_entries; - struct ads_entry *new_entry; + struct wim_ads_entry *ads_entries; + struct wim_ads_entry *new_entry; - DEBUG("Add alternate data stream \"%s\"", stream_name); + wimlib_assert(stream_name_nbytes != 0); - if (inode->num_ads >= 0xfffe) { + if (inode->i_num_ads >= 0xfffe) { ERROR("Too many alternate data streams in one inode!"); return NULL; } - num_ads = inode->num_ads + 1; - ads_entries = REALLOC(inode->ads_entries, - num_ads * sizeof(inode->ads_entries[0])); - if (!ads_entries) { + num_ads = inode->i_num_ads + 1; + ads_entries = REALLOC(inode->i_ads_entries, + num_ads * sizeof(inode->i_ads_entries[0])); + if (ads_entries == NULL) { ERROR("Failed to allocate memory for new alternate data stream"); return NULL; } - inode->ads_entries = ads_entries; + inode->i_ads_entries = ads_entries; - new_entry = &inode->ads_entries[num_ads - 1]; - if (init_ads_entry(new_entry, stream_name) != 0) + new_entry = &inode->i_ads_entries[num_ads - 1]; + if (init_ads_entry(new_entry, stream_name, stream_name_nbytes, is_utf16le)) return NULL; -#ifdef WITH_FUSE - new_entry->stream_id = inode->next_stream_id++; -#endif - inode->num_ads = num_ads; + new_entry->stream_id = inode->i_next_stream_id++; + inode->i_num_ads = num_ads; return new_entry; } -#endif -#ifdef WITH_FUSE -/* Remove an alternate data stream from the inode */ -void inode_remove_ads(struct inode *inode, u16 idx, - struct lookup_table *lookup_table) +struct wim_ads_entry * +inode_add_ads_utf16le(struct wim_inode *inode, + const utf16lechar *stream_name, + size_t stream_name_nbytes) { - struct ads_entry *ads_entry; - struct lookup_table_entry *lte; + DEBUG("Add alternate data stream \"%"WS"\"", stream_name); + return do_inode_add_ads(inode, stream_name, stream_name_nbytes, true); +} + +/* + * Add an alternate stream entry to a WIM inode. On success, returns a pointer + * to the new entry; on failure, returns NULL. + * + * @stream_name must be a nonempty string. + */ +struct wim_ads_entry * +inode_add_ads(struct wim_inode *inode, const tchar *stream_name) +{ + DEBUG("Add alternate data stream \"%"TS"\"", stream_name); + return do_inode_add_ads(inode, stream_name, + tstrlen(stream_name) * sizeof(tchar), + TCHAR_IS_UTF16LE); +} - wimlib_assert(idx < inode->num_ads); - wimlib_assert(inode->resolved); +static struct wim_lookup_table_entry * +add_stream_from_data_buffer(const void *buffer, size_t size, + struct wim_lookup_table *lookup_table) +{ + u8 hash[SHA1_HASH_SIZE]; + struct wim_lookup_table_entry *lte, *existing_lte; + + sha1_buffer(buffer, size, hash); + existing_lte = lookup_resource(lookup_table, hash); + if (existing_lte) { + wimlib_assert(existing_lte->size == size); + lte = existing_lte; + lte->refcnt++; + } else { + void *buffer_copy; + lte = new_lookup_table_entry(); + if (lte == NULL) + return NULL; + buffer_copy = memdup(buffer, size); + if (buffer_copy == NULL) { + free_lookup_table_entry(lte); + return NULL; + } + lte->resource_location = RESOURCE_IN_ATTACHED_BUFFER; + lte->attached_buffer = buffer_copy; + lte->size = size; + copy_hash(lte->hash, hash); + lookup_table_insert(lookup_table, lte); + } + return lte; +} + +int +inode_add_ads_with_data(struct wim_inode *inode, const tchar *name, + const void *value, size_t size, + struct wim_lookup_table *lookup_table) +{ + struct wim_ads_entry *new_ads_entry; + + wimlib_assert(inode->i_resolved); + + new_ads_entry = inode_add_ads(inode, name); + if (new_ads_entry == NULL) + return WIMLIB_ERR_NOMEM; + + new_ads_entry->lte = add_stream_from_data_buffer(value, size, + lookup_table); + if (new_ads_entry->lte == NULL) { + inode_remove_ads(inode, new_ads_entry - inode->i_ads_entries, + lookup_table); + return WIMLIB_ERR_NOMEM; + } + return 0; +} + +bool +inode_has_named_stream(const struct wim_inode *inode) +{ + for (u16 i = 0; i < inode->i_num_ads; i++) + if (ads_entry_is_named_stream(&inode->i_ads_entries[i])) + return true; + return false; +} + +/* Set the unnamed stream of a WIM inode, given a data buffer containing the + * stream contents. */ +int +inode_set_unnamed_stream(struct wim_inode *inode, const void *data, size_t len, + struct wim_lookup_table *lookup_table) +{ + inode->i_lte = add_stream_from_data_buffer(data, len, lookup_table); + if (inode->i_lte == NULL) + return WIMLIB_ERR_NOMEM; + inode->i_resolved = 1; + return 0; +} + +/* Remove an alternate data stream from a WIM inode */ +void +inode_remove_ads(struct wim_inode *inode, u16 idx, + struct wim_lookup_table *lookup_table) +{ + struct wim_ads_entry *ads_entry; + struct wim_lookup_table_entry *lte; - ads_entry = &inode->ads_entries[idx]; + wimlib_assert(idx < inode->i_num_ads); + wimlib_assert(inode->i_resolved); - DEBUG("Remove alternate data stream \"%s\"", ads_entry->stream_name_utf8); + ads_entry = &inode->i_ads_entries[idx]; + + DEBUG("Remove alternate data stream \"%"WS"\"", ads_entry->stream_name); lte = ads_entry->lte; if (lte) @@ -1349,293 +1721,367 @@ void inode_remove_ads(struct inode *inode, u16 idx, destroy_ads_entry(ads_entry); - memcpy(&inode->ads_entries[idx], - &inode->ads_entries[idx + 1], - (inode->num_ads - idx - 1) * sizeof(inode->ads_entries[0])); - inode->num_ads--; + memmove(&inode->i_ads_entries[idx], + &inode->i_ads_entries[idx + 1], + (inode->i_num_ads - idx - 1) * sizeof(inode->i_ads_entries[0])); + inode->i_num_ads--; } -#endif +bool +inode_has_unix_data(const struct wim_inode *inode) +{ + for (u16 i = 0; i < inode->i_num_ads; i++) + if (ads_entry_is_unix_data(&inode->i_ads_entries[i])) + return true; + return false; +} + +#ifndef __WIN32__ +int +inode_get_unix_data(const struct wim_inode *inode, + struct wimlib_unix_data *unix_data, + u16 *stream_idx_ret) +{ + const struct wim_ads_entry *ads_entry; + const struct wim_lookup_table_entry *lte; + size_t size; + int ret; + + wimlib_assert(inode->i_resolved); + + ads_entry = inode_get_ads_entry((struct wim_inode*)inode, + WIMLIB_UNIX_DATA_TAG, NULL); + if (ads_entry == NULL) + return NO_UNIX_DATA; + + if (stream_idx_ret) + *stream_idx_ret = ads_entry - inode->i_ads_entries; + + lte = ads_entry->lte; + if (lte == NULL) + return NO_UNIX_DATA; + + size = lte->size; + if (size != sizeof(struct wimlib_unix_data)) + return BAD_UNIX_DATA; + + ret = read_full_stream_into_buf(lte, unix_data); + if (ret) + return ret; + + if (unix_data->version != 0) + return BAD_UNIX_DATA; + return 0; +} +int +inode_set_unix_data(struct wim_inode *inode, uid_t uid, gid_t gid, mode_t mode, + struct wim_lookup_table *lookup_table, int which) +{ + struct wimlib_unix_data unix_data; + int ret; + bool have_good_unix_data = false; + bool have_unix_data = false; + u16 stream_idx; + + if (!(which & UNIX_DATA_CREATE)) { + ret = inode_get_unix_data(inode, &unix_data, &stream_idx); + if (ret == 0 || ret == BAD_UNIX_DATA || ret > 0) + have_unix_data = true; + if (ret == 0) + have_good_unix_data = true; + } + unix_data.version = 0; + if (which & UNIX_DATA_UID || !have_good_unix_data) + unix_data.uid = uid; + if (which & UNIX_DATA_GID || !have_good_unix_data) + unix_data.gid = gid; + if (which & UNIX_DATA_MODE || !have_good_unix_data) + unix_data.mode = mode; + ret = inode_add_ads_with_data(inode, WIMLIB_UNIX_DATA_TAG, + &unix_data, + sizeof(struct wimlib_unix_data), + lookup_table); + if (ret == 0 && have_unix_data) + inode_remove_ads(inode, stream_idx, lookup_table); + return ret; +} +#endif /* !__WIN32__ */ /* - * Reads the alternate data stream entries for a dentry. - * - * @p: Pointer to buffer that starts with the first alternate stream entry. - * - * @inode: Inode to load the alternate data streams into. - * @inode->num_ads must have been set to the number of - * alternate data streams that are expected. + * Reads the alternate data stream entries of a WIM dentry. * - * @remaining_size: Number of bytes of data remaining in the buffer pointed - * to by @p. + * @p: + * Pointer to buffer that starts with the first alternate stream entry. * - * The format of the on-disk alternate stream entries is as follows: + * @inode: + * Inode to load the alternate data streams into. @inode->i_num_ads must + * have been set to the number of alternate data streams that are expected. * - * struct ads_entry_on_disk { - * u64 length; // Length of the entry, in bytes. This includes - * all fields (including the stream name and - * null terminator if present, AND the padding!). - * u64 reserved; // Seems to be unused - * u8 hash[20]; // SHA1 message digest of the uncompressed stream - * u16 stream_name_len; // Length of the stream name, in bytes - * char stream_name[]; // Stream name in UTF-16LE, @stream_name_len bytes long, - * not including null terminator - * u16 zero; // UTF-16 null terminator for the stream name, NOT - * included in @stream_name_len. Based on what - * I've observed from filenames in dentries, - * this field should not exist when - * (@stream_name_len == 0), but you can't - * actually tell because of the padding anyway - * (provided that the padding is zeroed, which - * it always seems to be). - * char padding[]; // Padding to make the size a multiple of 8 bytes. - * }; + * @remaining_size: + * Number of bytes of data remaining in the buffer pointed to by @p. * - * In addition, the entries are 8-byte aligned. + * On success, inode->i_ads_entries is set to an array of `struct + * wim_ads_entry's of length inode->i_num_ads. On failure, @inode is not + * modified. * - * Return 0 on success or nonzero on failure. On success, inode->ads_entries - * is set to an array of `struct ads_entry's of length inode->num_ads. On - * failure, @inode is not modified. + * Return values: + * WIMLIB_ERR_SUCCESS (0) + * WIMLIB_ERR_INVALID_METADATA_RESOURCE + * WIMLIB_ERR_NOMEM */ -static int read_ads_entries(const u8 *p, struct inode *inode, - u64 remaining_size) +static int +read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, + size_t nbytes_remaining) { u16 num_ads; - struct ads_entry *ads_entries; + struct wim_ads_entry *ads_entries; int ret; - num_ads = inode->num_ads; - ads_entries = CALLOC(num_ads, sizeof(inode->ads_entries[0])); - if (!ads_entries) { - ERROR("Could not allocate memory for %"PRIu16" " - "alternate data stream entries", num_ads); - return WIMLIB_ERR_NOMEM; - } + BUILD_BUG_ON(sizeof(struct wim_ads_entry_on_disk) != WIM_ADS_ENTRY_DISK_SIZE); + + /* Allocate an array for our in-memory representation of the alternate + * data stream entries. */ + num_ads = inode->i_num_ads; + ads_entries = CALLOC(num_ads, sizeof(inode->i_ads_entries[0])); + if (ads_entries == NULL) + goto out_of_memory; + /* Read the entries into our newly allocated buffer. */ for (u16 i = 0; i < num_ads; i++) { - struct ads_entry *cur_entry; u64 length; - u64 length_no_padding; - u64 total_length; - size_t utf8_len; - const u8 *p_save = p; + struct wim_ads_entry *cur_entry; + const struct wim_ads_entry_on_disk *disk_entry = + (const struct wim_ads_entry_on_disk*)p; cur_entry = &ads_entries[i]; - - #ifdef WITH_FUSE ads_entries[i].stream_id = i + 1; - #endif - - /* Read the base stream entry, excluding the stream name. */ - if (remaining_size < WIM_ADS_ENTRY_DISK_SIZE) { - ERROR("Stream entries go past end of metadata resource"); - ERROR("(remaining_size = %"PRIu64")", remaining_size); - ret = WIMLIB_ERR_INVALID_DENTRY; - goto out_free_ads_entries; - } - - p = get_u64(p, &length); - p += 8; /* Skip the reserved field */ - p = get_bytes(p, SHA1_HASH_SIZE, (u8*)cur_entry->hash); - p = get_u16(p, &cur_entry->stream_name_len); - - cur_entry->stream_name = NULL; - cur_entry->stream_name_utf8 = NULL; - /* Length including neither the null terminator nor the padding - * */ - length_no_padding = WIM_ADS_ENTRY_DISK_SIZE + - cur_entry->stream_name_len; - - /* Length including the null terminator and the padding */ - total_length = ((length_no_padding + 2) + 7) & ~7; - - wimlib_assert(total_length == ads_entry_total_length(cur_entry)); - - if (remaining_size < length_no_padding) { - ERROR("Stream entries go past end of metadata resource"); - ERROR("(remaining_size = %"PRIu64" bytes, " - "length_no_padding = %"PRIu64" bytes)", - remaining_size, length_no_padding); - ret = WIMLIB_ERR_INVALID_DENTRY; - goto out_free_ads_entries; - } - - /* The @length field in the on-disk ADS entry is expected to be - * equal to @total_length, which includes all of the entry and - * the padding that follows it to align the next ADS entry to an - * 8-byte boundary. However, to be safe, we'll accept the - * length field as long as it's not less than the un-padded - * total length and not more than the padded total length. */ - if (length < length_no_padding || length > total_length) { - ERROR("Stream entry has unexpected length " - "field (length field = %"PRIu64", " - "unpadded total length = %"PRIu64", " - "padded total length = %"PRIu64")", - length, length_no_padding, total_length); - ret = WIMLIB_ERR_INVALID_DENTRY; - goto out_free_ads_entries; + /* Do we have at least the size of the fixed-length data we know + * need? */ + if (nbytes_remaining < sizeof(struct wim_ads_entry_on_disk)) + goto out_invalid; + + /* Read the length field */ + length = le64_to_cpu(disk_entry->length); + + /* Make sure the length field is neither so small it doesn't + * include all the fixed-length data nor so large it overflows + * the metadata resource buffer. */ + if (length < sizeof(struct wim_ads_entry_on_disk) || + length > nbytes_remaining) + goto out_invalid; + + /* Read the rest of the fixed-length data. */ + + cur_entry->reserved = le64_to_cpu(disk_entry->reserved); + copy_hash(cur_entry->hash, disk_entry->hash); + cur_entry->stream_name_nbytes = le16_to_cpu(disk_entry->stream_name_nbytes); + + /* If stream_name_nbytes != 0, this is a named stream. + * Otherwise this is an unnamed stream, or in some cases (bugs + * in Microsoft's software I guess) a meaningless entry + * distinguished from the real unnamed stream entry, if any, by + * the fact that the real unnamed stream entry has a nonzero + * hash field. */ + if (cur_entry->stream_name_nbytes) { + /* The name is encoded in UTF16-LE, which uses 2-byte + * coding units, so the length of the name had better be + * an even number of bytes... */ + if (cur_entry->stream_name_nbytes & 1) + goto out_invalid; + + /* Add the length of the stream name to get the length + * we actually need to read. Make sure this isn't more + * than the specified length of the entry. */ + if (sizeof(struct wim_ads_entry_on_disk) + + cur_entry->stream_name_nbytes > length) + goto out_invalid; + + cur_entry->stream_name = MALLOC(cur_entry->stream_name_nbytes + 2); + if (cur_entry->stream_name == NULL) + goto out_of_memory; + + memcpy(cur_entry->stream_name, + disk_entry->stream_name, + cur_entry->stream_name_nbytes); + cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = cpu_to_le16(0); + } else { + /* Mark inode as having weird stream entries. */ + inode->i_canonical_streams = 0; } - if (cur_entry->stream_name_len) { - cur_entry->stream_name = MALLOC(cur_entry->stream_name_len); - if (!cur_entry->stream_name) { - ret = WIMLIB_ERR_NOMEM; - goto out_free_ads_entries; - } - get_bytes(p, cur_entry->stream_name_len, - (u8*)cur_entry->stream_name); - cur_entry->stream_name_utf8 = utf16_to_utf8(cur_entry->stream_name, - cur_entry->stream_name_len, - &utf8_len); - cur_entry->stream_name_utf8_len = utf8_len; - - if (!cur_entry->stream_name_utf8) { - ret = WIMLIB_ERR_NOMEM; - goto out_free_ads_entries; - } - } /* It's expected that the size of every ADS entry is a multiple * of 8. However, to be safe, I'm allowing the possibility of * an ADS entry at the very end of the metadata resource ending * un-aligned. So although we still need to increment the input - * pointer by @total_length to reach the next ADS entry, it's - * possible that less than @total_length is actually remaining - * in the metadata resource. We should set the remaining size to - * 0 bytes if this happens. */ - p = p_save + total_length; - if (remaining_size < total_length) - remaining_size = 0; + * pointer by @length to reach the next ADS entry, it's possible + * that less than @length is actually remaining in the metadata + * resource. We should set the remaining bytes to 0 if this + * happens. */ + length = (length + 7) & ~(u64)7; + p += length; + if (nbytes_remaining < length) + nbytes_remaining = 0; else - remaining_size -= total_length; + nbytes_remaining -= length; } - inode->ads_entries = ads_entries; -#ifdef WITH_FUSE - inode->next_stream_id = inode->num_ads + 1; -#endif - return 0; + inode->i_ads_entries = ads_entries; + inode->i_next_stream_id = inode->i_num_ads + 1; + ret = 0; + goto out; +out_of_memory: + ret = WIMLIB_ERR_NOMEM; + goto out_free_ads_entries; +out_invalid: + ERROR("An alternate data stream entry is invalid"); + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; out_free_ads_entries: - for (u16 i = 0; i < num_ads; i++) - destroy_ads_entry(&ads_entries[i]); - FREE(ads_entries); + if (ads_entries) { + for (u16 i = 0; i < num_ads; i++) + destroy_ads_entry(&ads_entries[i]); + FREE(ads_entries); + } +out: return ret; } /* - * Reads a directory entry, including all alternate data stream entries that + * Reads a WIM directory entry, including all alternate data stream entries that * follow it, from the WIM image's metadata resource. * - * @metadata_resource: Buffer containing the uncompressed metadata resource. - * @metadata_resource_len: Length of the metadata resource. - * @offset: Offset of this directory entry in the metadata resource. - * @dentry: A `struct dentry' that will be filled in by this function. + * @metadata_resource: + * Pointer to the metadata resource buffer. * - * Return 0 on success or nonzero on failure. On failure, @dentry have been - * modified, bu it will be left with no pointers to any allocated buffers. - * On success, the dentry->length field must be examined. If zero, this was a - * special "end of directory" dentry and not a real dentry. If nonzero, this - * was a real dentry. + * @metadata_resource_len: + * Length of the metadata resource buffer, in bytes. + * + * @offset: Offset of the dentry within the metadata resource. + * + * @dentry: A `struct wim_dentry' that will be filled in by this function. + * + * Return 0 on success or nonzero on failure. On failure, @dentry will have + * been modified, but it will not be left with pointers to any allocated + * buffers. On success, the dentry->length field must be examined. If zero, + * this was a special "end of directory" dentry and not a real dentry. If + * nonzero, this was a real dentry. + * + * Return values: + * WIMLIB_ERR_SUCCESS (0) + * WIMLIB_ERR_INVALID_METADATA_RESOURCE + * WIMLIB_ERR_NOMEM */ -int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, - u64 offset, struct dentry *dentry) +int +read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len, + u64 offset, struct wim_dentry * restrict dentry) { - const u8 *p; + u64 calculated_size; - char *file_name = NULL; - char *file_name_utf8 = NULL; - char *short_name = NULL; - u16 short_name_len; - u16 file_name_len; - size_t file_name_utf8_len = 0; + utf16lechar *file_name; + utf16lechar *short_name; + u16 short_name_nbytes; + u16 file_name_nbytes; int ret; - struct inode *inode = NULL; + struct wim_inode *inode; + const u8 *p = &metadata_resource[offset]; + const struct wim_dentry_on_disk *disk_dentry = + (const struct wim_dentry_on_disk*)p; + + BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE); + + if ((uintptr_t)p & 7) + WARNING("WIM dentry is not 8-byte aligned"); dentry_common_init(dentry); - /*Make sure the dentry really fits into the metadata resource.*/ - if (offset + 8 > metadata_resource_len || offset + 8 < offset) { + /* Before reading the whole dentry, we need to read just the length. + * This is because a dentry of length 8 (that is, just the length field) + * terminates the list of sibling directory entries. */ + if (offset + sizeof(u64) > metadata_resource_len || + offset + sizeof(u64) < offset) + { ERROR("Directory entry starting at %"PRIu64" ends past the " "end of the metadata resource (size %"PRIu64")", offset, metadata_resource_len); - return WIMLIB_ERR_INVALID_DENTRY; + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; } - - /* Before reading the whole dentry, we need to read just the length. - * This is because a dentry of length 8 (that is, just the length field) - * terminates the list of sibling directory entries. */ - - p = get_u64(&metadata_resource[offset], &dentry->length); + dentry->length = le64_to_cpu(disk_dentry->length); /* A zero length field (really a length of 8, since that's how big the * directory entry is...) indicates that this is the end of directory * dentry. We do not read it into memory as an actual dentry, so just - * return successfully in that case. */ + * return successfully in this case. */ + if (dentry->length == 8) + dentry->length = 0; if (dentry->length == 0) return 0; - /* If the dentry does not overflow the metadata resource buffer and is - * not too short, read the rest of it (excluding the alternate data - * streams, but including the file name and short name variable-length - * fields) into memory. */ - if (offset + dentry->length >= metadata_resource_len - || offset + dentry->length < offset) + /* Now that we have the actual length provided in the on-disk structure, + * again make sure it doesn't overflow the metadata resource buffer. */ + if (offset + dentry->length > metadata_resource_len || + offset + dentry->length < offset) { ERROR("Directory entry at offset %"PRIu64" and with size " "%"PRIu64" ends past the end of the metadata resource " "(size %"PRIu64")", offset, dentry->length, metadata_resource_len); - return WIMLIB_ERR_INVALID_DENTRY; + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; } - if (dentry->length < WIM_DENTRY_DISK_SIZE) { + /* Make sure the dentry length is at least as large as the number of + * fixed-length fields */ + if (dentry->length < sizeof(struct wim_dentry_on_disk)) { ERROR("Directory entry has invalid length of %"PRIu64" bytes", dentry->length); - return WIMLIB_ERR_INVALID_DENTRY; + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; } + /* Allocate a `struct wim_inode' for this `struct wim_dentry'. */ inode = new_timeless_inode(); - if (!inode) + if (inode == NULL) return WIMLIB_ERR_NOMEM; - p = get_u32(p, &inode->attributes); - p = get_u32(p, (u32*)&inode->security_id); - p = get_u64(p, &dentry->subdir_offset); - - /* 2 unused fields */ - p += 2 * sizeof(u64); - /*p = get_u64(p, &dentry->unused1);*/ - /*p = get_u64(p, &dentry->unused2);*/ + /* Read more fields; some into the dentry, and some into the inode. */ - p = get_u64(p, &inode->creation_time); - p = get_u64(p, &inode->last_access_time); - p = get_u64(p, &inode->last_write_time); + inode->i_attributes = le32_to_cpu(disk_dentry->attributes); + inode->i_security_id = le32_to_cpu(disk_dentry->security_id); + dentry->subdir_offset = le64_to_cpu(disk_dentry->subdir_offset); + dentry->d_unused_1 = le64_to_cpu(disk_dentry->unused_1); + dentry->d_unused_2 = le64_to_cpu(disk_dentry->unused_2); + inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time); + inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time); + inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time); + copy_hash(inode->i_hash, disk_dentry->unnamed_stream_hash); - p = get_bytes(p, SHA1_HASH_SIZE, inode->hash); - - /* - * I don't know what's going on here. It seems like M$ screwed up the + /* I don't know what's going on here. It seems like M$ screwed up the * reparse points, then put the fields in the same place and didn't - * document it. The WIM_HDR_FLAG_RP_FIX flag in the WIM header might - * have something to do with this, but it's not documented. - */ - if (inode->attributes & FILE_ATTRIBUTE_REPARSE_POINT) { - /* ??? */ - p += 4; - p = get_u32(p, &inode->reparse_tag); - p += 4; + * document it. So we have some fields we read for reparse points, and + * some fields in the same place for non-reparse-point.s */ + if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { + inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->reparse.rp_unknown_1); + inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag); + inode->i_rp_unknown_2 = le16_to_cpu(disk_dentry->reparse.rp_unknown_2); + inode->i_not_rpfixed = le16_to_cpu(disk_dentry->reparse.not_rpfixed); + /* Leave inode->i_ino at 0. Note that this means the WIM file + * cannot archive hard-linked reparse points. Such a thing + * doesn't really make sense anyway, although I believe it's + * theoretically possible to have them on NTFS. */ } else { - p = get_u32(p, &inode->reparse_tag); - p = get_u64(p, &inode->ino); + inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->nonreparse.rp_unknown_1); + inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id); } - /* By the way, the reparse_reserved field does not actually exist (at - * least when the file is not a reparse point) */ + inode->i_num_ads = le16_to_cpu(disk_dentry->num_alternate_data_streams); - p = get_u16(p, &inode->num_ads); + short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes); + file_name_nbytes = le16_to_cpu(disk_dentry->file_name_nbytes); - p = get_u16(p, &short_name_len); - p = get_u16(p, &file_name_len); + if ((short_name_nbytes & 1) | (file_name_nbytes & 1)) + { + ERROR("Dentry name is not valid UTF-16LE (odd number of bytes)!"); + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; + goto out_free_inode; + } /* We now know the length of the file name and short name. Make sure * the length of the dentry is large enough to actually hold them. @@ -1643,180 +2089,139 @@ int read_dentry(const u8 metadata_resource[], u64 metadata_resource_len, * The calculated length here is unaligned to allow for the possibility * that the dentry->length names an unaligned length, although this * would be unexpected. */ - calculated_size = __dentry_correct_length_unaligned(file_name_len, - short_name_len); + calculated_size = dentry_correct_length_unaligned(file_name_nbytes, + short_name_nbytes); if (dentry->length < calculated_size) { ERROR("Unexpected end of directory entry! (Expected " - "at least %"PRIu64" bytes, got %"PRIu64" bytes. " - "short_name_len = %hu, file_name_len = %hu)", - calculated_size, dentry->length, - short_name_len, file_name_len); - ret = WIMLIB_ERR_INVALID_DENTRY; + "at least %"PRIu64" bytes, got %"PRIu64" bytes.)", + calculated_size, dentry->length); + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; goto out_free_inode; } + p += sizeof(struct wim_dentry_on_disk); + /* Read the filename if present. Note: if the filename is empty, there * is no null terminator following it. */ - if (file_name_len) { - file_name = MALLOC(file_name_len); - if (!file_name) { - ERROR("Failed to allocate %hu bytes for dentry file name", - file_name_len); + if (file_name_nbytes) { + file_name = MALLOC(file_name_nbytes + 2); + if (file_name == NULL) { + ERROR("Failed to allocate %d bytes for dentry file name", + file_name_nbytes + 2); ret = WIMLIB_ERR_NOMEM; goto out_free_inode; } - p = get_bytes(p, file_name_len, file_name); - - /* Convert filename to UTF-8. */ - file_name_utf8 = utf16_to_utf8(file_name, file_name_len, - &file_name_utf8_len); - - if (!file_name_utf8) { - ERROR("Failed to allocate memory to convert UTF-16 " - "filename (%hu bytes) to UTF-8", file_name_len); - ret = WIMLIB_ERR_NOMEM; - goto out_free_file_name; - } - if (*(u16*)p) - WARNING("Expected two zero bytes following the file name " - "`%s', but found non-zero bytes", file_name_utf8); - p += 2; + memcpy(file_name, p, file_name_nbytes); + p += file_name_nbytes + 2; + file_name[file_name_nbytes / 2] = cpu_to_le16(0); + } else { + file_name = NULL; } - /* Align the calculated size */ - calculated_size = (calculated_size + 7) & ~7; - - if (dentry->length > calculated_size) { - /* Weird; the dentry says it's longer than it should be. Note - * that the length field does NOT include the size of the - * alternate stream entries. */ - - /* Strangely, some directory entries inexplicably have a little - * over 70 bytes of extra data. The exact amount of data seems - * to be 72 bytes, but it is aligned on the next 8-byte - * boundary. It does NOT seem to be alternate data stream - * entries. Here's an example of the aligned data: - * - * 01000000 40000000 6c786bba c58ede11 b0bb0026 1870892a b6adb76f - * e63a3e46 8fca8653 0d2effa1 6c786bba c58ede11 b0bb0026 1870892a - * 00000000 00000000 00000000 00000000 - * - * Here's one interpretation of how the data is laid out. - * - * struct unknown { - * u32 field1; (always 0x00000001) - * u32 field2; (always 0x40000000) - * u8 data[48]; (???) - * u64 reserved1; (always 0) - * u64 reserved2; (always 0) - * };*/ - DEBUG("Dentry for file or directory `%s' has %zu extra " - "bytes of data", - file_name_utf8, dentry->length - calculated_size); - } /* Read the short filename if present. Note: if there is no short * filename, there is no null terminator following it. */ - if (short_name_len) { - short_name = MALLOC(short_name_len); - if (!short_name) { - ERROR("Failed to allocate %hu bytes for short filename", - short_name_len); + if (short_name_nbytes) { + short_name = MALLOC(short_name_nbytes + 2); + if (short_name == NULL) { + ERROR("Failed to allocate %d bytes for dentry short name", + short_name_nbytes + 2); ret = WIMLIB_ERR_NOMEM; - goto out_free_file_name_utf8; + goto out_free_file_name; } - - p = get_bytes(p, short_name_len, short_name); - if (*(u16*)p) - WARNING("Expected two zero bytes following the short name of " - "`%s', but found non-zero bytes", file_name_utf8); - p += 2; + memcpy(short_name, p, short_name_nbytes); + p += short_name_nbytes + 2; + short_name[short_name_nbytes / 2] = cpu_to_le16(0); + } else { + short_name = NULL; } + /* Align the dentry length */ + dentry->length = (dentry->length + 7) & ~7; + /* * Read the alternate data streams, if present. dentry->num_ads tells * us how many they are, and they will directly follow the dentry * on-disk. * * Note that each alternate data stream entry begins on an 8-byte - * aligned boundary, and the alternate data stream entries are NOT - * included in the dentry->length field for some reason. + * aligned boundary, and the alternate data stream entries seem to NOT + * be included in the dentry->length field for some reason. */ - if (inode->num_ads != 0) { - - /* Trying different lengths is just a hack to make sure we have - * a chance of reading the ADS entries correctly despite the - * poor documentation. */ - - if (calculated_size != dentry->length) { - WARNING("Trying calculated dentry length (%"PRIu64") " - "instead of dentry->length field (%"PRIu64") " - "to read ADS entries", - calculated_size, dentry->length); - } - u64 lengths_to_try[3] = {calculated_size, - (dentry->length + 7) & ~7, - dentry->length}; - ret = WIMLIB_ERR_INVALID_DENTRY; - for (size_t i = 0; i < ARRAY_LEN(lengths_to_try); i++) { - if (lengths_to_try[i] > metadata_resource_len - offset) - continue; - ret = read_ads_entries(&metadata_resource[offset + lengths_to_try[i]], - inode, - metadata_resource_len - offset - lengths_to_try[i]); - if (ret == 0) - goto out; + if (inode->i_num_ads != 0) { + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; + if (offset + dentry->length > metadata_resource_len || + (ret = read_ads_entries(&metadata_resource[offset + dentry->length], + inode, + metadata_resource_len - offset - dentry->length))) + { + ERROR("Failed to read alternate data stream " + "entries of WIM dentry \"%"WS"\"", file_name); + goto out_free_short_name; } - ERROR("Failed to read alternate data stream " - "entries of `%s'", dentry->file_name_utf8); - goto out_free_short_name; } -out: - /* We've read all the data for this dentry. Set the names and their * lengths, and we've done. */ - dentry->d_inode = inode; - dentry->file_name = file_name; - dentry->file_name_utf8 = file_name_utf8; - dentry->short_name = short_name; - dentry->file_name_len = file_name_len; - dentry->file_name_utf8_len = file_name_utf8_len; - dentry->short_name_len = short_name_len; - return 0; + dentry->d_inode = inode; + dentry->file_name = file_name; + dentry->short_name = short_name; + dentry->file_name_nbytes = file_name_nbytes; + dentry->short_name_nbytes = short_name_nbytes; + ret = 0; + goto out; out_free_short_name: FREE(short_name); -out_free_file_name_utf8: - FREE(file_name_utf8); out_free_file_name: FREE(file_name); out_free_inode: free_inode(inode); +out: return ret; } +static const tchar * +dentry_get_file_type_string(const struct wim_dentry *dentry) +{ + const struct wim_inode *inode = dentry->d_inode; + if (inode_is_directory(inode)) + return T("directory"); + else if (inode_is_symlink(inode)) + return T("symbolic link"); + else + return T("file"); +} + /* Reads the children of a dentry, and all their children, ..., etc. from the * metadata resource and into the dentry tree. * - * @metadata_resource: An array that contains the uncompressed metadata - * resource for the WIM file. + * @metadata_resource: + * An array that contains the uncompressed metadata resource for the WIM + * file. * - * @metadata_resource_len: The length of the uncompressed metadata resource, in - * bytes. + * @metadata_resource_len: + * The length of the uncompressed metadata resource, in bytes. * - * @dentry: A pointer to a `struct dentry' that is the root of the directory - * tree and has already been read from the metadata resource. It - * does not need to be the real root because this procedure is - * called recursively. + * @dentry: + * A pointer to a `struct wim_dentry' that is the root of the directory + * tree and has already been read from the metadata resource. It does not + * need to be the real root because this procedure is called recursively. * - * @return: Zero on success, nonzero on failure. + * Return values: + * WIMLIB_ERR_SUCCESS (0) + * WIMLIB_ERR_INVALID_METADATA_RESOURCE + * WIMLIB_ERR_NOMEM */ -int read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, - struct dentry *dentry) +int +read_dentry_tree(const u8 * restrict metadata_resource, + u64 metadata_resource_len, + struct wim_dentry * restrict dentry) { u64 cur_offset = dentry->subdir_offset; - struct dentry *child; - struct dentry cur_child; + struct wim_dentry *child; + struct wim_dentry *duplicate; + struct wim_dentry *parent; + struct wim_dentry cur_child; int ret; /* @@ -1828,13 +2233,25 @@ int read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, if (cur_offset == 0) return 0; + /* Check for cyclic directory structure */ + for (parent = dentry->parent; !dentry_is_root(parent); parent = parent->parent) + { + if (unlikely(parent->subdir_offset == cur_offset)) { + ERROR("Cyclic directory structure directed: children " + "of \"%"TS"\" coincide with children of \"%"TS"\"", + dentry_full_path(dentry), + dentry_full_path(parent)); + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; + } + } + /* Find and read all the children of @dentry. */ - while (1) { + for (;;) { /* Read next child of @dentry into @cur_child. */ ret = read_dentry(metadata_resource, metadata_resource_len, cur_offset, &cur_child); - if (ret != 0) + if (ret) break; /* Check for end of directory. */ @@ -1843,128 +2260,196 @@ int read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, /* Not end of directory. Allocate this child permanently and * link it to the parent and previous child. */ - child = MALLOC(sizeof(struct dentry)); - if (!child) { - ERROR("Failed to allocate %zu bytes for new dentry", - sizeof(struct dentry)); + child = memdup(&cur_child, sizeof(struct wim_dentry)); + if (child == NULL) { + ERROR("Failed to allocate new dentry!"); ret = WIMLIB_ERR_NOMEM; break; } - memcpy(child, &cur_child, sizeof(struct dentry)); - dentry_add_child(dentry, child); + /* Advance to the offset of the next child. Note: We need to + * advance by the TOTAL length of the dentry, not by the length + * cur_child.length, which although it does take into account + * the padding, it DOES NOT take into account alternate stream + * entries. */ + cur_offset += dentry_in_total_length(child); + + if (unlikely(!dentry_has_long_name(child))) { + WARNING("Ignoring unnamed dentry in " + "directory \"%"TS"\"", + dentry_full_path(dentry)); + free_dentry(child); + continue; + } - inode_add_dentry(child, child->d_inode); + duplicate = dentry_add_child(dentry, child); + if (unlikely(duplicate)) { + const tchar *child_type, *duplicate_type; + child_type = dentry_get_file_type_string(child); + duplicate_type = dentry_get_file_type_string(duplicate); + WARNING("Ignoring duplicate %"TS" \"%"TS"\" " + "(the WIM image already contains a %"TS" " + "at that path with the exact same name)", + child_type, dentry_full_path(duplicate), + duplicate_type); + free_dentry(child); + continue; + } - /* If there are children of this child, call this procedure - * recursively. */ + inode_add_dentry(child, child->d_inode); + /* If there are children of this child, call this + * procedure recursively. */ if (child->subdir_offset != 0) { - ret = read_dentry_tree(metadata_resource, - metadata_resource_len, child); - if (ret != 0) - break; + if (likely(dentry_is_directory(child))) { + ret = read_dentry_tree(metadata_resource, + metadata_resource_len, + child); + if (ret) + break; + } else { + WARNING("Ignoring children of non-directory \"%"TS"\"", + dentry_full_path(child)); + } } - - /* Advance to the offset of the next child. Note: We need to - * advance by the TOTAL length of the dentry, not by the length - * child->length, which although it does take into account the - * padding, it DOES NOT take into account alternate stream - * entries. */ - cur_offset += dentry_total_length(child); } return ret; } +/* + * Writes a WIM alternate data stream (ADS) entry to an output buffer. + * + * @ads_entry: The ADS entry structure. + * @hash: The hash field to use (instead of the one in the ADS entry). + * @p: The memory location to write the data to. + * + * Returns a pointer to the byte after the last byte written. + */ +static u8 * +write_ads_entry(const struct wim_ads_entry *ads_entry, + const u8 *hash, u8 * restrict p) +{ + struct wim_ads_entry_on_disk *disk_ads_entry = + (struct wim_ads_entry_on_disk*)p; + u8 *orig_p = p; + + disk_ads_entry->reserved = cpu_to_le64(ads_entry->reserved); + copy_hash(disk_ads_entry->hash, hash); + disk_ads_entry->stream_name_nbytes = cpu_to_le16(ads_entry->stream_name_nbytes); + p += sizeof(struct wim_ads_entry_on_disk); + if (ads_entry->stream_name_nbytes) { + p = mempcpy(p, ads_entry->stream_name, + ads_entry->stream_name_nbytes + 2); + } + /* Align to 8-byte boundary */ + while ((uintptr_t)p & 7) + *p++ = 0; + disk_ads_entry->length = cpu_to_le64(p - orig_p); + return p; +} + /* * Writes a WIM dentry to an output buffer. * * @dentry: The dentry structure. * @p: The memory location to write the data to. - * @return: Pointer to the byte after the last byte we wrote as part of the - * dentry. + * + * Returns the pointer to the byte after the last byte we wrote as part of the + * dentry, including any alternate data stream entries. */ -static u8 *write_dentry(const struct dentry *dentry, u8 *p) +static u8 * +write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) { - u8 *orig_p = p; + const struct wim_inode *inode; + struct wim_dentry_on_disk *disk_dentry; + const u8 *orig_p; const u8 *hash; - const struct inode *inode = dentry->d_inode; + bool use_dummy_stream; + u16 num_ads; + + wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */ + orig_p = p; + + inode = dentry->d_inode; + use_dummy_stream = inode_needs_dummy_stream(inode); + disk_dentry = (struct wim_dentry_on_disk*)p; + + disk_dentry->attributes = cpu_to_le32(inode->i_attributes); + disk_dentry->security_id = cpu_to_le32(inode->i_security_id); + disk_dentry->subdir_offset = cpu_to_le64(dentry->subdir_offset); + disk_dentry->unused_1 = cpu_to_le64(dentry->d_unused_1); + disk_dentry->unused_2 = cpu_to_le64(dentry->d_unused_2); + disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time); + disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time); + disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time); + if (use_dummy_stream) + hash = zero_hash; + else + hash = inode_stream_hash(inode, 0); + copy_hash(disk_dentry->unnamed_stream_hash, hash); + if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { + disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1); + disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag); + disk_dentry->reparse.rp_unknown_2 = cpu_to_le16(inode->i_rp_unknown_2); + disk_dentry->reparse.not_rpfixed = cpu_to_le16(inode->i_not_rpfixed); + } else { + disk_dentry->nonreparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1); + disk_dentry->nonreparse.hard_link_group_id = + cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino); + } + num_ads = inode->i_num_ads; + if (use_dummy_stream) + num_ads++; + disk_dentry->num_alternate_data_streams = cpu_to_le16(num_ads); + disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes); + disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes); + p += sizeof(struct wim_dentry_on_disk); + + wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry)); + + if (dentry_has_long_name(dentry)) + p = mempcpy(p, dentry->file_name, dentry->file_name_nbytes + 2); + + if (dentry_has_short_name(dentry)) + p = mempcpy(p, dentry->short_name, dentry->short_name_nbytes + 2); + + /* Align to 8-byte boundary */ + while ((uintptr_t)p & 7) + *p++ = 0; /* We calculate the correct length of the dentry ourselves because the * dentry->length field may been set to an unexpected value from when we * read the dentry in (for example, there may have been unknown data - * appended to the end of the dentry...) */ - u64 length = dentry_correct_length(dentry); - - p = put_u64(p, length); - p = put_u32(p, inode->attributes); - p = put_u32(p, inode->security_id); - p = put_u64(p, dentry->subdir_offset); - p = put_u64(p, 0); /* unused1 */ - p = put_u64(p, 0); /* unused2 */ - p = put_u64(p, inode->creation_time); - p = put_u64(p, inode->last_access_time); - p = put_u64(p, inode->last_write_time); - hash = inode_stream_hash(inode, 0); - p = put_bytes(p, SHA1_HASH_SIZE, hash); - if (inode->attributes & FILE_ATTRIBUTE_REPARSE_POINT) { - p = put_zeroes(p, 4); - p = put_u32(p, inode->reparse_tag); - p = put_zeroes(p, 4); - } else { - u64 link_group_id; - p = put_u32(p, 0); - if (inode->link_count == 1) - link_group_id = 0; - else - link_group_id = inode->ino; - p = put_u64(p, link_group_id); - } - p = put_u16(p, inode->num_ads); - p = put_u16(p, dentry->short_name_len); - p = put_u16(p, dentry->file_name_len); - if (dentry->file_name_len) { - p = put_bytes(p, dentry->file_name_len, (u8*)dentry->file_name); - p = put_u16(p, 0); /* filename padding, 2 bytes. */ - } - if (dentry->short_name) { - p = put_bytes(p, dentry->short_name_len, (u8*)dentry->short_name); - p = put_u16(p, 0); /* short name padding, 2 bytes */ + * appended to the end of the dentry...). Furthermore, the dentry may + * have been renamed, thus changing its needed length. */ + disk_dentry->length = cpu_to_le64(p - orig_p); + + if (use_dummy_stream) { + hash = inode_unnamed_stream_hash(inode); + p = write_ads_entry(&(struct wim_ads_entry){}, hash, p); } - /* Align to 8-byte boundary */ - wimlib_assert(length >= (p - orig_p) && length - (p - orig_p) <= 7); - p = put_zeroes(p, length - (p - orig_p)); - - /* Write the alternate data streams, if there are any. Please see - * read_ads_entries() for comments about the format of the on-disk - * alternate data stream entries. */ - for (u16 i = 0; i < inode->num_ads; i++) { - p = put_u64(p, ads_entry_total_length(&inode->ads_entries[i])); - p = put_u64(p, 0); /* Unused */ + /* Write the alternate data streams entries, if any. */ + for (u16 i = 0; i < inode->i_num_ads; i++) { hash = inode_stream_hash(inode, i + 1); - p = put_bytes(p, SHA1_HASH_SIZE, hash); - p = put_u16(p, inode->ads_entries[i].stream_name_len); - if (inode->ads_entries[i].stream_name_len) { - p = put_bytes(p, inode->ads_entries[i].stream_name_len, - (u8*)inode->ads_entries[i].stream_name); - p = put_u16(p, 0); - } - p = put_zeroes(p, (8 - (p - orig_p) % 8) % 8); + p = write_ads_entry(&inode->i_ads_entries[i], hash, p); } - wimlib_assert(p - orig_p == __dentry_total_length(dentry, length)); + return p; } -static int write_dentry_cb(struct dentry *dentry, void *_p) +static int +write_dentry_cb(struct wim_dentry *dentry, void *_p) { u8 **p = _p; *p = write_dentry(dentry, *p); return 0; } -static u8 *write_dentry_tree_recursive(const struct dentry *parent, u8 *p); +static u8 * +write_dentry_tree_recursive(const struct wim_dentry *parent, u8 *p); -static int write_dentry_tree_recursive_cb(struct dentry *dentry, void *_p) +static int +write_dentry_tree_recursive_cb(struct wim_dentry *dentry, void *_p) { u8 **p = _p; *p = write_dentry_tree_recursive(dentry, *p); @@ -1973,7 +2458,8 @@ static int write_dentry_tree_recursive_cb(struct dentry *dentry, void *_p) /* Recursive function that writes a dentry tree rooted at @parent, not including * @parent itself, which has already been written. */ -static u8 *write_dentry_tree_recursive(const struct dentry *parent, u8 *p) +static u8 * +write_dentry_tree_recursive(const struct wim_dentry *parent, u8 *p) { /* Nothing to do if this dentry has no children. */ if (parent->subdir_offset == 0) @@ -1985,14 +2471,14 @@ static u8 *write_dentry_tree_recursive(const struct dentry *parent, u8 *p) * recursively writing the directory trees rooted at each of the child * dentries, since the on-disk dentries for a dentry's children are * always located at consecutive positions in the metadata resource! */ - for_dentry_in_rbtree(parent->d_inode->children.rb_node, write_dentry_cb, &p); + for_dentry_child(parent, write_dentry_cb, &p); /* write end of directory entry */ - p = put_u64(p, 0); + *(le64*)p = cpu_to_le64(0); + p += 8; /* Recurse on children. */ - for_dentry_in_rbtree(parent->d_inode->children.rb_node, - write_dentry_tree_recursive_cb, &p); + for_dentry_child(parent, write_dentry_tree_recursive_cb, &p); return p; } @@ -2003,7 +2489,8 @@ static u8 *write_dentry_tree_recursive(const struct dentry *parent, u8 *p) * * Returns pointer to the byte after the last byte we wrote. */ -u8 *write_dentry_tree(const struct dentry *root, u8 *p) +u8 * +write_dentry_tree(const struct wim_dentry * restrict root, u8 * restrict p) { DEBUG("Writing dentry tree."); wimlib_assert(dentry_is_root(root)); @@ -2014,9 +2501,429 @@ u8 *write_dentry_tree(const struct dentry *root, u8 *p) /* Write end of directory entry after the root dentry just to be safe; * however the root dentry obviously cannot have any siblings. */ - p = put_u64(p, 0); + *(le64*)p = cpu_to_le64(0); + p += 8; /* Recursively write the rest of the dentry tree. */ return write_dentry_tree_recursive(root, p); } + +static int +init_wimlib_dentry(struct wimlib_dir_entry *wdentry, + struct wim_dentry *dentry, + const WIMStruct *wim, + int flags) +{ + int ret; + size_t dummy; + const struct wim_inode *inode = dentry->d_inode; + struct wim_lookup_table_entry *lte; + const u8 *hash; + +#if TCHAR_IS_UTF16LE + wdentry->filename = dentry->file_name; + wdentry->dos_name = dentry->short_name; +#else + if (dentry_has_long_name(dentry)) { + ret = utf16le_to_tstr(dentry->file_name, + dentry->file_name_nbytes, + (tchar**)&wdentry->filename, + &dummy); + if (ret) + return ret; + } + if (dentry_has_short_name(dentry)) { + ret = utf16le_to_tstr(dentry->short_name, + dentry->short_name_nbytes, + (tchar**)&wdentry->dos_name, + &dummy); + if (ret) + return ret; + } +#endif + ret = calculate_dentry_full_path(dentry); + if (ret) + return ret; + wdentry->full_path = dentry->_full_path; + + for (struct wim_dentry *d = dentry; !dentry_is_root(d); d = d->parent) + wdentry->depth++; + + if (inode->i_security_id >= 0) { + const struct wim_security_data *sd = wim_const_security_data(wim); + wdentry->security_descriptor = sd->descriptors[inode->i_security_id]; + wdentry->security_descriptor_size = sd->sizes[inode->i_security_id]; + } + wdentry->reparse_tag = inode->i_reparse_tag; + wdentry->num_links = inode->i_nlink; + wdentry->attributes = inode->i_attributes; + wdentry->hard_link_group_id = inode->i_ino; + wdentry->creation_time = wim_timestamp_to_timespec(inode->i_creation_time); + wdentry->last_write_time = wim_timestamp_to_timespec(inode->i_last_write_time); + wdentry->last_access_time = wim_timestamp_to_timespec(inode->i_last_access_time); + + lte = inode_unnamed_lte(inode, wim->lookup_table); + if (lte) { + lte_to_wimlib_resource_entry(lte, &wdentry->streams[0].resource); + } else if (!is_zero_hash(hash = inode_unnamed_stream_hash(inode))) { + if (flags & WIMLIB_ITERATE_DIR_TREE_FLAG_RESOURCES_NEEDED) + return resource_not_found_error(inode, hash); + copy_hash(wdentry->streams[0].resource.sha1_hash, hash); + wdentry->streams[0].resource.is_missing = 1; + } + + for (unsigned i = 0; i < inode->i_num_ads; i++) { + if (!ads_entry_is_named_stream(&inode->i_ads_entries[i])) + continue; + lte = inode_stream_lte(inode, i + 1, wim->lookup_table); + wdentry->num_named_streams++; + if (lte) { + lte_to_wimlib_resource_entry(lte, &wdentry->streams[ + wdentry->num_named_streams].resource); + } else if (!is_zero_hash(hash = inode_stream_hash(inode, i + 1))) { + if (flags & WIMLIB_ITERATE_DIR_TREE_FLAG_RESOURCES_NEEDED) + return resource_not_found_error(inode, hash); + copy_hash(wdentry->streams[ + wdentry->num_named_streams].resource.sha1_hash, hash); + wdentry->streams[ + wdentry->num_named_streams].resource.is_missing = 1; + } + #if TCHAR_IS_UTF16LE + wdentry->streams[wdentry->num_named_streams].stream_name = + inode->i_ads_entries[i].stream_name; + #else + size_t dummy; + + ret = utf16le_to_tstr(inode->i_ads_entries[i].stream_name, + inode->i_ads_entries[i].stream_name_nbytes, + (tchar**)&wdentry->streams[ + wdentry->num_named_streams].stream_name, + &dummy); + if (ret) + return ret; + #endif + } + return 0; +} + +static void +free_wimlib_dentry(struct wimlib_dir_entry *wdentry) +{ +#if !TCHAR_IS_UTF16LE + FREE((tchar*)wdentry->filename); + FREE((tchar*)wdentry->dos_name); + for (unsigned i = 1; i <= wdentry->num_named_streams; i++) + FREE((tchar*)wdentry->streams[i].stream_name); +#endif + FREE(wdentry); +} + +struct iterate_dir_tree_ctx { + WIMStruct *wim; + int flags; + wimlib_iterate_dir_tree_callback_t cb; + void *user_ctx; +}; + +static int +do_iterate_dir_tree(WIMStruct *wim, + struct wim_dentry *dentry, int flags, + wimlib_iterate_dir_tree_callback_t cb, + void *user_ctx); + +static int +call_do_iterate_dir_tree(struct wim_dentry *dentry, void *_ctx) +{ + struct iterate_dir_tree_ctx *ctx = _ctx; + return do_iterate_dir_tree(ctx->wim, dentry, ctx->flags, + ctx->cb, ctx->user_ctx); +} + +static int +do_iterate_dir_tree(WIMStruct *wim, + struct wim_dentry *dentry, int flags, + wimlib_iterate_dir_tree_callback_t cb, + void *user_ctx) +{ + struct wimlib_dir_entry *wdentry; + int ret = WIMLIB_ERR_NOMEM; + + + wdentry = CALLOC(1, sizeof(struct wimlib_dir_entry) + + (1 + dentry->d_inode->i_num_ads) * + sizeof(struct wimlib_stream_entry)); + if (wdentry == NULL) + goto out; + + ret = init_wimlib_dentry(wdentry, dentry, wim, flags); + if (ret) + goto out_free_wimlib_dentry; + + if (!(flags & WIMLIB_ITERATE_DIR_TREE_FLAG_CHILDREN)) { + ret = (*cb)(wdentry, user_ctx); + if (ret) + goto out_free_wimlib_dentry; + } + + if (flags & (WIMLIB_ITERATE_DIR_TREE_FLAG_RECURSIVE | + WIMLIB_ITERATE_DIR_TREE_FLAG_CHILDREN)) + { + struct iterate_dir_tree_ctx ctx = { + .wim = wim, + .flags = flags &= ~WIMLIB_ITERATE_DIR_TREE_FLAG_CHILDREN, + .cb = cb, + .user_ctx = user_ctx, + }; + ret = for_dentry_child(dentry, call_do_iterate_dir_tree, &ctx); + } +out_free_wimlib_dentry: + free_wimlib_dentry(wdentry); +out: + return ret; +} + +struct image_iterate_dir_tree_ctx { + const tchar *path; + int flags; + wimlib_iterate_dir_tree_callback_t cb; + void *user_ctx; +}; + + +static int +image_do_iterate_dir_tree(WIMStruct *wim) +{ + struct image_iterate_dir_tree_ctx *ctx = wim->private; + struct wim_dentry *dentry; + + dentry = get_dentry(wim, ctx->path, WIMLIB_CASE_PLATFORM_DEFAULT); + if (dentry == NULL) + return WIMLIB_ERR_PATH_DOES_NOT_EXIST; + return do_iterate_dir_tree(wim, dentry, ctx->flags, ctx->cb, ctx->user_ctx); +} + +/* API function documented in wimlib.h */ +WIMLIBAPI int +wimlib_iterate_dir_tree(WIMStruct *wim, int image, const tchar *path, + int flags, + wimlib_iterate_dir_tree_callback_t cb, void *user_ctx) +{ + struct image_iterate_dir_tree_ctx ctx = { + .path = path, + .flags = flags, + .cb = cb, + .user_ctx = user_ctx, + }; + wim->private = &ctx; + return for_image(wim, image, image_do_iterate_dir_tree); +} + +/* Returns %true iff the metadata of @inode and @template_inode are reasonably + * consistent with them being the same, unmodified file. */ +static bool +inode_metadata_consistent(const struct wim_inode *inode, + const struct wim_inode *template_inode, + const struct wim_lookup_table *template_lookup_table) +{ + /* Must have exact same creation time and last write time. */ + if (inode->i_creation_time != template_inode->i_creation_time || + inode->i_last_write_time != template_inode->i_last_write_time) + return false; + + /* Last access time may have stayed the same or increased, but certainly + * shouldn't have decreased. */ + if (inode->i_last_access_time < template_inode->i_last_access_time) + return false; + + /* Must have same number of alternate data stream entries. */ + if (inode->i_num_ads != template_inode->i_num_ads) + return false; + + /* If the stream entries for the inode are for some reason not resolved, + * then the hashes are already available and the point of this function + * is defeated. */ + if (!inode->i_resolved) + return false; + + /* Iterate through each stream and do some more checks. */ + for (unsigned i = 0; i <= inode->i_num_ads; i++) { + const struct wim_lookup_table_entry *lte, *template_lte; + + lte = inode_stream_lte_resolved(inode, i); + template_lte = inode_stream_lte(template_inode, i, + template_lookup_table); + + /* Compare stream sizes. */ + if (lte && template_lte) { + if (lte->size != template_lte->size) + return false; + + /* If hash happens to be available, compare with template. */ + if (!lte->unhashed && !template_lte->unhashed && + !hashes_equal(lte->hash, template_lte->hash)) + return false; + + } else if (lte && lte->size) { + return false; + } else if (template_lte && template_lte->size) { + return false; + } + } + + /* All right, barring a full checksum and given that the inodes share a + * path and the user isn't trying to trick us, these inodes most likely + * refer to the same file. */ + return true; +} + +/** + * Given an inode @inode that has been determined to be "the same" as another + * inode @template_inode in either the same WIM or another WIM, retrieve some + * useful stream information (e.g. checksums) from @template_inode. + * + * This assumes that the streams for @inode have been resolved (to point + * directly to the appropriate `struct wim_lookup_table_entry's) but do not + * necessarily have checksum information filled in. + */ +static int +inode_copy_checksums(struct wim_inode *inode, + struct wim_inode *template_inode, + WIMStruct *wim, + WIMStruct *template_wim) +{ + for (unsigned i = 0; i <= inode->i_num_ads; i++) { + struct wim_lookup_table_entry *lte, *template_lte; + struct wim_lookup_table_entry *replace_lte; + + lte = inode_stream_lte_resolved(inode, i); + template_lte = inode_stream_lte(template_inode, i, + template_wim->lookup_table); + + /* Only take action if both entries exist, the entry for @inode + * has no checksum calculated, but the entry for @template_inode + * does. */ + if (lte == NULL || template_lte == NULL || + !lte->unhashed || template_lte->unhashed) + continue; + + wimlib_assert(lte->refcnt == inode->i_nlink); + + /* If the WIM of the template image is the same as the WIM of + * the new image, then @template_lte can be used directly. + * + * Otherwise, look for a stream with the same hash in the WIM of + * the new image. If found, use it; otherwise re-use the entry + * being discarded, filling in the hash. */ + + if (wim == template_wim) + replace_lte = template_lte; + else + replace_lte = lookup_resource(wim->lookup_table, + template_lte->hash); + + list_del(<e->unhashed_list); + if (replace_lte) { + free_lookup_table_entry(lte); + } else { + copy_hash(lte->hash, template_lte->hash); + lte->unhashed = 0; + lookup_table_insert(wim->lookup_table, lte); + lte->refcnt = 0; + replace_lte = lte; + } + + if (i == 0) + inode->i_lte = replace_lte; + else + inode->i_ads_entries[i - 1].lte = replace_lte; + + replace_lte->refcnt += inode->i_nlink; + } + return 0; +} + +struct reference_template_args { + WIMStruct *wim; + WIMStruct *template_wim; +}; + +static int +dentry_reference_template(struct wim_dentry *dentry, void *_args) +{ + int ret; + struct wim_dentry *template_dentry; + struct wim_inode *inode, *template_inode; + struct reference_template_args *args = _args; + WIMStruct *wim = args->wim; + WIMStruct *template_wim = args->template_wim; + + if (dentry->d_inode->i_visited) + return 0; + + ret = calculate_dentry_full_path(dentry); + if (ret) + return ret; + + template_dentry = get_dentry(template_wim, dentry->_full_path, + WIMLIB_CASE_SENSITIVE); + if (template_dentry == NULL) { + DEBUG("\"%"TS"\": newly added file", dentry->_full_path); + return 0; + } + + inode = dentry->d_inode; + template_inode = template_dentry->d_inode; + + if (inode_metadata_consistent(inode, template_inode, + template_wim->lookup_table)) { + /*DEBUG("\"%"TS"\": No change detected", dentry->_full_path);*/ + ret = inode_copy_checksums(inode, template_inode, + wim, template_wim); + inode->i_visited = 1; + } else { + DEBUG("\"%"TS"\": change detected!", dentry->_full_path); + ret = 0; + } + return ret; +} + +/* API function documented in wimlib.h */ +WIMLIBAPI int +wimlib_reference_template_image(WIMStruct *wim, int new_image, + WIMStruct *template_wim, int template_image, + int flags, wimlib_progress_func_t progress_func) +{ + int ret; + struct wim_image_metadata *new_imd; + + if (wim == NULL || template_wim == NULL) + return WIMLIB_ERR_INVALID_PARAM; + + if (wim == template_wim && new_image == template_image) + return WIMLIB_ERR_INVALID_PARAM; + + if (new_image < 1 || new_image > wim->hdr.image_count) + return WIMLIB_ERR_INVALID_IMAGE; + + if (!wim_has_metadata(wim)) + return WIMLIB_ERR_METADATA_NOT_FOUND; + + new_imd = wim->image_metadata[new_image - 1]; + if (!new_imd->modified) + return WIMLIB_ERR_INVALID_PARAM; + + ret = select_wim_image(template_wim, template_image); + if (ret) + return ret; + + struct reference_template_args args = { + .wim = wim, + .template_wim = template_wim, + }; + + ret = for_dentry_in_tree(new_imd->root_dentry, + dentry_reference_template, &args); + dentry_tree_clear_inode_visited(new_imd->root_dentry); + return ret; +}