X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fdentry.c;h=f20d7dffdebcc76dc7a9a489c34c94d5b9c4429b;hb=035b2ae42b32ff892dcc794a0f4513fe8d0be76e;hp=a19e02c8f3df54dde88ad1b563d491dd1c5f309f;hpb=f50557a7095444c554a066b3837c2999ecd1be31;p=wimlib diff --git a/src/dentry.c b/src/dentry.c index a19e02c8..f20d7dff 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -3,7 +3,7 @@ */ /* - * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers + * Copyright 2012-2023 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -16,7 +16,7 @@ * details. * * You should have received a copy of the GNU Lesser General Public License - * along with this file; if not, see http://www.gnu.org/licenses/. + * along with this file; if not, see https://www.gnu.org/licenses/. */ /* @@ -46,8 +46,9 @@ * - wimlib does not allow *directory* hard links, so a WIM image really does * have a *tree* of dentries (and not an arbitrary graph of dentries). * - * - wimlib indexes dentries both case-insensitively and case-sensitively, - * allowing either behavior to be used for path lookup. + * - wimlib supports both case-sensitive and case-insensitive path lookups. + * The implementation uses a single in-memory index per directory, using a + * collation order like that used by NTFS; see collate_dentry_names(). * * - Multiple dentries in a directory might have the same case-insensitive * name. But wimlib enforces that at most one dentry in a directory can have @@ -92,10 +93,10 @@ struct wim_dentry_on_disk { le32 attributes; /* A value that specifies the security descriptor for this file or - * directory. If -1, the file or directory has no security descriptor. - * Otherwise, it is a 0-based index into the WIM image's table of - * security descriptors (see: `struct wim_security_data') */ - sle32 security_id; + * directory. If 0xFFFFFFFF, the file or directory has no security + * descriptor. Otherwise, it is a 0-based index into the WIM image's + * table of security descriptors (see: `struct wim_security_data') */ + le32 security_id; /* Offset, in bytes, from the start of the uncompressed metadata * resource of this directory's child directory entries, or 0 if this @@ -116,56 +117,42 @@ struct wim_dentry_on_disk { le64 last_write_time; /* - * Usually this is the SHA-1 message digest of the file's "contents" - * (the unnamed data stream). - * - * If the file has FILE_ATTRIBUTE_REPARSE_POINT set, then this is - * instead usually the SHA-1 message digest of the uncompressed reparse - * point data. - * - * However, there are some special rules that need to be applied to - * interpret this field correctly when extra stream entries are present. - * See the code for details. + * Usually this is the SHA-1 message digest of the file's contents, or + * all zeroes if the file is a directory or is empty. However, special + * rules apply if the file has FILE_ATTRIBUTE_REPARSE_POINT set or has + * named data streams. See assign_stream_types_unencrypted(). */ - u8 default_hash[SHA1_HASH_SIZE]; + u8 main_hash[SHA1_HASH_SIZE]; + + /* Unknown field (maybe accidental padding) */ + le32 unknown_0x54; - /* The format of the following data is not yet completely known and they - * do not correspond to Microsoft's documentation. + /* + * The following 8-byte union contains either information about the + * reparse point (for files with FILE_ATTRIBUTE_REPARSE_POINT set), or + * the "hard link group ID" (for other files). + * + * The reparse point information contains ReparseTag and ReparseReserved + * from the header of the reparse point buffer. It also contains a flag + * that indicates whether a reparse point fixup (for the target of an + * absolute symbolic link or junction) was done or not. * - * If this directory entry is for a reparse point (has - * FILE_ATTRIBUTE_REPARSE_POINT set in the 'attributes' field), then the - * version of the following fields containing the reparse tag is valid. - * Furthermore, the field notated as not_rpfixed, as far as I can tell, - * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the - * targets of absolute symbolic links) were *not* done, and otherwise 0. + * The "hard link group ID" is like an inode number; all dentries for + * the same inode share the same value. See inode_fixup.c for more + * information. * - * If this directory entry is not for a reparse point, then the version - * of the following fields containing the hard_link_group_id is valid. - * All MS says about this field is that "If this file is part of a hard - * link set, all the directory entries in the set will share the same - * value in this field.". However, more specifically I have observed - * the following: - * - If the file is part of a hard link set of size 1, then the - * hard_link_group_id should be set to either 0, which is treated - * specially as indicating "not hardlinked", or any unique value. - * - The specific nonzero values used to identity hard link sets do - * not matter, as long as they are unique. - * - However, due to bugs in Microsoft's software, it is actually NOT - * guaranteed that directory entries that share the same hard link - * group ID are actually hard linked to each either. See - * inode_fixup.c for the code that handles this. + * Note that this union creates the limitation that reparse point files + * cannot have multiple names (hard links). */ union { struct { - le32 rp_unknown_1; le32 reparse_tag; - le16 rp_unknown_2; - le16 not_rpfixed; - } _packed_attribute reparse; + le16 rp_reserved; + le16 rp_flags; + } __attribute__((packed)) reparse; struct { - le32 rp_unknown_1; le64 hard_link_group_id; - } _packed_attribute nonreparse; + } __attribute__((packed)) nonreparse; }; /* Number of extra stream entries that directly follow this dentry @@ -183,14 +170,14 @@ struct wim_dentry_on_disk { * encoded "long" name, excluding the null terminator. If zero, then * this file has no long name. The root dentry should not have a long * name, but all other dentries in the image should have long names. */ - le16 file_name_nbytes; + le16 name_nbytes; /* Beginning of optional, variable-length fields */ - /* If file_name_nbytes != 0, the next field will be the UTF-16LE encoded - * long file name. This will be null-terminated, so the size of this - * field will really be file_name_nbytes + 2. */ - /*utf16lechar file_name[];*/ + /* If name_nbytes != 0, the next field will be the UTF-16LE encoded long + * name. This will be null-terminated, so the size of this field will + * really be name_nbytes + 2. */ + /*utf16lechar name[];*/ /* If short_name_nbytes != 0, the next field will be the UTF-16LE * encoded short name. This will be null-terminated, so the size of @@ -201,9 +188,9 @@ struct wim_dentry_on_disk { * field) after 8-byte alignment, then the remaining space will be a * variable-length list of tagged metadata items. See tagged_items.c * for more information. */ - /* u8 tagged_items[] _aligned_attribute(8); */ + /* u8 tagged_items[] __attribute__((aligned(8))); */ -} _packed_attribute; +} __attribute__((packed)); /* If num_extra_streams != 0, then there are that many extra stream * entries following the dentry, starting on the next 8-byte aligned * boundary. They are not counted in the 'length' field of the dentry. @@ -233,20 +220,20 @@ struct wim_extra_stream_entry_on_disk { * the null terminator. There is a null terminator character if * @name_nbytes != 0; i.e., if this stream is named. */ utf16lechar name[]; -} _packed_attribute; +} __attribute__((packed)); static void -do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *file_name, - size_t file_name_nbytes) +do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *name, + size_t name_nbytes) { - FREE(dentry->file_name); - dentry->file_name = file_name; - dentry->file_name_nbytes = file_name_nbytes; + FREE(dentry->d_name); + dentry->d_name = name; + dentry->d_name_nbytes = name_nbytes; if (dentry_has_short_name(dentry)) { - FREE(dentry->short_name); - dentry->short_name = NULL; - dentry->short_name_nbytes = 0; + FREE(dentry->d_short_name); + dentry->d_short_name = NULL; + dentry->d_short_name_nbytes = 0; } } @@ -323,11 +310,11 @@ dentry_set_name(struct wim_dentry *dentry, const tchar *name) * tagged metadata items as well as any extra stream entries that may need to * follow the dentry. */ static size_t -dentry_min_len_with_names(u16 file_name_nbytes, u16 short_name_nbytes) +dentry_min_len_with_names(u16 name_nbytes, u16 short_name_nbytes) { size_t length = sizeof(struct wim_dentry_on_disk); - if (file_name_nbytes) - length += (u32)file_name_nbytes + 2; + if (name_nbytes) + length += (u32)name_nbytes + 2; if (short_name_nbytes) length += (u32)short_name_nbytes + 2; return length; @@ -361,43 +348,41 @@ dentry_out_total_length(const struct wim_dentry *dentry) { const struct wim_inode *inode = dentry->d_inode; size_t len; + unsigned num_unnamed_streams = 0; + bool have_named_data_stream = false; - len = dentry_min_len_with_names(dentry->file_name_nbytes, - dentry->short_name_nbytes); + len = dentry_min_len_with_names(dentry->d_name_nbytes, + dentry->d_short_name_nbytes); len = ALIGN(len, 8); - len += ALIGN(inode->i_extra_size, 8); - - if (!(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) { - /* - * Extra stream entries: - * - * - Use one extra stream entry for each named data stream - * - Use one extra stream entry for the unnamed data stream when there is either: - * - a reparse point stream - * - at least one named data stream (for Windows PE bug workaround) - * - Use one extra stream entry for the reparse point stream if there is one - */ - bool have_named_data_stream = false; - bool have_reparse_point_stream = false; + if (inode->i_extra) + len += ALIGN(inode->i_extra->size, 8); + + /* + * Calculate the total length of the extra stream entries that will be + * written. To match DISM, some odd rules need to be followed here. + * See write_dentry_streams() for explanation. Keep this in sync with + * write_dentry_streams()! + */ + if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) { + num_unnamed_streams++; + } else { for (unsigned i = 0; i < inode->i_num_streams; i++) { const struct wim_inode_stream *strm = &inode->i_streams[i]; + if (stream_is_named_data_stream(strm)) { len += stream_out_total_length(strm); have_named_data_stream = true; - } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) { - wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT); - have_reparse_point_stream = true; } } - - if (have_named_data_stream || have_reparse_point_stream) { - if (have_reparse_point_stream) - len += ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8); - len += ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8); - } + if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) + num_unnamed_streams++; + if (!(inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)) + num_unnamed_streams++; } - + if (num_unnamed_streams > 1 || have_named_data_stream) + len += num_unnamed_streams * + ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8); return len; } @@ -443,9 +428,8 @@ do_for_dentry_in_tree_depth(struct wim_dentry *dentry, * @arg will be passed as the second argument to each invocation of @visitor. * * This function does a pre-order traversal --- that is, a parent will be - * visited before its children. It also will visit siblings in order of - * case-sensitive filename. Equivalently, this function visits the entire tree - * in the case-sensitive lexicographic order of the full paths. + * visited before its children. Furthermore, siblings will be visited in their + * collation order. * * It is safe to pass NULL for @root, which means that the dentry tree is empty. * In this case, this function does nothing. @@ -480,10 +464,10 @@ for_dentry_in_tree_depth(struct wim_dentry *root, /* * Calculate the full path to @dentry within the WIM image, if not already done. * - * The full name will be saved in the cached value 'dentry->_full_path'. + * The full name will be saved in the cached value 'dentry->d_full_path'. * * Whenever possible, use dentry_full_path() instead of calling this and - * accessing _full_path directly. + * accessing d_full_path directly. * * Returns 0 or an error code resulting from a failed string conversion. */ @@ -491,16 +475,15 @@ int calculate_dentry_full_path(struct wim_dentry *dentry) { size_t ulen; - size_t dummy; const struct wim_dentry *d; - if (dentry->_full_path) + if (dentry->d_full_path) return 0; ulen = 0; d = dentry; do { - ulen += d->file_name_nbytes / sizeof(utf16lechar); + ulen += d->d_name_nbytes / sizeof(utf16lechar); ulen++; d = d->d_parent; /* assumes d == d->d_parent for root */ } while (!dentry_is_root(d)); @@ -510,8 +493,9 @@ calculate_dentry_full_path(struct wim_dentry *dentry) d = dentry; do { - p -= d->file_name_nbytes / sizeof(utf16lechar); - memcpy(p, d->file_name, d->file_name_nbytes); + p -= d->d_name_nbytes / sizeof(utf16lechar); + if (d->d_name_nbytes) + memcpy(p, d->d_name, d->d_name_nbytes); *--p = cpu_to_le16(WIM_PATH_SEPARATOR); d = d->d_parent; /* assumes d == d->d_parent for root */ } while (!dentry_is_root(d)); @@ -519,7 +503,7 @@ calculate_dentry_full_path(struct wim_dentry *dentry) wimlib_assert(p == ubuf); return utf16le_to_tstr(ubuf, ulen * sizeof(utf16lechar), - &dentry->_full_path, &dummy); + &dentry->d_full_path, NULL); } /* @@ -533,7 +517,7 @@ tchar * dentry_full_path(struct wim_dentry *dentry) { calculate_dentry_full_path(dentry); - return dentry->_full_path; + return dentry->d_full_path; } static int @@ -544,7 +528,7 @@ dentry_calculate_subdir_offset(struct wim_dentry *dentry, void *_subdir_offset_p struct wim_dentry *child; /* Set offset of directory's child dentries */ - dentry->subdir_offset = *subdir_offset_p; + dentry->d_subdir_offset = *subdir_offset_p; /* Account for child dentries */ for_dentry_child(child, dentry) @@ -553,8 +537,8 @@ dentry_calculate_subdir_offset(struct wim_dentry *dentry, void *_subdir_offset_p /* Account for end-of-directory entry */ *subdir_offset_p += 8; } else { - /* Not a directory; set subdir_offset to 0 */ - dentry->subdir_offset = 0; + /* Not a directory; set the subdir offset to 0 */ + dentry->d_subdir_offset = 0; } return 0; } @@ -579,50 +563,34 @@ calculate_subdir_offsets(struct wim_dentry *root, u64 *subdir_offset_p) for_dentry_in_tree(root, dentry_calculate_subdir_offset, subdir_offset_p); } -/* Compare the UTF-16LE long filenames of two dentries case insensitively. */ -static int -dentry_compare_names_case_insensitive(const struct wim_dentry *d1, - const struct wim_dentry *d2) -{ - return cmp_utf16le_strings(d1->file_name, - d1->file_name_nbytes / 2, - d2->file_name, - d2->file_name_nbytes / 2, - true); -} - -/* Compare the UTF-16LE long filenames of two dentries case sensitively. */ static int -dentry_compare_names_case_sensitive(const struct wim_dentry *d1, - const struct wim_dentry *d2) +dentry_compare_names(const struct wim_dentry *d1, const struct wim_dentry *d2, + bool ignore_case) { - return cmp_utf16le_strings(d1->file_name, - d1->file_name_nbytes / 2, - d2->file_name, - d2->file_name_nbytes / 2, - false); -} - -static int -_avl_dentry_compare_names_ci(const struct avl_tree_node *n1, - const struct avl_tree_node *n2) -{ - const struct wim_dentry *d1, *d2; - - d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node_ci); - d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node_ci); - return dentry_compare_names_case_insensitive(d1, d2); + return cmp_utf16le_strings(d1->d_name, d1->d_name_nbytes / 2, + d2->d_name, d2->d_name_nbytes / 2, + ignore_case); } +/* + * Collate (compare) the long filenames of two dentries. This first compares + * the names ignoring case, then falls back to a case-sensitive comparison if + * the names are the same ignoring case. + */ static int -_avl_dentry_compare_names(const struct avl_tree_node *n1, - const struct avl_tree_node *n2) +collate_dentry_names(const struct avl_tree_node *n1, + const struct avl_tree_node *n2) { const struct wim_dentry *d1, *d2; + int res; d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node); d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node); - return dentry_compare_names_case_sensitive(d1, d2); + + res = dentry_compare_names(d1, d2, true); + if (res) + return res; + return dentry_compare_names(d1, d2, false); } /* Default case sensitivity behavior for searches with @@ -630,106 +598,92 @@ _avl_dentry_compare_names(const struct avl_tree_node *n1, * WIMLIB_INIT_FLAG_DEFAULT_CASE_SENSITIVE or * WIMLIB_INIT_FLAG_DEFAULT_CASE_INSENSITIVE to wimlib_global_init(). */ bool default_ignore_case = -#ifdef __WIN32__ +#ifdef _WIN32 true #else false #endif ; -/* Case-sensitive dentry lookup. Only @file_name and @file_name_nbytes of - * @dummy must be valid. */ -static struct wim_dentry * -dir_lookup(const struct wim_inode *dir, const struct wim_dentry *dummy) +/* + * Find the dentry within the given directory that has the given UTF-16LE + * filename. Return it if found, otherwise return NULL. This has configurable + * case sensitivity, and @name need not be null-terminated. + */ +struct wim_dentry * +get_dentry_child_with_utf16le_name(const struct wim_dentry *dir, + const utf16lechar *name, + size_t name_nbytes, + CASE_SENSITIVITY_TYPE case_type) { - struct avl_tree_node *node; + struct wim_dentry wanted; + struct avl_tree_node *cur = dir->d_inode->i_children; + struct wim_dentry *ci_match = NULL; - node = avl_tree_lookup_node(dir->i_children, - &dummy->d_index_node, - _avl_dentry_compare_names); - if (!node) - return NULL; - return avl_tree_entry(node, struct wim_dentry, d_index_node); -} + wanted.d_name = (utf16lechar *)name; + wanted.d_name_nbytes = name_nbytes; -/* Case-insensitive dentry lookup. Only @file_name and @file_name_nbytes of - * @dummy must be valid. */ -static struct wim_dentry * -dir_lookup_ci(const struct wim_inode *dir, const struct wim_dentry *dummy) -{ - struct avl_tree_node *node; + if (unlikely(wanted.d_name_nbytes != name_nbytes)) + return NULL; /* overflow */ - node = avl_tree_lookup_node(dir->i_children_ci, - &dummy->d_index_node_ci, - _avl_dentry_compare_names_ci); - if (!node) - return NULL; - return avl_tree_entry(node, struct wim_dentry, d_index_node_ci); -} + /* Note: we can't use avl_tree_lookup_node() here because we need to + * save case-insensitive matches. */ + while (cur) { + struct wim_dentry *child; + int res; -/* Given a UTF-16LE filename and a directory, look up the dentry for the file. - * Return it if found, otherwise NULL. This has configurable case sensitivity, - * and @name need not be null-terminated. */ -struct wim_dentry * -get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry, - const utf16lechar *name, - size_t name_nbytes, - CASE_SENSITIVITY_TYPE case_ctype) -{ - const struct wim_inode *dir = dentry->d_inode; - bool ignore_case = will_ignore_case(case_ctype); - struct wim_dentry dummy; - struct wim_dentry *child; + child = avl_tree_entry(cur, struct wim_dentry, d_index_node); - dummy.file_name = (utf16lechar*)name; - dummy.file_name_nbytes = name_nbytes; + res = dentry_compare_names(&wanted, child, true); + if (!res) { + /* case-insensitive match found */ + ci_match = child; - if (!ignore_case) - /* Case-sensitive lookup. */ - return dir_lookup(dir, &dummy); + res = dentry_compare_names(&wanted, child, false); + if (!res) + return child; /* case-sensitive match found */ + } - /* Case-insensitive lookup. */ + if (res < 0) + cur = cur->left; + else + cur = cur->right; + } + + /* No case-sensitive match; use a case-insensitive match if possible. */ - child = dir_lookup_ci(dir, &dummy); - if (!child) + if (!will_ignore_case(case_type)) return NULL; - if (likely(list_empty(&child->d_ci_conflict_list))) - /* Only one dentry has this case-insensitive name; return it */ - return child; + if (ci_match) { + size_t num_other_ci_matches = 0; + struct wim_dentry *other_ci_match, *d; - /* Multiple dentries have the same case-insensitive name. Choose the - * dentry with the same case-sensitive name, if one exists; otherwise - * print a warning and choose one of the possible dentries arbitrarily. - */ - struct wim_dentry *alt = child; - size_t num_alts = 0; + dentry_for_each_ci_match(d, ci_match) { + num_other_ci_matches++; + other_ci_match = d; + } - do { - num_alts++; - if (!dentry_compare_names_case_sensitive(&dummy, alt)) - return alt; - alt = list_entry(alt->d_ci_conflict_list.next, - struct wim_dentry, d_ci_conflict_list); - } while (alt != child); - - WARNING("Result of case-insensitive lookup is ambiguous\n" - " (returning \"%"TS"\" of %zu " - "possible files, including \"%"TS"\")", - dentry_full_path(child), - num_alts, - dentry_full_path(list_entry(child->d_ci_conflict_list.next, - struct wim_dentry, - d_ci_conflict_list))); - return child; + if (num_other_ci_matches != 0) { + WARNING("Result of case-insensitive lookup is ambiguous\n" + " (returning \"%"TS"\" of %zu " + "possible files, including \"%"TS"\")", + dentry_full_path(ci_match), num_other_ci_matches, + dentry_full_path(other_ci_match)); + } + } + + return ci_match; } -/* Given a 'tchar' filename and a directory, look up the dentry for the file. - * If the filename was successfully converted to UTF-16LE and the dentry was - * found, return it; otherwise return NULL. This has configurable case - * sensitivity. */ +/* + * Find the dentry within the given directory that has the given 'tstr' + * filename. If the filename was successfully converted to UTF-16LE and the + * dentry was found, return it; otherwise return NULL. This has configurable + * case sensitivity. + */ struct wim_dentry * -get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name, +get_dentry_child_with_name(const struct wim_dentry *dir, const tchar *name, CASE_SENSITIVITY_TYPE case_type) { int ret; @@ -742,7 +696,7 @@ get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name, if (ret) return NULL; - child = get_dentry_child_with_utf16le_name(dentry, + child = get_dentry_child_with_utf16le_name(dir, name_utf16le, name_utf16le_nbytes, case_type); @@ -990,19 +944,6 @@ new_filler_directory(struct wim_dentry **dentry_ret) return 0; } -static int -dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore) -{ - dentry->d_inode->i_visited = 0; - return 0; -} - -void -dentry_tree_clear_inode_visited(struct wim_dentry *root) -{ - for_dentry_in_tree(root, dentry_clear_inode_visited, NULL); -} - /* * Free a WIM dentry. * @@ -1014,9 +955,9 @@ free_dentry(struct wim_dentry *dentry) { if (dentry) { d_disassociate(dentry); - FREE(dentry->file_name); - FREE(dentry->short_name); - FREE(dentry->_full_path); + FREE(dentry->d_name); + FREE(dentry->d_short_name); + FREE(dentry->d_full_path); FREE(dentry); } } @@ -1067,134 +1008,99 @@ free_dentry_tree(struct wim_dentry *root, struct blob_table *blob_table) for_dentry_in_tree_depth(root, f, blob_table); } -/* Insert the @child dentry into the case sensitive index of the @dir directory. - * Return NULL if successfully inserted, otherwise a pointer to the - * already-inserted duplicate. */ -static struct wim_dentry * -dir_index_child(struct wim_inode *dir, struct wim_dentry *child) +/* + * Return the first dentry in the list of dentries which have the same + * case-insensitive name as the one given. + */ +struct wim_dentry * +dentry_get_first_ci_match(struct wim_dentry *dentry) { - struct avl_tree_node *duplicate; + struct wim_dentry *ci_match = dentry; - duplicate = avl_tree_insert(&dir->i_children, - &child->d_index_node, - _avl_dentry_compare_names); - if (!duplicate) - return NULL; - return avl_tree_entry(duplicate, struct wim_dentry, d_index_node); -} + for (;;) { + struct avl_tree_node *node; + struct wim_dentry *prev; + + node = avl_tree_prev_in_order(&ci_match->d_index_node); + if (!node) + break; + prev = avl_tree_entry(node, struct wim_dentry, d_index_node); + if (dentry_compare_names(prev, dentry, true)) + break; + ci_match = prev; + } -/* Insert the @child dentry into the case insensitive index of the @dir - * directory. Return NULL if successfully inserted, otherwise a pointer to the - * already-inserted duplicate. */ -static struct wim_dentry * -dir_index_child_ci(struct wim_inode *dir, struct wim_dentry *child) -{ - struct avl_tree_node *duplicate; + if (ci_match == dentry) + return dentry_get_next_ci_match(dentry, dentry); - duplicate = avl_tree_insert(&dir->i_children_ci, - &child->d_index_node_ci, - _avl_dentry_compare_names_ci); - if (!duplicate) - return NULL; - return avl_tree_entry(duplicate, struct wim_dentry, d_index_node_ci); + return ci_match; } -/* Remove the specified dentry from its directory's case-sensitive index. */ -static void -dir_unindex_child(struct wim_inode *dir, struct wim_dentry *child) +/* + * Return the next dentry in the list of dentries which have the same + * case-insensitive name as the one given. + */ +struct wim_dentry * +dentry_get_next_ci_match(struct wim_dentry *dentry, struct wim_dentry *ci_match) { - avl_tree_remove(&dir->i_children, &child->d_index_node); -} + do { + struct avl_tree_node *node; -/* Remove the specified dentry from its directory's case-insensitive index. */ -static void -dir_unindex_child_ci(struct wim_inode *dir, struct wim_dentry *child) -{ - avl_tree_remove(&dir->i_children_ci, &child->d_index_node_ci); -} + node = avl_tree_next_in_order(&ci_match->d_index_node); + if (!node) + return NULL; + ci_match = avl_tree_entry(node, struct wim_dentry, d_index_node); + } while (ci_match == dentry); -/* Return true iff the specified dentry is in its parent directory's - * case-insensitive index. */ -static bool -dentry_in_ci_index(const struct wim_dentry *dentry) -{ - return !avl_tree_node_is_unlinked(&dentry->d_index_node_ci); + if (dentry_compare_names(ci_match, dentry, true)) + return NULL; + + return ci_match; } /* - * Link a dentry into the tree. + * Link a dentry into a directory. * * @parent: - * The dentry that will be the parent of @child. It must name a directory. + * The directory into which to link the dentry. * * @child: - * The dentry to link. It must be currently unlinked. + * The dentry to link into the directory. It must be currently unlinked. * - * Returns NULL if successful. If @parent already contains a dentry with the - * same case-sensitive name as @child, returns a pointer to this duplicate - * dentry. + * Returns NULL if successful; or, if @parent already contains a dentry with the + * same case-sensitive name as @child, then a pointer to this duplicate dentry + * is returned. */ struct wim_dentry * dentry_add_child(struct wim_dentry *parent, struct wim_dentry *child) { - struct wim_dentry *duplicate; - struct wim_inode *dir; + struct wim_inode *dir = parent->d_inode; + struct avl_tree_node *duplicate; wimlib_assert(parent != child); - - dir = parent->d_inode; - wimlib_assert(inode_is_directory(dir)); - duplicate = dir_index_child(dir, child); + duplicate = avl_tree_insert(&dir->i_children, &child->d_index_node, + collate_dentry_names); if (duplicate) - return duplicate; + return avl_tree_entry(duplicate, struct wim_dentry, d_index_node); - duplicate = dir_index_child_ci(dir, child); - if (duplicate) { - list_add(&child->d_ci_conflict_list, &duplicate->d_ci_conflict_list); - avl_tree_node_set_unlinked(&child->d_index_node_ci); - } else { - INIT_LIST_HEAD(&child->d_ci_conflict_list); - } child->d_parent = parent; return NULL; } -/* Unlink a dentry from the tree. */ +/* Unlink a dentry from its parent directory. */ void unlink_dentry(struct wim_dentry *dentry) { - struct wim_inode *dir; - /* Do nothing if the dentry is root or it's already unlinked. Not * actually necessary based on the current callers, but we do the check * here to be safe. */ if (unlikely(dentry->d_parent == dentry)) return; - dir = dentry->d_parent->d_inode; - - dir_unindex_child(dir, dentry); - - if (dentry_in_ci_index(dentry)) { - - dir_unindex_child_ci(dir, dentry); - - if (!list_empty(&dentry->d_ci_conflict_list)) { - /* Make a different case-insensitively-the-same dentry - * be the "representative" in the search index. */ - struct list_head *next; - struct wim_dentry *other; - struct wim_dentry *existing; - - next = dentry->d_ci_conflict_list.next; - other = list_entry(next, struct wim_dentry, d_ci_conflict_list); - existing = dir_index_child_ci(dir, other); - wimlib_assert(existing == NULL); - } - } - list_del(&dentry->d_ci_conflict_list); + avl_tree_remove(&dentry->d_parent->d_inode->i_children, + &dentry->d_index_node); /* Not actually necessary, but to be safe don't retain the now-obsolete * parent pointer. */ @@ -1208,10 +1114,12 @@ read_extra_data(const u8 *p, const u8 *end, struct wim_inode *inode) p++; if (unlikely(p < end)) { - inode->i_extra = memdup(p, end - p); + inode->i_extra = MALLOC(sizeof(struct wim_inode_extra) + + end - p); if (!inode->i_extra) return WIMLIB_ERR_NOMEM; - inode->i_extra_size = end - p; + inode->i_extra->size = end - p; + memcpy(inode->i_extra->data, p, end - p); } return 0; } @@ -1245,43 +1153,66 @@ assign_stream_types_encrypted(struct wim_inode *inode) /* * Set the type of each stream for an unencrypted file. * - * There will be an unnamed data stream, a reparse point stream, or both an - * unnamed data stream and a reparse point stream. In addition, there may be - * named data streams. + * To specify the streams of each file, the WIM provides a main_hash and an + * optional list of "extra stream entries". Each extra stream entry is a + * (name, hash) pair where the name is optional. Hashes can be the special + * value of zero_hash, which means the stream is empty (zero-length). + * + * While extra stream entries with names always refer to "named data streams", + * the main hash and any extra unnamed hashes can be hard to interpret. This is + * because the WIM file format unfortunately doesn't make it very clear which is + * the unnamed data stream (i.e. standard file contents) and which is the + * reparse stream. The way this ambiguity is resolved (based on what MS + * software seems to do) is by (1) a file can have at most one unnamed data + * stream and at most one reparse stream, (2) a reparse stream is present if and + * only if the file has FILE_ATTRIBUTE_REPARSE_POINT, and (3) the reparse + * stream, if present, is stored before the unnamed data stream if present + * (considering main_hash to come before any extra hashes). Note: directories + * need not have an unnamed data stream stored, even with a zero hash, as + * "unnamed data stream" isn't meaningful for a directory in the first place. + * + * With those rules in mind, one would expect that the first unnamed stream + * would use main_hash, and the second (if present) would use an extra stream + * entry. However, there is another quirk that we must be compatible with: + * sometimes main_hash isn't used and only extra stream entries are used. To + * handle this, we ignore main_hash if it is zero and there is at least one + * unnamed extra stream entry. This works correctly as long as a zero main_hash + * and an unnamed extra stream entry is never used to represent an empty reparse + * stream and an unnamed data stream. (It's not, as the reparse stream always + * goes in the extra stream entries in this case. See write_dentry_streams().) */ static void assign_stream_types_unencrypted(struct wim_inode *inode) { - bool found_reparse_point_stream = false; + bool found_reparse_stream = false; bool found_unnamed_data_stream = false; - struct wim_inode_stream *unnamed_stream_with_zero_hash = NULL; for (unsigned i = 0; i < inode->i_num_streams; i++) { struct wim_inode_stream *strm = &inode->i_streams[i]; if (stream_is_named(strm)) { - /* Named data stream */ + /* Named extra stream entry */ strm->stream_type = STREAM_TYPE_DATA; - } else if (!is_zero_hash(strm->_stream_hash)) { + } else if (i != 0 || !is_zero_hash(strm->_stream_hash)) { + /* Unnamed extra stream entry or a nonzero main_hash */ if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) && - !found_reparse_point_stream) { - found_reparse_point_stream = true; + !found_reparse_stream) { + found_reparse_stream = true; strm->stream_type = STREAM_TYPE_REPARSE_POINT; } else if (!found_unnamed_data_stream) { found_unnamed_data_stream = true; strm->stream_type = STREAM_TYPE_DATA; - } - } else { - /* If no stream name is specified and the hash is zero, - * then remember this stream for later so that we can - * assign it to the unnamed data stream if we don't find - * a better candidate. */ - unnamed_stream_with_zero_hash = strm; - } + } /* Else, too many unnamed streams were found. */ + + } /* Else, it's a zero main_hash. */ } - if (!found_unnamed_data_stream && unnamed_stream_with_zero_hash != NULL) - unnamed_stream_with_zero_hash->stream_type = STREAM_TYPE_DATA; + /* If needed, use the zero main_hash. */ + if (!found_reparse_stream && !found_unnamed_data_stream) { + inode->i_streams[0].stream_type = + (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) ? + STREAM_TYPE_REPARSE_POINT : STREAM_TYPE_DATA; + } } /* @@ -1289,7 +1220,7 @@ assign_stream_types_unencrypted(struct wim_inode *inode) */ static int setup_inode_streams(const u8 *p, const u8 *end, struct wim_inode *inode, - unsigned num_extra_streams, const u8 *default_hash, + unsigned num_extra_streams, const u8 *main_hash, u64 *offset_p) { const u8 *orig_p = p; @@ -1303,13 +1234,13 @@ setup_inode_streams(const u8 *p, const u8 *end, struct wim_inode *inode, return WIMLIB_ERR_NOMEM; } - /* Use the default hash field for the first stream */ + /* Use main_hash for the first stream. */ inode->i_streams[0].stream_name = (utf16lechar *)NO_STREAM_NAME; - copy_hash(inode->i_streams[0]._stream_hash, default_hash); + copy_hash(inode->i_streams[0]._stream_hash, main_hash); inode->i_streams[0].stream_type = STREAM_TYPE_UNKNOWN; inode->i_streams[0].stream_id = 0; - /* Read the extra stream entries */ + /* Read the extra stream entries. */ for (unsigned i = 1; i < inode->i_num_streams; i++) { struct wim_inode_stream *strm; const struct wim_extra_stream_entry_on_disk *disk_strm; @@ -1398,11 +1329,11 @@ read_dentry(const u8 * restrict buf, size_t buf_len, struct wim_dentry *dentry; struct wim_inode *inode; u16 short_name_nbytes; - u16 file_name_nbytes; + u16 name_nbytes; u64 calculated_size; int ret; - BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE); + STATIC_ASSERT(sizeof(struct wim_dentry_on_disk) == WIM_DENTRY_DISK_SIZE); /* Before reading the whole dentry, we need to read just the length. * This is because a dentry of length 8 (that is, just the length field) @@ -1445,24 +1376,19 @@ read_dentry(const u8 * restrict buf, size_t buf_len, /* Read more fields: some into the dentry, and some into the inode. */ inode->i_attributes = le32_to_cpu(disk_dentry->attributes); inode->i_security_id = le32_to_cpu(disk_dentry->security_id); - dentry->subdir_offset = le64_to_cpu(disk_dentry->subdir_offset); + dentry->d_subdir_offset = le64_to_cpu(disk_dentry->subdir_offset); inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time); inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time); inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time); + inode->i_unknown_0x54 = le32_to_cpu(disk_dentry->unknown_0x54); - /* I don't know what's going on here. It seems like M$ screwed up the - * reparse points, then put the fields in the same place and didn't - * document it. So we have some fields we read for reparse points, and - * some fields in the same place for non-reparse-points. */ if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { - inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->reparse.rp_unknown_1); inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag); - inode->i_rp_unknown_2 = le16_to_cpu(disk_dentry->reparse.rp_unknown_2); - inode->i_not_rpfixed = le16_to_cpu(disk_dentry->reparse.not_rpfixed); + inode->i_rp_reserved = le16_to_cpu(disk_dentry->reparse.rp_reserved); + inode->i_rp_flags = le16_to_cpu(disk_dentry->reparse.rp_flags); /* Leave inode->i_ino at 0. Note: this means that WIM cannot * represent multiple hard links to a reparse point file. */ } else { - inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->nonreparse.rp_unknown_1); inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id); } @@ -1470,16 +1396,16 @@ read_dentry(const u8 * restrict buf, size_t buf_len, * name, and the short name. */ short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes); - file_name_nbytes = le16_to_cpu(disk_dentry->file_name_nbytes); + name_nbytes = le16_to_cpu(disk_dentry->name_nbytes); - if (unlikely((short_name_nbytes & 1) | (file_name_nbytes & 1))) { + if (unlikely((short_name_nbytes & 1) | (name_nbytes & 1))) { ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; goto err_free_dentry; } /* We now know the length of the file name and short name. Make sure * the length of the dentry is large enough to actually hold them. */ - calculated_size = dentry_min_len_with_names(file_name_nbytes, + calculated_size = dentry_min_len_with_names(name_nbytes, short_name_nbytes); if (unlikely(length < calculated_size)) { @@ -1492,25 +1418,25 @@ read_dentry(const u8 * restrict buf, size_t buf_len, /* Read the filename if present. Note: if the filename is empty, there * is no null terminator following it. */ - if (file_name_nbytes) { - dentry->file_name = utf16le_dupz(p, file_name_nbytes); - if (dentry->file_name == NULL) { + if (name_nbytes) { + dentry->d_name = utf16le_dupz(p, name_nbytes); + if (unlikely(!dentry->d_name)) { ret = WIMLIB_ERR_NOMEM; goto err_free_dentry; } - dentry->file_name_nbytes = file_name_nbytes; - p += (u32)file_name_nbytes + 2; + dentry->d_name_nbytes = name_nbytes; + p += (u32)name_nbytes + 2; } /* Read the short filename if present. Note: if there is no short * filename, there is no null terminator following it. */ if (short_name_nbytes) { - dentry->short_name = utf16le_dupz(p, short_name_nbytes); - if (dentry->short_name == NULL) { + dentry->d_short_name = utf16le_dupz(p, short_name_nbytes); + if (unlikely(!dentry->d_short_name)) { ret = WIMLIB_ERR_NOMEM; goto err_free_dentry; } - dentry->short_name_nbytes = short_name_nbytes; + dentry->d_short_name_nbytes = short_name_nbytes; p += (u32)short_name_nbytes + 2; } @@ -1527,7 +1453,7 @@ read_dentry(const u8 * restrict buf, size_t buf_len, &buf[buf_len], inode, le16_to_cpu(disk_dentry->num_extra_streams), - disk_dentry->default_hash, + disk_dentry->main_hash, &offset); if (ret) goto err_free_dentry; @@ -1541,40 +1467,72 @@ err_free_dentry: return ret; } -/* Is the dentry named "." or ".." ? */ static bool dentry_is_dot_or_dotdot(const struct wim_dentry *dentry) { - if (dentry->file_name_nbytes <= 4) { - if (dentry->file_name_nbytes == 4) { - if (dentry->file_name[0] == cpu_to_le16('.') && - dentry->file_name[1] == cpu_to_le16('.')) + if (dentry->d_name_nbytes <= 4) { + if (dentry->d_name_nbytes == 4) { + if (dentry->d_name[0] == cpu_to_le16('.') && + dentry->d_name[1] == cpu_to_le16('.')) return true; - } else if (dentry->file_name_nbytes == 2) { - if (dentry->file_name[0] == cpu_to_le16('.')) + } else if (dentry->d_name_nbytes == 2) { + if (dentry->d_name[0] == cpu_to_le16('.')) return true; } } return false; } +static bool +dentry_contains_embedded_null(const struct wim_dentry *dentry) +{ + for (unsigned i = 0; i < dentry->d_name_nbytes / 2; i++) + if (dentry->d_name[i] == cpu_to_le16('\0')) + return true; + return false; +} + +static bool +should_ignore_dentry(struct wim_dentry *dir, const struct wim_dentry *dentry) +{ + /* All dentries except the root must be named. */ + if (!dentry_has_long_name(dentry)) { + WARNING("Ignoring unnamed file in directory \"%"TS"\"", + dentry_full_path(dir)); + return true; + } + + /* Don't allow files named "." or "..". Such filenames could be used in + * path traversal attacks. */ + if (dentry_is_dot_or_dotdot(dentry)) { + WARNING("Ignoring file named \".\" or \"..\" in directory " + "\"%"TS"\"", dentry_full_path(dir)); + return true; + } + + /* Don't allow filenames containing embedded null characters. Although + * the null character is already considered an unsupported character for + * extraction by all targets, it is probably a good idea to just forbid + * such names entirely. */ + if (dentry_contains_embedded_null(dentry)) { + WARNING("Ignoring filename with embedded null character in " + "directory \"%"TS"\"", dentry_full_path(dir)); + return true; + } + + return false; +} + static int read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len, - struct wim_dentry * restrict dir) + struct wim_dentry * restrict dir, unsigned depth) { - u64 cur_offset = dir->subdir_offset; - - /* Check for cyclic directory structure, which would cause infinite - * recursion if not handled. */ - for (struct wim_dentry *d = dir->d_parent; - !dentry_is_root(d); d = d->d_parent) - { - if (unlikely(d->subdir_offset == cur_offset)) { - ERROR("Cyclic directory structure detected: children " - "of \"%"TS"\" coincide with children of \"%"TS"\"", - dentry_full_path(dir), dentry_full_path(d)); - return WIMLIB_ERR_INVALID_METADATA_RESOURCE; - } + u64 cur_offset = dir->d_subdir_offset; + + /* Disallow extremely deep or cyclic directory structures */ + if (unlikely(depth >= 16384)) { + ERROR("Directory structure too deep!"); + return WIMLIB_ERR_INVALID_METADATA_RESOURCE; } for (;;) { @@ -1591,18 +1549,8 @@ read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len, if (child == NULL) return 0; - /* All dentries except the root should be named. */ - if (unlikely(!dentry_has_long_name(child))) { - WARNING("Ignoring unnamed dentry in " - "directory \"%"TS"\"", dentry_full_path(dir)); - free_dentry(child); - continue; - } - - /* Don't allow files named "." or "..". */ - if (unlikely(dentry_is_dot_or_dotdot(child))) { - WARNING("Ignoring file named \".\" or \"..\"; " - "potentially malicious archive!!!"); + /* Ignore dentries with bad names. */ + if (unlikely(should_ignore_dentry(dir, child))) { free_dentry(child); continue; } @@ -1623,11 +1571,12 @@ read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len, /* If this child is a directory that itself has children, call * this procedure recursively. */ - if (child->subdir_offset != 0) { + if (child->d_subdir_offset != 0) { if (likely(dentry_is_directory(child))) { ret = read_dentry_tree_recursive(buf, buf_len, - child); + child, + depth + 1); if (ret) return ret; } else { @@ -1668,8 +1617,6 @@ read_dentry_tree(const u8 *buf, size_t buf_len, int ret; struct wim_dentry *root; - DEBUG("Reading dentry tree (root_offset=%"PRIu64")", root_offset); - ret = read_dentry(buf, buf_len, &root_offset, &root); if (ret) return ret; @@ -1689,8 +1636,8 @@ read_dentry_tree(const u8 *buf, size_t buf_len, goto err_free_dentry_tree; } - if (likely(root->subdir_offset != 0)) { - ret = read_dentry_tree_recursive(buf, buf_len, root); + if (likely(root->d_subdir_offset != 0)) { + ret = read_dentry_tree_recursive(buf, buf_len, root, 0); if (ret) goto err_free_dentry_tree; } @@ -1733,6 +1680,105 @@ write_extra_stream_entry(u8 * restrict p, const utf16lechar * restrict name, return p; } +/* + * Write the stream references for a WIM dentry. To be compatible with DISM, we + * follow the below rules: + * + * 1. If the file has FILE_ATTRIBUTE_ENCRYPTED, then only the EFSRPC_RAW_DATA + * stream is stored. Otherwise, the streams that are stored are: + * - Reparse stream if the file has FILE_ATTRIBUTE_REPARSE_POINT + * - Unnamed data stream if the file doesn't have FILE_ATTRIBUTE_DIRECTORY + * - Named data streams + * + * 2. If only one stream is being stored and it is the EFSRPC_RAW_DATA, unnamed + * data, or reparse stream, then its hash goes in main_hash, and no extra + * stream entries are stored. Otherwise, *all* streams go in the extra + * stream entries, and main_hash is left zeroed! + * + * 3. If both the reparse stream and unnamed data stream are being stored, then + * the reparse stream comes first. + * + * 4. The unnamed stream(s) come before the named stream(s). (Actually, DISM + * puts the named streams between the first and second unnamed streams, but + * this is incompatible with itself... Tested with DISM 10.0.20348.681.) + * + * wimlib v1.14.1 and earlier behaved slightly differently for directories. + * First, wimlib always put the hash of the reparse stream in an extra stream + * entry, never in main_hash. This difference vs. DISM went unnoticed for a + * long time, but eventually it was found that it broke the Windows 8 setup + * wizard. Second, when a directory had any extra streams, wimlib created an + * extra stream entry to represent the (empty) unnamed data stream. However, + * DISM now rejects that (though I think it used to accept it). There isn't + * really any such thing as "unnamed data stream" for a directory. + * + * Keep this in sync with dentry_out_total_length()! + */ +static u8 * +write_dentry_streams(const struct wim_inode *inode, + struct wim_dentry_on_disk *disk_dentry, u8 *p) +{ + const u8 *unnamed_data_stream_hash = zero_hash; + const u8 *reparse_stream_hash = zero_hash; + const u8 *efsrpc_stream_hash = zero_hash; + const u8 *unnamed_stream_hashes[2] = { zero_hash }; + unsigned num_unnamed_streams = 0; + unsigned num_named_streams = 0; + + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct wim_inode_stream *strm = &inode->i_streams[i]; + + switch (strm->stream_type) { + case STREAM_TYPE_DATA: + if (stream_is_named(strm)) + num_named_streams++; + else + unnamed_data_stream_hash = stream_hash(strm); + break; + case STREAM_TYPE_REPARSE_POINT: + reparse_stream_hash = stream_hash(strm); + break; + case STREAM_TYPE_EFSRPC_RAW_DATA: + efsrpc_stream_hash = stream_hash(strm); + break; + } + } + + if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) { + unnamed_stream_hashes[num_unnamed_streams++] = efsrpc_stream_hash; + num_named_streams = 0; + } else { + if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) + unnamed_stream_hashes[num_unnamed_streams++] = reparse_stream_hash; + if (!(inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)) + unnamed_stream_hashes[num_unnamed_streams++] = unnamed_data_stream_hash; + } + + if (num_unnamed_streams <= 1 && num_named_streams == 0) { + /* No extra stream entries are needed. */ + copy_hash(disk_dentry->main_hash, unnamed_stream_hashes[0]); + disk_dentry->num_extra_streams = 0; + return p; + } + + /* Else, all streams go in extra stream entries. */ + copy_hash(disk_dentry->main_hash, zero_hash); + wimlib_assert(num_unnamed_streams + num_named_streams <= 0xFFFF); + disk_dentry->num_extra_streams = cpu_to_le16(num_unnamed_streams + + num_named_streams); + for (unsigned i = 0; i < num_unnamed_streams; i++) + p = write_extra_stream_entry(p, NO_STREAM_NAME, + unnamed_stream_hashes[i]); + for (unsigned i = 0; i < inode->i_num_streams; i++) { + const struct wim_inode_stream *strm = &inode->i_streams[i]; + + if (stream_is_named_data_stream(strm)) { + p = write_extra_stream_entry(p, strm->stream_name, + stream_hash(strm)); + } + } + return p; +} + /* * Write a WIM dentry to an output buffer. * @@ -1761,7 +1807,7 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) disk_dentry->attributes = cpu_to_le32(inode->i_attributes); disk_dentry->security_id = cpu_to_le32(inode->i_security_id); - disk_dentry->subdir_offset = cpu_to_le64(dentry->subdir_offset); + disk_dentry->subdir_offset = cpu_to_le64(dentry->d_subdir_offset); disk_dentry->unused_1 = cpu_to_le64(0); disk_dentry->unused_2 = cpu_to_le64(0); @@ -1769,36 +1815,35 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time); disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time); disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time); + disk_dentry->unknown_0x54 = cpu_to_le32(inode->i_unknown_0x54); if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { - disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1); disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag); - disk_dentry->reparse.rp_unknown_2 = cpu_to_le16(inode->i_rp_unknown_2); - disk_dentry->reparse.not_rpfixed = cpu_to_le16(inode->i_not_rpfixed); + disk_dentry->reparse.rp_reserved = cpu_to_le16(inode->i_rp_reserved); + disk_dentry->reparse.rp_flags = cpu_to_le16(inode->i_rp_flags); } else { - disk_dentry->nonreparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1); disk_dentry->nonreparse.hard_link_group_id = cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino); } - disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes); - disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes); + disk_dentry->short_name_nbytes = cpu_to_le16(dentry->d_short_name_nbytes); + disk_dentry->name_nbytes = cpu_to_le16(dentry->d_name_nbytes); p += sizeof(struct wim_dentry_on_disk); wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry)); if (dentry_has_long_name(dentry)) - p = mempcpy(p, dentry->file_name, (u32)dentry->file_name_nbytes + 2); + p = mempcpy(p, dentry->d_name, (u32)dentry->d_name_nbytes + 2); if (dentry_has_short_name(dentry)) - p = mempcpy(p, dentry->short_name, (u32)dentry->short_name_nbytes + 2); + p = mempcpy(p, dentry->d_short_name, (u32)dentry->d_short_name_nbytes + 2); /* Align to 8-byte boundary */ while ((uintptr_t)p & 7) *p++ = 0; - if (inode->i_extra_size) { + if (inode->i_extra) { /* Extra tagged items --- not usually present. */ - p = mempcpy(p, inode->i_extra, inode->i_extra_size); + p = mempcpy(p, inode->i_extra->data, inode->i_extra->size); /* Align to 8-byte boundary */ while ((uintptr_t)p & 7) @@ -1807,83 +1852,17 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) disk_dentry->length = cpu_to_le64(p - orig_p); - /* Streams */ - - if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) { - const struct wim_inode_stream *efs_strm; - const u8 *efs_hash; - - efs_strm = inode_get_unnamed_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA); - efs_hash = efs_strm ? stream_hash(efs_strm) : zero_hash; - copy_hash(disk_dentry->default_hash, efs_hash); - disk_dentry->num_extra_streams = cpu_to_le16(0); - } else { - /* - * Extra stream entries: - * - * - Use one extra stream entry for each named data stream - * - Use one extra stream entry for the unnamed data stream when there is either: - * - a reparse point stream - * - at least one named data stream (for Windows PE bug workaround) - * - Use one extra stream entry for the reparse point stream if there is one - */ - bool have_named_data_stream = false; - bool have_reparse_point_stream = false; - const u8 *unnamed_data_stream_hash = zero_hash; - const u8 *reparse_point_hash; - for (unsigned i = 0; i < inode->i_num_streams; i++) { - const struct wim_inode_stream *strm = &inode->i_streams[i]; - if (strm->stream_type == STREAM_TYPE_DATA) { - if (stream_is_named(strm)) - have_named_data_stream = true; - else - unnamed_data_stream_hash = stream_hash(strm); - } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) { - have_reparse_point_stream = true; - reparse_point_hash = stream_hash(strm); - } - } - - if (unlikely(have_reparse_point_stream || have_named_data_stream)) { - - unsigned num_extra_streams = 0; - - copy_hash(disk_dentry->default_hash, zero_hash); - - if (have_reparse_point_stream) { - p = write_extra_stream_entry(p, NO_STREAM_NAME, - reparse_point_hash); - num_extra_streams++; - } - - p = write_extra_stream_entry(p, NO_STREAM_NAME, - unnamed_data_stream_hash); - num_extra_streams++; - - for (unsigned i = 0; i < inode->i_num_streams; i++) { - const struct wim_inode_stream *strm = &inode->i_streams[i]; - if (stream_is_named_data_stream(strm)) { - p = write_extra_stream_entry(p, strm->stream_name, - stream_hash(strm)); - num_extra_streams++; - } - } - wimlib_assert(num_extra_streams <= 0xFFFF); - - disk_dentry->num_extra_streams = cpu_to_le16(num_extra_streams); - } else { - copy_hash(disk_dentry->default_hash, unnamed_data_stream_hash); - disk_dentry->num_extra_streams = cpu_to_le16(0); - } - } - - return p; + /* + * Set disk_dentry->main_hash and disk_dentry->num_extra_streams, + * and write any extra stream entries that are needed. + */ + return write_dentry_streams(inode, disk_dentry, p); } static int write_dir_dentries(struct wim_dentry *dir, void *_pp) { - if (dir->subdir_offset != 0) { + if (dir->d_subdir_offset != 0) { u8 **pp = _pp; u8 *p = *pp; struct wim_dentry *child; @@ -1917,10 +1896,6 @@ write_dir_dentries(struct wim_dentry *dir, void *_pp) u8 * write_dentry_tree(struct wim_dentry *root, u8 *p) { - DEBUG("Writing dentry tree."); - - wimlib_assert(root != NULL); - /* write root dentry and end-of-directory entry following it */ p = write_dentry(root, p); *(u64*)p = 0;