2 * dentry.c - see description below
6 * Copyright 2012-2023 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see https://www.gnu.org/licenses/.
23 * This file contains logic to deal with WIM directory entries, or "dentries":
25 * - Reading a dentry tree from a metadata resource in a WIM file
26 * - Writing a dentry tree to a metadata resource in a WIM file
27 * - Iterating through a tree of WIM dentries
28 * - Path lookup: translating a path into a WIM dentry or inode
29 * - Creating, modifying, and deleting WIM dentries
33 * - A WIM file can contain multiple images, each of which has an independent
34 * tree of dentries. "On disk", the dentry tree for an image is stored in
35 * the "metadata resource" for that image.
37 * - Multiple dentries in an image may correspond to the same inode, or "file".
38 * When this occurs, it means that the file has multiple names, or "hard
39 * links". A dentry is not a file, but rather the name of a file!
41 * - Inodes are not represented explicitly in the WIM file format. Instead,
42 * the metadata resource provides a "hard link group ID" for each dentry.
43 * wimlib handles pulling out actual inodes from this information, but this
44 * occurs in inode_fixup.c and not in this file.
46 * - wimlib does not allow *directory* hard links, so a WIM image really does
47 * have a *tree* of dentries (and not an arbitrary graph of dentries).
49 * - wimlib supports both case-sensitive and case-insensitive path lookups.
50 * The implementation uses a single in-memory index per directory, using a
51 * collation order like that used by NTFS; see collate_dentry_names().
53 * - Multiple dentries in a directory might have the same case-insensitive
54 * name. But wimlib enforces that at most one dentry in a directory can have
55 * a given case-sensitive name.
64 #include "wimlib/assert.h"
65 #include "wimlib/dentry.h"
66 #include "wimlib/inode.h"
67 #include "wimlib/encoding.h"
68 #include "wimlib/endianness.h"
69 #include "wimlib/metadata.h"
70 #include "wimlib/paths.h"
72 /* On-disk format of a WIM dentry (directory entry), located in the metadata
73 * resource for a WIM image. */
/*
 * Packed on-disk layout of the fixed-length part of a dentry.  Going by the
 * member comments below, the members appear in this order: length, file
 * attributes, security descriptor index, child (subdir) offset, reserved
 * fields, the three timestamps, main_hash, an unknown field, the
 * reparse / hard-link-group union, num_extra_streams, short_name_nbytes and
 * the long-name length.  The optional variable-length names and tagged items
 * follow and are only described in comments, not declared as members.
 *
 * NOTE(review): several member declarations and some comment delimiters are
 * not visible in this view; confirm the exact members and their types against
 * the full file before relying on sizeof() of this struct.
 */
74 struct wim_dentry_on_disk {
76 /* Length of this directory entry in bytes, not including any extra
77 * stream entries. Should be a multiple of 8 so that the following
78 * dentry or extra stream entry is aligned on an 8-byte boundary. (If
79 * not, wimlib will round it up.) It must be at least as long as the
80 * fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE), plus the
81 * lengths of the file name and/or short name if present, plus the size
82 * of any "extra" data.
84 * It is also possible for this field to be 0. This case indicates the
85 * end of a list of sibling entries in a directory. It also means the
86 * real length is 8, because the dentry included only the length field,
87 * but that takes up 8 bytes. */
90 /* File attributes for the file or directory. This is a bitwise OR of
91 * the FILE_ATTRIBUTE_* constants and should correspond to the value
92 * retrieved by GetFileAttributes() on Windows. */
95 /* A value that specifies the security descriptor for this file or
96 * directory. If 0xFFFFFFFF, the file or directory has no security
97 * descriptor. Otherwise, it is a 0-based index into the WIM image's
98 * table of security descriptors (see: `struct wim_security_data') */
101 /* Offset, in bytes, from the start of the uncompressed metadata
102 * resource of this directory's child directory entries, or 0 if this
103 * directory entry does not correspond to a directory or otherwise does
104 * not have any children. */
107 /* Reserved fields */
111 /* Creation time, last access time, and last write time, in
112 * 100-nanosecond intervals since 12:00 a.m UTC January 1, 1601. They
113 * should correspond to the times gotten by calling GetFileTime() on
116 le64 last_access_time;
117 le64 last_write_time;
120 * Usually this is the SHA-1 message digest of the file's contents, or
121 * all zeroes if the file is a directory or is empty. However, special
122 * rules apply if the file has FILE_ATTRIBUTE_REPARSE_POINT set or has
123 * named data streams. See assign_stream_types_unencrypted().
125 u8 main_hash[SHA1_HASH_SIZE];
127 /* Unknown field (maybe accidental padding) */
131 * The following 8-byte union contains either information about the
132 * reparse point (for files with FILE_ATTRIBUTE_REPARSE_POINT set), or
133 * the "hard link group ID" (for other files).
135 * The reparse point information contains ReparseTag and ReparseReserved
136 * from the header of the reparse point buffer. It also contains a flag
137 * that indicates whether a reparse point fixup (for the target of an
138 * absolute symbolic link or junction) was done or not.
140 * The "hard link group ID" is like an inode number; all dentries for
141 * the same inode share the same value. See inode_fixup.c for more
144 * Note that this union creates the limitation that reparse point files
145 * cannot have multiple names (hard links).
152 } __attribute__((packed)) reparse;
154 le64 hard_link_group_id;
155 } __attribute__((packed)) nonreparse;
158 /* Number of extra stream entries that directly follow this dentry
160 le16 num_extra_streams;
162 /* If nonzero, this is the length, in bytes, of this dentry's UTF-16LE
163 * encoded short name (8.3 DOS-compatible name), excluding the null
164 * terminator. If zero, then the long name of this dentry does not have
165 * a corresponding short name (but this does not exclude the possibility
166 * that another dentry for the same file has a short name). */
167 le16 short_name_nbytes;
169 /* If nonzero, this is the length, in bytes, of this dentry's UTF-16LE
170 * encoded "long" name, excluding the null terminator. If zero, then
171 * this file has no long name. The root dentry should not have a long
172 * name, but all other dentries in the image should have long names. */
175 /* Beginning of optional, variable-length fields */
177 /* If name_nbytes != 0, the next field will be the UTF-16LE encoded long
178 * name. This will be null-terminated, so the size of this field will
179 * really be name_nbytes + 2. */
180 /*utf16lechar name[];*/
182 /* If short_name_nbytes != 0, the next field will be the UTF-16LE
183 * encoded short name. This will be null-terminated, so the size of
184 * this field will really be short_name_nbytes + 2. */
185 /*utf16lechar short_name[];*/
187 /* If there is still space in the dentry (according to the 'length'
188 * field) after 8-byte alignment, then the remaining space will be a
189 * variable-length list of tagged metadata items. See tagged_items.c
190 * for more information. */
191 /* u8 tagged_items[] __attribute__((aligned(8))); */
193 } __attribute__((packed));
194 /* If num_extra_streams != 0, then there are that many extra stream
195 * entries following the dentry, starting on the next 8-byte aligned
196 * boundary. They are not counted in the 'length' field of the dentry.
199 /* On-disk format of an extra stream entry. This represents an extra NTFS-style
200 * "stream" associated with the file, such as a named data stream. */
/*
 * Packed on-disk layout of one extra stream entry.  Per the member comments
 * below it carries, in order: the entry length, the stream's SHA-1 hash, the
 * name length in bytes, and the optional UTF-16LE stream name.
 *
 * NOTE(review): only the `hash` member declaration is visible in this view;
 * the other member declarations (and any reserved field) must be confirmed
 * against the full file.
 */
201 struct wim_extra_stream_entry_on_disk {
203 /* Length of this extra stream entry, in bytes. This includes all
204 * fixed-length fields, plus the name and null terminator if present,
205 * and any needed padding such that the length is a multiple of 8. */
211 /* SHA-1 message digest of this stream's uncompressed data, or all
212 * zeroes if this stream's data is of zero length. */
213 u8 hash[SHA1_HASH_SIZE];
215 /* Length of this stream's name, in bytes and excluding the null
216 * terminator; or 0 if this stream is unnamed. */
219 /* Stream name in UTF-16LE. It is @name_nbytes bytes long, excluding
220 * the null terminator. There is a null terminator character if
221 * @name_nbytes != 0; i.e., if this stream is named. */
223 } __attribute__((packed));
/*
 * Install @name as the dentry's long name.
 *
 * The previous d_name buffer is freed and the @name pointer itself is stored
 * (it is not copied here), together with the length recorded in
 * d_name_nbytes.  If the dentry currently has a short name, that buffer is
 * freed and the short-name pointer and length are reset to NULL / 0.
 *
 * NOTE(review): the return type, the remaining parameter line and the braces
 * are not visible in this view; confirm against the full file.
 */
226 do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *name,
229 FREE(dentry->d_name);
230 dentry->d_name = name;
231 dentry->d_name_nbytes = name_nbytes;
233 if (dentry_has_short_name(dentry)) {
234 FREE(dentry->d_short_name);
235 dentry->d_short_name = NULL;
236 dentry->d_short_name_nbytes = 0;
241 * Set the name of a WIM dentry from a UTF-16LE string.
243 * This sets the long name of the dentry. The short name will automatically be
244 * removed, since it may not be appropriate for the new long name.
246 * The @name string need not be null-terminated, since its length is specified
249 * If @name_nbytes is 0, both the long and short names of the dentry will be
252 * Only use this function on unlinked dentries, since it doesn't update the name
253 * indices. For dentries that are currently linked into the tree, use
256 * Returns 0 or WIMLIB_ERR_NOMEM.
/*
 * Set the dentry's long name from a UTF-16LE buffer.
 *
 * A private copy of @name is made with utf16le_dupz() and handed to
 * do_dentry_set_name(); `dup` starts out NULL, so a NULL name is installed
 * when no copy is made.  WIMLIB_ERR_NOMEM is returned on the failure path.
 *
 * NOTE(review): the conditions guarding the duplication and the NOMEM return,
 * as well as the final return statement, are not visible in this view;
 * confirm against the full file.
 */
259 dentry_set_name_utf16le(struct wim_dentry *dentry, const utf16lechar *name,
262 utf16lechar *dup = NULL;
265 dup = utf16le_dupz(name, name_nbytes);
267 return WIMLIB_ERR_NOMEM;
269 do_dentry_set_name(dentry, dup, name_nbytes);
275 * Set the name of a WIM dentry from a 'tchar' string.
277 * This sets the long name of the dentry. The short name will automatically be
278 * removed, since it may not be appropriate for the new long name.
280 * If @name is NULL or empty, both the long and short names of the dentry will
283 * Only use this function on unlinked dentries, since it doesn't update the name
284 * indices. For dentries that are currently linked into the tree, use
287 * Returns 0 or an error code resulting from a failed string conversion.
/*
 * Set the dentry's long name from a 'tchar' string.
 *
 * @name is converted with tstr_to_utf16le() (its length is taken from
 * tstrlen()), and the converted buffer and byte count are passed to
 * do_dentry_set_name().  Both outputs start as NULL / 0, which is what gets
 * installed if the conversion is not performed.
 *
 * NOTE(review): the guard around the conversion, the handling of its return
 * value and the final return are not visible in this view; confirm against
 * the full file.
 */
290 dentry_set_name(struct wim_dentry *dentry, const tchar *name)
292 utf16lechar *name_utf16le = NULL;
293 size_t name_utf16le_nbytes = 0;
297 ret = tstr_to_utf16le(name, tstrlen(name) * sizeof(tchar),
298 &name_utf16le, &name_utf16le_nbytes);
303 do_dentry_set_name(dentry, name_utf16le, name_utf16le_nbytes);
307 /* Calculate the minimum unaligned length, in bytes, of an on-disk WIM dentry
308 * that has names of the specified lengths. (Zero length means the
309 * corresponding name actually does not exist.) The returned value excludes
310 * tagged metadata items as well as any extra stream entries that may need to
311 * follow the dentry. */
/*
 * Compute the unaligned on-disk size of a dentry with the given name lengths.
 *
 * Starts from sizeof(struct wim_dentry_on_disk) and adds each name's byte
 * length plus 2 (the "+ 2" matches the 2-byte null terminator described for
 * the on-disk name fields).  The u16 lengths are widened to u32 before the
 * addition so that "+ 2" cannot wrap.
 *
 * NOTE(review): the short-name term is visibly guarded by
 * `if (short_name_nbytes)`; the guard for the long-name term and the return
 * statement are not visible in this view; confirm against the full file.
 */
313 dentry_min_len_with_names(u16 name_nbytes, u16 short_name_nbytes)
315 size_t length = sizeof(struct wim_dentry_on_disk);
317 length += (u32)name_nbytes + 2;
318 if (short_name_nbytes)
319 length += (u32)short_name_nbytes + 2;
324 /* Return the length, in bytes, required for the specified stream on-disk, when
325 * represented as an extra stream entry. */
/*
 * Size of @strm when written as an extra stream entry: the fixed-size header,
 * plus (for named streams) the name bytes and 2 more bytes, rounded up to a
 * multiple of 8 with ALIGN().
 *
 * NOTE(review): the return type and braces are not visible in this view.
 */
327 stream_out_total_length(const struct wim_inode_stream *strm)
329 /* Account for the fixed length portion */
330 size_t len = sizeof(struct wim_extra_stream_entry_on_disk);
332 /* For named streams, account for the variable-length name. */
333 if (stream_is_named(strm))
334 len += utf16le_len_bytes(strm->stream_name) + 2;
336 /* Account for any necessary padding to the next 8-byte boundary. */
337 return ALIGN(len, 8);
341 * Calculate the total number of bytes that will be consumed when a dentry is
342 * written. This includes the fixed-length portion of the dentry, the name
343 * fields, any tagged metadata items, and any extra stream entries. This also
344 * includes all alignment bytes.
/*
 * Total on-disk footprint of @dentry.
 *
 * Visible contributions, in order:
 *   - dentry_min_len_with_names() for the fixed part plus both names;
 *   - the inode's extra (tagged item) data, rounded up to 8 bytes;
 *   - one stream_out_total_length() per named data stream;
 *   - one fixed-size, 8-byte-aligned extra stream entry per unnamed stream,
 *     but only when there is more than one unnamed stream or at least one
 *     named data stream.
 *
 * The unnamed-stream count is 1 for encrypted files; otherwise it is
 * incremented once for a reparse point and once for a non-directory.
 *
 * NOTE(review): the alignment of the base length, the NULL check on
 * inode->i_extra, the branch structure around the stream loop and the final
 * return are not visible in this view; confirm against the full file.
 */
347 dentry_out_total_length(const struct wim_dentry *dentry)
349 const struct wim_inode *inode = dentry->d_inode;
351 unsigned num_unnamed_streams = 0;
352 bool have_named_data_stream = false;
354 len = dentry_min_len_with_names(dentry->d_name_nbytes,
355 dentry->d_short_name_nbytes);
359 len += ALIGN(inode->i_extra->size, 8);
362 * Calculate the total length of the extra stream entries that will be
363 * written. To match DISM, some odd rules need to be followed here.
364 * See write_dentry_streams() for explanation. Keep this in sync with
365 * write_dentry_streams()!
367 if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
368 num_unnamed_streams++;
370 for (unsigned i = 0; i < inode->i_num_streams; i++) {
371 const struct wim_inode_stream *strm = &inode->i_streams[i];
373 if (stream_is_named_data_stream(strm)) {
374 len += stream_out_total_length(strm);
375 have_named_data_stream = true;
378 if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT)
379 num_unnamed_streams++;
380 if (!(inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY))
381 num_unnamed_streams++;
383 if (num_unnamed_streams > 1 || have_named_data_stream)
384 len += num_unnamed_streams *
385 ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8);
389 /* Internal version of for_dentry_in_tree() that omits the NULL check */
/*
 * Recursive pre-order walk: @visitor is invoked on @dentry first, then this
 * function recurses into every child yielded by for_dentry_child().
 *
 * NOTE(review): the checks on `ret` after each call are not visible in this
 * view; presumably a nonzero result stops the walk and is propagated; confirm
 * against the full file.
 */
391 do_for_dentry_in_tree(struct wim_dentry *dentry,
392 int (*visitor)(struct wim_dentry *, void *), void *arg)
395 struct wim_dentry *child;
397 ret = (*visitor)(dentry, arg);
401 for_dentry_child(child, dentry) {
402 ret = do_for_dentry_in_tree(child, visitor, arg);
409 /* Internal version of for_dentry_in_tree_depth() that omits the NULL check */
/*
 * Recursive post-order walk: recurses into every child yielded by
 * for_dentry_child_postorder() and only then invokes @visitor on @dentry
 * itself, returning the visitor's result (wrapped in the unlikely() hint).
 *
 * NOTE(review): the check on `ret` inside the child loop is not visible in
 * this view; confirm against the full file.
 */
411 do_for_dentry_in_tree_depth(struct wim_dentry *dentry,
412 int (*visitor)(struct wim_dentry *, void *), void *arg)
415 struct wim_dentry *child;
417 for_dentry_child_postorder(child, dentry) {
418 ret = do_for_dentry_in_tree_depth(child, visitor, arg);
422 return unlikely((*visitor)(dentry, arg));
426 * Call a function on all dentries in a tree.
428 * @arg will be passed as the second argument to each invocation of @visitor.
430 * This function does a pre-order traversal --- that is, a parent will be
431 * visited before its children. Furthermore, siblings will be visited in their
434 * It is safe to pass NULL for @root, which means that the dentry tree is empty.
435 * In this case, this function does nothing.
437 * @visitor must not modify the structure of the dentry tree during the
440 * The return value will be 0 if all calls to @visitor returned 0. Otherwise,
441 * the return value will be the first nonzero value returned by @visitor.
/*
 * Public pre-order traversal entry point; delegates to
 * do_for_dentry_in_tree().
 *
 * NOTE(review): the NULL-root check that the internal helper is documented to
 * omit is not visible in this view; confirm it precedes the delegation.
 */
444 for_dentry_in_tree(struct wim_dentry *root,
445 int (*visitor)(struct wim_dentry *, void *), void *arg)
449 return do_for_dentry_in_tree(root, visitor, arg);
452 /* Like for_dentry_in_tree(), but do a depth-first traversal of the dentry tree.
453 * That is, the visitor function will be called on a dentry's children before
454 * itself. It will be safe to free a dentry when visiting it. */
/*
 * Public post-order traversal entry point; delegates to
 * do_for_dentry_in_tree_depth().
 *
 * NOTE(review): the NULL-root check that the internal helper is documented to
 * omit is not visible in this view; confirm it precedes the delegation.
 */
456 for_dentry_in_tree_depth(struct wim_dentry *root,
457 int (*visitor)(struct wim_dentry *, void *), void *arg)
461 return do_for_dentry_in_tree_depth(root, visitor, arg);
465 * Calculate the full path to @dentry within the WIM image, if not already done.
467 * The full name will be saved in the cached value 'dentry->d_full_path'.
469 * Whenever possible, use dentry_full_path() instead of calling this and
470 * accessing d_full_path directly.
472 * Returns 0 or an error code resulting from a failed string conversion.
/*
 * Build and cache the full path of @dentry in d_full_path.
 *
 * Nothing is recomputed when d_full_path is already set.  Otherwise the
 * function makes two passes up the d_parent chain, each stopping once the
 * root is reached:
 *   1. sum the name lengths (in UTF-16 code units) into `ulen`;
 *   2. fill a stack buffer `ubuf[ulen]` from the end backwards, copying each
 *      name and placing a WIM_PATH_SEPARATOR in front of it.
 * The assertion checks that the second pass consumed exactly the space the
 * first pass counted.  The UTF-16LE buffer is then converted with
 * utf16le_to_tstr(), whose result is returned.
 *
 * `ubuf` is a variable-length array, so its size grows with path length.
 *
 * NOTE(review): the initialisation of `ulen`/`d`, the per-component increment
 * that accounts for the separator, and the early return are not visible in
 * this view; confirm against the full file.
 */
475 calculate_dentry_full_path(struct wim_dentry *dentry)
478 const struct wim_dentry *d;
480 if (dentry->d_full_path)
486 ulen += d->d_name_nbytes / sizeof(utf16lechar);
488 d = d->d_parent; /* assumes d == d->d_parent for root */
489 } while (!dentry_is_root(d));
491 utf16lechar ubuf[ulen];
492 utf16lechar *p = &ubuf[ulen];
496 p -= d->d_name_nbytes / sizeof(utf16lechar);
497 if (d->d_name_nbytes)
498 memcpy(p, d->d_name, d->d_name_nbytes);
499 *--p = cpu_to_le16(WIM_PATH_SEPARATOR);
500 d = d->d_parent; /* assumes d == d->d_parent for root */
501 } while (!dentry_is_root(d));
503 wimlib_assert(p == ubuf);
505 return utf16le_to_tstr(ubuf, ulen * sizeof(utf16lechar),
506 &dentry->d_full_path, NULL);
510 * Return the full path to the @dentry within the WIM image, or NULL if the full
511 * path could not be determined due to a string conversion error.
513 * The returned memory will be cached in the dentry, so the caller is not
514 * responsible for freeing it.
/*
 * Convenience accessor: triggers calculate_dentry_full_path() and returns the
 * cached d_full_path.  The helper's return code is deliberately not inspected
 * here; callers see whatever d_full_path holds afterwards.
 *
 * NOTE(review): the return type line and braces are not visible in this view.
 */
517 dentry_full_path(struct wim_dentry *dentry)
519 calculate_dentry_full_path(dentry);
520 return dentry->d_full_path;
/*
 * Tree-visitor callback that assigns d_subdir_offset.
 *
 * For a directory, the current running offset (*_subdir_offset_p, treated as
 * a u64) becomes the directory's subdir offset; the running offset is then
 * advanced by the total written length of every child plus 8 bytes for the
 * end-of-directory entry.  For anything else the subdir offset is set to 0.
 *
 * NOTE(review): the return type, the else keyword and the return statement
 * are not visible in this view; confirm against the full file.
 */
524 dentry_calculate_subdir_offset(struct wim_dentry *dentry, void *_subdir_offset_p)
526 if (dentry_is_directory(dentry)) {
527 u64 *subdir_offset_p = _subdir_offset_p;
528 struct wim_dentry *child;
530 /* Set offset of directory's child dentries */
531 dentry->d_subdir_offset = *subdir_offset_p;
533 /* Account for child dentries */
534 for_dentry_child(child, dentry)
535 *subdir_offset_p += dentry_out_total_length(child);
537 /* Account for end-of-directory entry */
538 *subdir_offset_p += 8;
540 /* Not a directory; set the subdir offset to 0 */
541 dentry->d_subdir_offset = 0;
/*
 * Calculate the subdir offsets for a dentry tree, in preparation for writing
 * that dentry tree to a metadata resource.
 *
 * The subdir offset of each dentry is the offset in the uncompressed metadata
 * resource at which its child dentries begin, or 0 if that dentry has no
 * children.
 *
 * The caller must initialize *subdir_offset_p to the first subdir offset that
 * is available to use after the root dentry is written.
 *
 * When this function returns, *subdir_offset_p will have been advanced past the
 * size needed for the dentry tree within the uncompressed metadata resource.
 */
/*
 * Runs dentry_calculate_subdir_offset() over the whole tree in pre-order via
 * for_dentry_in_tree(), threading @subdir_offset_p through as the running
 * offset.  The traversal's return value is not used.
 *
 * NOTE(review): the return type line and braces are not visible in this view.
 */
561 calculate_subdir_offsets(struct wim_dentry *root, u64 *subdir_offset_p)
563 for_dentry_in_tree(root, dentry_calculate_subdir_offset, subdir_offset_p);
/*
 * Compare the long names of two dentries with cmp_utf16le_strings().  Name
 * lengths are passed in 2-byte units (d_name_nbytes / 2).
 *
 * NOTE(review): the third parameter and the final argument forwarded to
 * cmp_utf16le_strings() are not visible in this view; callers in this file
 * pass true / false in that position, presumably an ignore-case flag;
 * confirm against the full file.
 */
567 dentry_compare_names(const struct wim_dentry *d1, const struct wim_dentry *d2,
570 return cmp_utf16le_strings(d1->d_name, d1->d_name_nbytes / 2,
571 d2->d_name, d2->d_name_nbytes / 2,
576 * Collate (compare) the long filenames of two dentries. This first compares
577 * the names ignoring case, then falls back to a case-sensitive comparison if
578 * the names are the same ignoring case.
/*
 * AVL-tree comparator over d_index_node.  Recovers the two dentries with
 * avl_tree_entry(), compares their names with the flag set to true first and
 * finally returns the comparison with the flag set to false.
 *
 * NOTE(review): the test on `res` between the two comparisons is not visible
 * in this view; presumably a nonzero first result is returned directly;
 * confirm against the full file.
 */
581 collate_dentry_names(const struct avl_tree_node *n1,
582 const struct avl_tree_node *n2)
584 const struct wim_dentry *d1, *d2;
587 d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node);
588 d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node);
590 res = dentry_compare_names(d1, d2, true);
593 return dentry_compare_names(d1, d2, false);
596 /* Default case sensitivity behavior for searches with
597 * WIMLIB_CASE_PLATFORM_DEFAULT specified. This can be modified by passing
598 * WIMLIB_INIT_FLAG_DEFAULT_CASE_SENSITIVE or
599 * WIMLIB_INIT_FLAG_DEFAULT_CASE_INSENSITIVE to wimlib_global_init(). */
/* NOTE(review): the initializer of this global is not visible in this view;
 * confirm the platform-dependent default against the full file. */
600 bool default_ignore_case =
609 * Find the dentry within the given directory that has the given UTF-16LE
610 * filename. Return it if found, otherwise return NULL. This has configurable
611 * case sensitivity, and @name need not be null-terminated.
/*
 * Look up a child of @dir by UTF-16LE name.
 *
 * A stack-allocated `wanted` dentry carries the search key so that
 * dentry_compare_names() can be reused.  The length is stored into
 * wanted.d_name_nbytes and read back; if it does not round-trip, the name is
 * too long for that field and NULL is returned.
 *
 * The search starts at dir->d_inode->i_children.  Each visited child is
 * compared with the flag set to true first and, where that matches, again
 * with the flag set to false; an exact match is returned immediately, while
 * a case-insensitive match is kept as a fallback in `ci_match`.
 *
 * After the search, the fallback is subject to will_ignore_case(case_type).
 * If other dentries share the same case-insensitive name, a warning naming
 * the chosen dentry and one alternative is emitted.
 *
 * NOTE(review): the loop header, the descent through the tree, several
 * returns and the assignment of `other_ci_match` are not visible in this
 * view; confirm against the full file.
 */
614 get_dentry_child_with_utf16le_name(const struct wim_dentry *dir,
615 const utf16lechar *name,
617 CASE_SENSITIVITY_TYPE case_type)
619 struct wim_dentry wanted;
620 struct avl_tree_node *cur = dir->d_inode->i_children;
621 struct wim_dentry *ci_match = NULL;
623 wanted.d_name = (utf16lechar *)name;
624 wanted.d_name_nbytes = name_nbytes;
626 if (unlikely(wanted.d_name_nbytes != name_nbytes))
627 return NULL; /* overflow */
629 /* Note: we can't use avl_tree_lookup_node() here because we need to
630 * save case-insensitive matches. */
632 struct wim_dentry *child;
635 child = avl_tree_entry(cur, struct wim_dentry, d_index_node);
637 res = dentry_compare_names(&wanted, child, true);
639 /* case-insensitive match found */
642 res = dentry_compare_names(&wanted, child, false);
644 return child; /* case-sensitive match found */
653 /* No case-sensitive match; use a case-insensitive match if possible. */
655 if (!will_ignore_case(case_type))
659 size_t num_other_ci_matches = 0;
660 struct wim_dentry *other_ci_match, *d;
662 dentry_for_each_ci_match(d, ci_match) {
663 num_other_ci_matches++;
667 if (num_other_ci_matches != 0) {
668 WARNING("Result of case-insensitive lookup is ambiguous\n"
669 " (returning \"%"TS"\" of %zu "
670 "possible files, including \"%"TS"\")",
671 dentry_full_path(ci_match), num_other_ci_matches,
672 dentry_full_path(other_ci_match));
/*
 * Find the dentry within the given directory that has the given 'tstr'
 * filename.  If the filename was successfully converted to UTF-16LE and the
 * dentry was found, return it; otherwise return NULL.  This has configurable
 * case sensitivity.
 */
/*
 * 'tchar' front end for get_dentry_child_with_utf16le_name().
 *
 * The name is obtained as UTF-16LE with tstr_get_utf16le_and_len(), looked up
 * in @dir, and the temporary UTF-16LE view is released with
 * tstr_put_utf16le() afterwards.
 *
 * NOTE(review): the handling of a failed conversion, the remaining arguments
 * of the lookup call and the return statement are not visible in this view;
 * confirm against the full file.
 */
686 get_dentry_child_with_name(const struct wim_dentry *dir, const tchar *name,
687 CASE_SENSITIVITY_TYPE case_type)
690 const utf16lechar *name_utf16le;
691 size_t name_utf16le_nbytes;
692 struct wim_dentry *child;
694 ret = tstr_get_utf16le_and_len(name, &name_utf16le,
695 &name_utf16le_nbytes);
699 child = get_dentry_child_with_utf16le_name(dir,
703 tstr_put_utf16le(name_utf16le);
707 /* This is the UTF-16LE version of get_dentry(), currently private to this file
708 * because no one needs it besides get_dentry(). */
/*
 * Resolve a UTF-16LE path, one component at a time, starting from the root
 * dentry of the current image (wim_get_current_root_dentry()).
 *
 * Visible steps: a NULL current dentry is handled as a failure; a remaining
 * component under a non-directory is handled as a failure; runs of
 * WIM_PATH_SEPARATOR are skipped; a component ends at the next separator or
 * the terminating null; the component's byte length is computed by pointer
 * subtraction and passed to get_dentry_child_with_utf16le_name().
 *
 * NOTE(review): the loop structure, the errno assignments and the return
 * statements are not visible in this view; confirm against the full file.
 */
709 static struct wim_dentry *
710 get_dentry_utf16le(WIMStruct *wim, const utf16lechar *path,
711 CASE_SENSITIVITY_TYPE case_type)
713 struct wim_dentry *cur_dentry;
714 const utf16lechar *name_start, *name_end;
716 /* Start with the root directory of the image. Note: this will be NULL
717 * if an image has been added directly with wimlib_add_empty_image() but
718 * no files have been added yet; in that case we fail with ENOENT. */
719 cur_dentry = wim_get_current_root_dentry(wim);
723 if (cur_dentry == NULL) {
728 if (*name_start && !dentry_is_directory(cur_dentry)) {
733 while (*name_start == cpu_to_le16(WIM_PATH_SEPARATOR))
739 name_end = name_start;
742 } while (*name_end != cpu_to_le16(WIM_PATH_SEPARATOR) && *name_end);
744 cur_dentry = get_dentry_child_with_utf16le_name(cur_dentry,
746 (u8*)name_end - (u8*)name_start,
748 name_start = name_end;
753 * WIM path lookup: translate a path in the currently selected WIM image to the
754 * corresponding dentry, if it exists.
757 * The WIMStruct for the WIM. The search takes place in the currently
761 * The path to look up, given relative to the root of the WIM image.
762 * Characters with value WIM_PATH_SEPARATOR are taken to be path
763 * separators. Leading path separators are ignored, whereas one or more
764 * trailing path separators cause the path to only match a directory.
767 * The case-sensitivity behavior of this function, as one of the following
770 * - WIMLIB_CASE_SENSITIVE: Perform the search case sensitively. This means
771 * that names must match exactly.
773 * - WIMLIB_CASE_INSENSITIVE: Perform the search case insensitively. This
774 * means that names are considered to match if they are equal when
775 * transformed to upper case. If a path component matches multiple names
776 * case-insensitively, the name that matches the path component
777 * case-sensitively is chosen, if existent; otherwise one
778 * case-insensitively matching name is chosen arbitrarily.
780 * - WIMLIB_CASE_PLATFORM_DEFAULT: Perform either case-sensitive or
781 * case-insensitive search, depending on the value of the global variable
782 * default_ignore_case.
784 * In any case, no Unicode normalization is done before comparing strings.
786 * Returns a pointer to the dentry that is the result of the lookup, or NULL if
787 * no such dentry exists. If NULL is returned, errno is set to one of the
790 * ENOTDIR if one of the path components used as a directory existed but
791 * was not, in fact, a directory.
797 * - This function does not consider a reparse point to be a directory, even
798 * if it has FILE_ATTRIBUTE_DIRECTORY set.
800 * - This function does not dereference symbolic links or junction points
801 * when performing the search.
803 * - Since this function ignores leading slashes, the empty path is valid and
804 * names the root directory of the WIM image.
806 * - An image added with wimlib_add_empty_image() does not have a root
807 * directory yet, and this function will fail with ENOENT for any path on
/*
 * 'tchar' front end for get_dentry_utf16le(): obtains a UTF-16LE view of
 * @path with tstr_get_utf16le(), performs the lookup, and releases the view
 * with tstr_put_utf16le().
 *
 * NOTE(review): the handling of a failed conversion and the return statement
 * are not visible in this view; confirm against the full file.
 */
811 get_dentry(WIMStruct *wim, const tchar *path, CASE_SENSITIVITY_TYPE case_type)
814 const utf16lechar *path_utf16le;
815 struct wim_dentry *dentry;
817 ret = tstr_get_utf16le(path, &path_utf16le);
820 dentry = get_dentry_utf16le(wim, path_utf16le, case_type);
821 tstr_put_utf16le(path_utf16le);
825 /* Modify @path, which is a null-terminated string @len 'tchars' in length,
826 * in-place to produce the path to its parent directory. */
/*
 * Scans @path backwards from index len - 1 in three phases: over trailing
 * separators, over the last component, then over the separators preceding
 * it, and writes the terminator at index i + 1.  The index is signed
 * (ssize_t) so it can reach -1, in which case the terminator lands at index 0
 * and the result is the empty string.
 *
 * NOTE(review): the loop bodies (presumably decrements of `i`), the return
 * type and the braces are not visible in this view; confirm against the full
 * file.
 */
828 to_parent_name(tchar *path, size_t len)
830 ssize_t i = (ssize_t)len - 1;
831 while (i >= 0 && path[i] == WIM_PATH_SEPARATOR)
833 while (i >= 0 && path[i] != WIM_PATH_SEPARATOR)
835 while (i >= 0 && path[i] == WIM_PATH_SEPARATOR)
837 path[i + 1] = T('\0');
840 /* Similar to get_dentry(), but returns the dentry named by @path with the last
841 * component stripped off.
843 * Note: The returned dentry is NOT guaranteed to be a directory. */
/*
 * Copies @path (including its terminator) into a stack buffer sized from
 * tstrlen(), trims it to the parent path with to_parent_name(), and resolves
 * the result with get_dentry().  @path itself is not modified.
 *
 * `buf` is a variable-length array, so stack use grows with the path length.
 *
 * NOTE(review): the return type line and braces are not visible in this view.
 */
845 get_parent_dentry(WIMStruct *wim, const tchar *path,
846 CASE_SENSITIVITY_TYPE case_type)
848 size_t path_len = tstrlen(path);
849 tchar buf[path_len + 1];
851 tmemcpy(buf, path, path_len + 1);
852 to_parent_name(buf, path_len);
853 return get_dentry(wim, buf, case_type);
857 * Create an unlinked dentry.
859 * @name specifies the long name to give the new dentry. If NULL or empty, the
860 * new dentry will be given no long name.
862 * The new dentry will have no short name and no associated inode.
864 * On success, returns 0 and a pointer to the new, allocated dentry is stored in
865 * *dentry_ret. On failure, returns WIMLIB_ERR_NOMEM or an error code resulting
866 * from a failed string conversion.
/*
 * Allocates a zero-initialised dentry with CALLOC(), names it with
 * dentry_set_name(), makes it its own parent (d_parent == dentry, the same
 * convention unlink_dentry() uses for an unlinked dentry) and stores it in
 * *dentry_ret.  WIMLIB_ERR_NOMEM is returned on the allocation failure path.
 *
 * NOTE(review): the NULL check on the allocation, the cleanup when naming
 * fails and the final return are not visible in this view; confirm against
 * the full file.
 */
869 new_dentry(const tchar *name, struct wim_dentry **dentry_ret)
871 struct wim_dentry *dentry;
874 dentry = CALLOC(1, sizeof(struct wim_dentry));
876 return WIMLIB_ERR_NOMEM;
879 ret = dentry_set_name(dentry, name);
885 dentry->d_parent = dentry;
886 *dentry_ret = dentry;
890 /* Like new_dentry(), but also allocate an inode and associate it with the
891 * dentry. If set_timestamps=true, the timestamps for the inode will be set to
892 * the current time; otherwise, they will be left 0. */
/*
 * Creates a dentry with new_dentry() and then an inode for it with
 * new_inode(dentry, set_timestamps).  WIMLIB_ERR_NOMEM is returned on the
 * inode failure path; on success the dentry is stored in *dentry_ret.
 *
 * NOTE(review): the checks on `ret` and `inode`, any cleanup of the dentry
 * when inode creation fails, and the final return are not visible in this
 * view; confirm against the full file.
 */
894 new_dentry_with_new_inode(const tchar *name, bool set_timestamps,
895 struct wim_dentry **dentry_ret)
897 struct wim_dentry *dentry;
898 struct wim_inode *inode;
901 ret = new_dentry(name, &dentry);
905 inode = new_inode(dentry, set_timestamps);
908 return WIMLIB_ERR_NOMEM;
911 *dentry_ret = dentry;
915 /* Like new_dentry(), but also associate the new dentry with the specified inode
916 * and acquire a reference to each of the inode's blobs. */
/*
 * Creates a dentry with new_dentry() directly into *dentry_ret, associates it
 * with @inode via d_associate(), and takes blob references with
 * inode_ref_blobs().
 *
 * NOTE(review): the check on `ret` before the association and the return
 * statements are not visible in this view; confirm against the full file.
 */
918 new_dentry_with_existing_inode(const tchar *name, struct wim_inode *inode,
919 struct wim_dentry **dentry_ret)
921 int ret = new_dentry(name, dentry_ret);
924 d_associate(*dentry_ret, inode);
925 inode_ref_blobs(inode);
929 /* Create an unnamed dentry with a new inode for a directory with the default
/*
 * Creates an unnamed dentry (name NULL) with a fresh, timestamped inode via
 * new_dentry_with_new_inode(NULL, true, ...), marks the inode as a plain
 * directory by setting i_attributes to FILE_ATTRIBUTE_DIRECTORY, and stores
 * the dentry in *dentry_ret.
 *
 * NOTE(review): the check on `ret` and the return statements are not visible
 * in this view; confirm against the full file.
 */
932 new_filler_directory(struct wim_dentry **dentry_ret)
935 struct wim_dentry *dentry;
937 ret = new_dentry_with_new_inode(NULL, true, &dentry);
940 /* Leave the inode number as 0; this is allowed for non
941 * hard-linked files. */
942 dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
943 *dentry_ret = dentry;
950 * In addition to freeing the dentry itself, this disassociates the dentry from
951 * its inode. If the inode is no longer in use, it will be freed as well.
/*
 * Detaches the dentry from its inode with d_disassociate() and frees the
 * three owned strings: d_name, d_short_name and the cached d_full_path.
 *
 * NOTE(review): any NULL guard on @dentry and the release of the dentry
 * structure itself are not visible in this view; confirm against the full
 * file.
 */
954 free_dentry(struct wim_dentry *dentry)
957 d_disassociate(dentry);
958 FREE(dentry->d_name);
959 FREE(dentry->d_short_name);
960 FREE(dentry->d_full_path);
/*
 * Has the (struct wim_dentry *, void *) parameter list used by the tree
 * visitors in this file; the second argument is named as ignored.
 *
 * NOTE(review): the body is not visible in this view; presumably it frees
 * the dentry and returns 0; confirm against the full file.
 */
966 do_free_dentry(struct wim_dentry *dentry, void *_ignore)
/*
 * Visitor used by free_dentry_tree(): drops the blob references held by the
 * dentry's inode via inode_unref_blobs(), using @blob_table.
 *
 * NOTE(review): the rest of the body (presumably freeing the dentry and
 * returning 0) is not visible in this view; confirm against the full file.
 */
973 do_free_dentry_and_unref_blobs(struct wim_dentry *dentry, void *blob_table)
975 inode_unref_blobs(dentry->d_inode, blob_table);
/*
 * Free all dentries in a tree.
 *
 * @root:
 *	The root of the dentry tree to free.  If NULL, this function has no
 *	effect.
 *
 * @blob_table:
 *	A pointer to the blob table for the WIM, or NULL if not specified.  If
 *	specified, this function will decrement the reference counts of the
 *	blobs referenced by the dentries.
 *
 * This function also releases references to the corresponding inodes.
 *
 * This function does *not* unlink @root from its parent directory, if it has
 * one.  If @root has a parent, the caller must unlink @root before calling this
 * function.
 */
/*
 * Selects a visitor callback `f` and applies it to the tree with
 * for_dentry_in_tree_depth(), i.e. children are visited before their parent,
 * passing @blob_table as the visitor argument.
 *
 * NOTE(review): only the assignment of do_free_dentry_and_unref_blobs is
 * visible; the condition choosing between visitors and the alternative
 * assignment are not visible in this view; confirm against the full file.
 */
999 free_dentry_tree(struct wim_dentry *root, struct blob_table *blob_table)
1001 int (*f)(struct wim_dentry *, void *);
1004 f = do_free_dentry_and_unref_blobs;
1008 for_dentry_in_tree_depth(root, f, blob_table);
1012 * Return the first dentry in the list of dentries which have the same
1013 * case-insensitive name as the one given.
/*
 * Walks backwards through the directory index from @dentry using
 * avl_tree_prev_in_order(), comparing each predecessor's name against
 * @dentry with the flag set to true.  If the walk ends back on @dentry
 * itself, the result comes from dentry_get_next_ci_match(dentry, dentry)
 * instead, so @dentry is not reported as its own match.
 *
 * NOTE(review): the loop header, the loop exits, the update of `ci_match`
 * and the final return are not visible in this view; confirm against the
 * full file.
 */
1016 dentry_get_first_ci_match(struct wim_dentry *dentry)
1018 struct wim_dentry *ci_match = dentry;
1021 struct avl_tree_node *node;
1022 struct wim_dentry *prev;
1024 node = avl_tree_prev_in_order(&ci_match->d_index_node);
1027 prev = avl_tree_entry(node, struct wim_dentry, d_index_node);
1028 if (dentry_compare_names(prev, dentry, true))
1033 if (ci_match == dentry)
1034 return dentry_get_next_ci_match(dentry, dentry);
1040 * Return the next dentry in the list of dentries which have the same
1041 * case-insensitive name as the one given.
/*
 * Advances from @ci_match with avl_tree_next_in_order(), repeating while the
 * node reached is @dentry itself, then compares the candidate's name against
 * @dentry with the flag set to true.
 *
 * NOTE(review): the handling of a NULL successor and the return statements
 * are not visible in this view; presumably a name mismatch ends the list;
 * confirm against the full file.
 */
1044 dentry_get_next_ci_match(struct wim_dentry *dentry, struct wim_dentry *ci_match)
1047 struct avl_tree_node *node;
1049 node = avl_tree_next_in_order(&ci_match->d_index_node);
1052 ci_match = avl_tree_entry(node, struct wim_dentry, d_index_node);
1053 } while (ci_match == dentry);
1055 if (dentry_compare_names(ci_match, dentry, true))
/*
 * Link a dentry into a directory.
 *
 * @parent:
 *	The directory into which to link the dentry.
 *
 * @child:
 *	The dentry to link into the directory.  It must be currently unlinked.
 *
 * Returns NULL if successful; or, if @parent already contains a dentry with the
 * same case-sensitive name as @child, then a pointer to this duplicate dentry
 * is returned.
 */
/*
 * Inserts @child into the parent inode's i_children index with
 * avl_tree_insert(), ordered by collate_dentry_names().  The assertions
 * require that @parent and @child differ and that the parent inode is a
 * directory.  A duplicate reported by the insert is converted back to its
 * dentry and returned; otherwise @child's d_parent is set to @parent.
 *
 * NOTE(review): the test on `duplicate` and the success return are not
 * visible in this view; confirm against the full file.
 */
1075 dentry_add_child(struct wim_dentry *parent, struct wim_dentry *child)
1077 struct wim_inode *dir = parent->d_inode;
1078 struct avl_tree_node *duplicate;
1080 wimlib_assert(parent != child);
1081 wimlib_assert(inode_is_directory(dir));
1083 duplicate = avl_tree_insert(&dir->i_children, &child->d_index_node,
1084 collate_dentry_names);
1086 return avl_tree_entry(duplicate, struct wim_dentry, d_index_node);
1088 child->d_parent = parent;
1092 /* Unlink a dentry from its parent directory. */
/*
 * Removes the dentry's index node from its parent's i_children tree with
 * avl_tree_remove() and then points d_parent back at the dentry itself.  A
 * dentry whose d_parent is already itself (root or unlinked) is detected up
 * front.
 *
 * NOTE(review): the early return for that case, the return type and the
 * braces are not visible in this view; confirm against the full file.
 */
1094 unlink_dentry(struct wim_dentry *dentry)
1096 /* Do nothing if the dentry is root or it's already unlinked. Not
1097 * actually necessary based on the current callers, but we do the check
1098 * here to be safe. */
1099 if (unlikely(dentry->d_parent == dentry))
1102 avl_tree_remove(&dentry->d_parent->d_inode->i_children,
1103 &dentry->d_index_node);
1105 /* Not actually necessary, but to be safe don't retain the now-obsolete
1106 * parent pointer. */
1107 dentry->d_parent = dentry;
/*
 * Captures the bytes between @p and @end into inode->i_extra.
 *
 * The first loop runs while @p is not 8-byte aligned (low three address bits
 * nonzero) and still below @end.  If bytes remain, a wim_inode_extra is
 * allocated, its size is set to end - p and the bytes are copied into its
 * data member.  WIMLIB_ERR_NOMEM is returned when the allocation fails.
 *
 * NOTE(review): the body of the alignment loop (presumably advancing @p),
 * the size term of the MALLOC() call and the success return are not visible
 * in this view; confirm against the full file.
 */
1111 read_extra_data(const u8 *p, const u8 *end, struct wim_inode *inode)
1113 while (((uintptr_t)p & 7) && p < end)
1116 if (unlikely(p < end)) {
1117 inode->i_extra = MALLOC(sizeof(struct wim_inode_extra) +
1119 if (!inode->i_extra)
1120 return WIMLIB_ERR_NOMEM;
1121 inode->i_extra->size = end - p;
1122 memcpy(inode->i_extra->data, p, end - p);
1128 * Set the type of each stream for an encrypted file.
1130 * All data streams of the encrypted file should have been packed into a single
1131 * stream in the format provided by ReadEncryptedFileRaw() on Windows. We
1132 * assign this stream type STREAM_TYPE_EFSRPC_RAW_DATA.
1134 * Encrypted files can't have a reparse point stream. In the on-disk NTFS
1135 * format they can, but as far as I know the reparse point stream of an
1136 * encrypted file can't be stored in the WIM format in a way that's compatible
1137 * with WIMGAPI, nor is there even any way for it to be read or written on
1138 * Windows when the process does not have access to the file encryption key.
/*
 * Iterates over the inode's streams looking for one that is unnamed and has
 * a nonzero hash, and assigns STREAM_TYPE_EFSRPC_RAW_DATA as a stream type.
 *
 * NOTE(review): the braces around the condition, and whether the loop stops
 * after the first assignment, are not visible in this view; confirm against
 * the full file.
 */
1141 assign_stream_types_encrypted(struct wim_inode *inode)
1143 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1144 struct wim_inode_stream *strm = &inode->i_streams[i];
1145 if (!stream_is_named(strm) && !is_zero_hash(strm->_stream_hash))
1147 strm->stream_type = STREAM_TYPE_EFSRPC_RAW_DATA;
1154 * Set the type of each stream for an unencrypted file.
1156 * To specify the streams of each file, the WIM provides a main_hash and an
1157 * optional list of "extra stream entries". Each extra stream entry is a
1158 * (name, hash) pair where the name is optional. Hashes can be the special
1159 * value of zero_hash, which means the stream is empty (zero-length).
1161 * While extra stream entries with names always refer to "named data streams",
1162 * the main hash and any extra unnamed hashes can be hard to interpret. This is
1163 * because the WIM file format unfortunately doesn't make it very clear which is
1164 * the unnamed data stream (i.e. standard file contents) and which is the
1165 * reparse stream. The way this ambiguity is resolved (based on what MS
1166 * software seems to do) is by (1) a file can have at most one unnamed data
1167 * stream and at most one reparse stream, (2) a reparse stream is present if and
1168 * only if the file has FILE_ATTRIBUTE_REPARSE_POINT, and (3) the reparse
1169 * stream, if present, is stored before the unnamed data stream if present
1170 * (considering main_hash to come before any extra hashes). Note: directories
1171 * need not have an unnamed data stream stored, even with a zero hash, as
1172 * "unnamed data stream" isn't meaningful for a directory in the first place.
1174 * With those rules in mind, one would expect that the first unnamed stream
1175 * would use main_hash, and the second (if present) would use an extra stream
1176 * entry. However, there is another quirk that we must be compatible with:
1177 * sometimes main_hash isn't used and only extra stream entries are used. To
1178 * handle this, we ignore main_hash if it is zero and there is at least one
1179 * unnamed extra stream entry. This works correctly as long as a zero main_hash
1180 * and an unnamed extra stream entry is never used to represent an empty reparse
1181 * stream and an unnamed data stream. (It's not, as the reparse stream always
1182 * goes in the extra stream entries in this case. See write_dentry_streams().)
/*
 * @inode: inode whose streams have already been loaded.  Index 0 of
 * i_streams[] is the slot treated as main_hash; higher indices are treated as
 * extra stream entries.
 *
 * Every named stream becomes STREAM_TYPE_DATA.  Among unnamed streams, at most
 * one is typed as the reparse stream and at most one as the unnamed data
 * stream; any further unnamed streams are not retyped here.
 */
1185 assign_stream_types_unencrypted(struct wim_inode *inode)
1187 bool found_reparse_stream = false;
1188 bool found_unnamed_data_stream = false;
1190 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1191 struct wim_inode_stream *strm = &inode->i_streams[i];
1193 if (stream_is_named(strm)) {
1194 /* Named extra stream entry */
1195 strm->stream_type = STREAM_TYPE_DATA;
1196 } else if (i != 0 || !is_zero_hash(strm->_stream_hash)) {
1197 /* Unnamed extra stream entry or a nonzero main_hash */
/* The reparse stream is claimed first, and only when the attribute
 * says one exists; the next unnamed stream is then the data stream. */
1198 if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
1199 !found_reparse_stream) {
1200 found_reparse_stream = true;
1201 strm->stream_type = STREAM_TYPE_REPARSE_POINT;
1202 } else if (!found_unnamed_data_stream) {
1203 found_unnamed_data_stream = true;
1204 strm->stream_type = STREAM_TYPE_DATA;
1205 } /* Else, too many unnamed streams were found. */
1207 } /* Else, it's a zero main_hash. */
1210 /* If needed, use the zero main_hash. */
/* Reached with both flags clear only when no unnamed stream was typed in
 * the loop, so slot 0 is given the type implied by the attributes. */
1211 if (!found_reparse_stream && !found_unnamed_data_stream) {
1212 inode->i_streams[0].stream_type =
1213 (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) ?
1214 STREAM_TYPE_REPARSE_POINT : STREAM_TYPE_DATA;
1219 * Read and interpret the collection of streams for the specified inode.
/*
 * @p, @end:            Bounds of the region holding the extra stream entries.
 * @inode:              Inode whose i_streams[] array is filled in.
 * @num_extra_streams:  Number of extra stream entries to read after main_hash.
 * @main_hash:          Hash stored as stream 0.
 *
 * Stream IDs are assigned in order (0 for main_hash, then the entry index),
 * and every stream starts as STREAM_TYPE_UNKNOWN until the type-assignment
 * step at the end.  *offset_p is advanced by the number of bytes consumed
 * (p - orig_p).
 *
 * Returns WIMLIB_ERR_NOMEM on allocation failure, or
 * WIMLIB_ERR_INVALID_METADATA_RESOURCE if an entry is truncated, has an
 * invalid length field, or has a name with an odd or oversized byte count.
 */
1222 setup_inode_streams(const u8 *p, const u8 *end, struct wim_inode *inode,
1223 unsigned num_extra_streams, const u8 *main_hash,
1226 const u8 *orig_p = p;
1228 inode->i_num_streams = 1 + num_extra_streams;
/* More streams than i_embedded_streams can hold: use a CALLOC'd array. */
1230 if (unlikely(inode->i_num_streams > ARRAY_LEN(inode->i_embedded_streams))) {
1231 inode->i_streams = CALLOC(inode->i_num_streams,
1232 sizeof(inode->i_streams[0]));
1233 if (!inode->i_streams)
1234 return WIMLIB_ERR_NOMEM;
1237 /* Use main_hash for the first stream. */
1238 inode->i_streams[0].stream_name = (utf16lechar *)NO_STREAM_NAME;
1239 copy_hash(inode->i_streams[0]._stream_hash, main_hash);
1240 inode->i_streams[0].stream_type = STREAM_TYPE_UNKNOWN;
1241 inode->i_streams[0].stream_id = 0;
1243 /* Read the extra stream entries. */
1244 for (unsigned i = 1; i < inode->i_num_streams; i++) {
1245 struct wim_inode_stream *strm;
1246 const struct wim_extra_stream_entry_on_disk *disk_strm;
1250 strm = &inode->i_streams[i];
1252 strm->stream_id = i;
1254 /* Do we have at least the size of the fixed-length data we know
1256 if ((end - p) < sizeof(struct wim_extra_stream_entry_on_disk))
1257 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1259 disk_strm = (const struct wim_extra_stream_entry_on_disk *)p;
1261 /* Read the length field */
1262 length = ALIGN(le64_to_cpu(disk_strm->length), 8);
1264 /* Make sure the length field is neither so small it doesn't
1265 * include all the fixed-length data nor so large it overflows
1266 * the metadata resource buffer. */
1267 if (length < sizeof(struct wim_extra_stream_entry_on_disk) ||
1269 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1271 /* Read the rest of the fixed-length data. */
1273 copy_hash(strm->_stream_hash, disk_strm->hash);
1274 name_nbytes = le16_to_cpu(disk_strm->name_nbytes);
1276 /* If name_nbytes != 0, the stream is named. */
1277 if (name_nbytes != 0) {
1278 /* The name is encoded in UTF16-LE, which uses 2-byte
1279 * coding units, so the length of the name had better be
1280 * an even number of bytes. */
1281 if (name_nbytes & 1)
1282 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1284 /* Add the length of the stream name to get the length
1285 * we actually need to read. Make sure this isn't more
1286 * than the specified length of the entry. */
1287 if (sizeof(struct wim_extra_stream_entry_on_disk) +
1288 name_nbytes > length)
1289 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1291 strm->stream_name = utf16le_dupz(disk_strm->name,
1293 if (!strm->stream_name)
1294 return WIMLIB_ERR_NOMEM;
/* Unnamed entries share the NO_STREAM_NAME sentinel instead of a copy. */
1296 strm->stream_name = (utf16lechar *)NO_STREAM_NAME;
1299 strm->stream_type = STREAM_TYPE_UNKNOWN;
/* IDs 0 .. i_num_streams - 1 were handed out above. */
1304 inode->i_next_stream_id = inode->i_num_streams;
1306 /* Now, assign a type to each stream. Unfortunately this requires
1307 * various hacks because stream types aren't explicitly provided in the
1308 * WIM on-disk format. */
1310 if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED))
1311 assign_stream_types_encrypted(inode);
1313 assign_stream_types_unencrypted(inode);
/* Report how many bytes of stream entries were consumed. */
1315 *offset_p += p - orig_p;
1319 /* Read a dentry, including all extra stream entries that follow it, from an
1320 * uncompressed metadata resource buffer. */
/*
 * @buf, @buf_len:  The uncompressed metadata resource and its size in bytes.
 * @offset_p:       In: offset of the dentry within @buf.  Out, once the
 *                  dentry and its streams have been read: offset of the next
 *                  dentry in the directory.
 * @dentry_ret:     Receives the newly allocated dentry once it has been
 *                  fully read.
 *
 * Returns WIMLIB_ERR_INVALID_METADATA_RESOURCE if the dentry does not fit in
 * the buffer, is too short for its fixed fields or for its names, or has a
 * name with an odd byte count; WIMLIB_ERR_NOMEM if a name cannot be
 * duplicated.  Failures while reading the extra data or the streams also end
 * in the error path that frees the dentry.
 */
1322 read_dentry(const u8 * restrict buf, size_t buf_len,
1323 u64 *offset_p, struct wim_dentry **dentry_ret)
1325 u64 offset = *offset_p;
1328 const struct wim_dentry_on_disk *disk_dentry;
1329 struct wim_dentry *dentry;
1330 struct wim_inode *inode;
1331 u16 short_name_nbytes;
1333 u64 calculated_size;
1336 STATIC_ASSERT(sizeof(struct wim_dentry_on_disk) == WIM_DENTRY_DISK_SIZE);
1338 /* Before reading the whole dentry, we need to read just the length.
1339 * This is because a dentry of length 8 (that is, just the length field)
1340 * terminates the list of sibling directory entries. */
1342 /* Check for buffer overrun. */
/* The second test rejects an offset so large that the addition wraps. */
1343 if (unlikely(offset + sizeof(u64) > buf_len ||
1344 offset + sizeof(u64) < offset))
1345 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1347 /* Get pointer to the dentry data. */
1349 disk_dentry = (const struct wim_dentry_on_disk*)p;
1351 /* Get dentry length. */
1352 length = ALIGN(le64_to_cpu(disk_dentry->length), 8);
1354 /* Check for end-of-directory. */
1360 /* Validate dentry length. */
1361 if (unlikely(length < sizeof(struct wim_dentry_on_disk)))
1362 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1364 /* Check for buffer overrun. */
/* Same wraparound guard as above, now for the full dentry length. */
1365 if (unlikely(offset + length > buf_len ||
1366 offset + length < offset))
1367 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1369 /* Allocate new dentry structure, along with a preliminary inode. */
1370 ret = new_dentry_with_new_inode(NULL, false, &dentry);
1374 inode = dentry->d_inode;
1376 /* Read more fields: some into the dentry, and some into the inode. */
1377 inode->i_attributes = le32_to_cpu(disk_dentry->attributes);
1378 inode->i_security_id = le32_to_cpu(disk_dentry->security_id);
1379 dentry->d_subdir_offset = le64_to_cpu(disk_dentry->subdir_offset);
1380 inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time);
1381 inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time);
1382 inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time);
1383 inode->i_unknown_0x54 = le32_to_cpu(disk_dentry->unknown_0x54);
/* The reparse fields and the hard link group ID are alternatives selected
 * by the reparse-point attribute: only one set is read. */
1385 if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
1386 inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag);
1387 inode->i_rp_reserved = le16_to_cpu(disk_dentry->reparse.rp_reserved);
1388 inode->i_rp_flags = le16_to_cpu(disk_dentry->reparse.rp_flags);
1389 /* Leave inode->i_ino at 0. Note: this means that WIM cannot
1390 * represent multiple hard links to a reparse point file. */
1392 inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id);
1395 /* Now onto reading the names. There are two of them: the (long) file
1396 * name, and the short name. */
1398 short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes);
1399 name_nbytes = le16_to_cpu(disk_dentry->name_nbytes);
/* Bitwise OR is enough here: either length being odd is an error. */
1401 if (unlikely((short_name_nbytes & 1) | (name_nbytes & 1))) {
1402 ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1403 goto err_free_dentry;
1406 /* We now know the length of the file name and short name. Make sure
1407 * the length of the dentry is large enough to actually hold them. */
1408 calculated_size = dentry_min_len_with_names(name_nbytes,
1411 if (unlikely(length < calculated_size)) {
1412 ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1413 goto err_free_dentry;
1416 /* Advance p to point past the base dentry, to the first name. */
1417 p += sizeof(struct wim_dentry_on_disk);
1419 /* Read the filename if present. Note: if the filename is empty, there
1420 * is no null terminator following it. */
1422 dentry->d_name = utf16le_dupz(p, name_nbytes);
1423 if (unlikely(!dentry->d_name)) {
1424 ret = WIMLIB_ERR_NOMEM;
1425 goto err_free_dentry;
1427 dentry->d_name_nbytes = name_nbytes;
/* Step over the name plus 2 more bytes (the terminator noted above). */
1428 p += (u32)name_nbytes + 2;
1431 /* Read the short filename if present. Note: if there is no short
1432 * filename, there is no null terminator following it. */
1433 if (short_name_nbytes) {
1434 dentry->d_short_name = utf16le_dupz(p, short_name_nbytes);
1435 if (unlikely(!dentry->d_short_name)) {
1436 ret = WIMLIB_ERR_NOMEM;
1437 goto err_free_dentry;
1439 dentry->d_short_name_nbytes = short_name_nbytes;
1440 p += (u32)short_name_nbytes + 2;
1443 /* Read extra data at end of dentry (but before extra stream entries).
1444 * This may contain tagged metadata items. */
1445 ret = read_extra_data(p, &buf[offset + length], inode);
1447 goto err_free_dentry;
1451 /* Set up the inode's collection of streams. */
1452 ret = setup_inode_streams(&buf[offset],
1455 le16_to_cpu(disk_dentry->num_extra_streams),
1456 disk_dentry->main_hash,
1459 goto err_free_dentry;
1461 *offset_p = offset; /* Sets offset of next dentry in directory */
1462 *dentry_ret = dentry;
/* Error path: release the partially built dentry. */
1466 free_dentry(dentry);
/*
 * Check whether the long name of @dentry is "." or "..".
 *
 * Name lengths are in bytes and the name is UTF-16LE, so a 2-byte name is
 * compared against one '.' code unit and a 4-byte name against two.  Longer
 * names are never examined.
 */
1471 dentry_is_dot_or_dotdot(const struct wim_dentry *dentry)
1473 if (dentry->d_name_nbytes <= 4) {
1474 if (dentry->d_name_nbytes == 4) {
1475 if (dentry->d_name[0] == cpu_to_le16('.') &&
1476 dentry->d_name[1] == cpu_to_le16('.'))
1478 } else if (dentry->d_name_nbytes == 2) {
1479 if (dentry->d_name[0] == cpu_to_le16('.'))
/*
 * Scan the long name of @dentry for a null character.
 *
 * The name holds d_name_nbytes / 2 UTF-16LE code units; each one is compared
 * against a zero code unit.
 */
1487 dentry_contains_embedded_null(const struct wim_dentry *dentry)
1489 for (unsigned i = 0; i < dentry->d_name_nbytes / 2; i++)
1490 if (dentry->d_name[i] == cpu_to_le16('\0'))
/*
 * Decide whether @dentry, just read as a child of @dir, has a name that must
 * not be accepted.
 *
 * @dir:    Directory being populated; only used to print its full path in the
 *          warnings.
 * @dentry: The candidate child.
 *
 * Three conditions are checked in order: no long name, a name of "." or "..",
 * and a name containing an embedded null character.  Each one produces a
 * warning.
 */
1496 should_ignore_dentry(struct wim_dentry *dir, const struct wim_dentry *dentry)
1498 /* All dentries except the root must be named. */
1499 if (!dentry_has_long_name(dentry)) {
1500 WARNING("Ignoring unnamed file in directory \"%"TS"\"",
1501 dentry_full_path(dir));
1505 /* Don't allow files named "." or "..". Such filenames could be used in
1506 * path traversal attacks. */
1507 if (dentry_is_dot_or_dotdot(dentry)) {
1508 WARNING("Ignoring file named \".\" or \"..\" in directory "
1509 "\"%"TS"\"", dentry_full_path(dir));
1513 /* Don't allow filenames containing embedded null characters. Although
1514 * the null character is already considered an unsupported character for
1515 * extraction by all targets, it is probably a good idea to just forbid
1516 * such names entirely. */
1517 if (dentry_contains_embedded_null(dentry)) {
1518 WARNING("Ignoring filename with embedded null character in "
1519 "directory \"%"TS"\"", dentry_full_path(dir));
/*
 * Read the children of @dir from the metadata resource, descending into
 * child directories.
 *
 * @buf, @buf_len:  The uncompressed metadata resource and its size in bytes.
 * @dir:            Directory whose children are read, starting at
 *                  dir->d_subdir_offset.
 * @depth:          Nesting level of @dir; reaching 16384 is reported as an
 *                  error and yields WIMLIB_ERR_INVALID_METADATA_RESOURCE.
 *
 * Children with unacceptable names, and children whose name duplicates one
 * already linked into @dir, are reported with a warning.
 */
1527 read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len,
1528 struct wim_dentry * restrict dir, unsigned depth)
1530 u64 cur_offset = dir->d_subdir_offset;
1532 /* Disallow extremely deep or cyclic directory structures */
1533 if (unlikely(depth >= 16384)) {
1534 ERROR("Directory structure too deep!");
1535 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1539 struct wim_dentry *child;
1540 struct wim_dentry *duplicate;
1543 /* Read next child of @dir. */
1544 ret = read_dentry(buf, buf_len, &cur_offset, &child);
1548 /* Check for end of directory. */
1552 /* Ignore dentries with bad names. */
1553 if (unlikely(should_ignore_dentry(dir, child))) {
1558 /* Link the child into the directory. */
1559 duplicate = dentry_add_child(dir, child);
1560 if (unlikely(duplicate)) {
1561 /* We already found a dentry with this same
1562 * case-sensitive long name. Only keep the first one.
1564 WARNING("Ignoring duplicate file \"%"TS"\" "
1565 "(the WIM image already contains a file "
1566 "at that path with the exact same name)",
1567 dentry_full_path(duplicate));
1572 /* If this child is a directory that itself has children, call
1573 * this procedure recursively. */
1574 if (child->d_subdir_offset != 0) {
1575 if (likely(dentry_is_directory(child))) {
1576 ret = read_dentry_tree_recursive(buf,
/* A nonzero subdir offset on a non-directory is only warned about. */
1583 WARNING("Ignoring children of "
1584 "non-directory file \"%"TS"\"",
1585 dentry_full_path(child));
1592 * Read a tree of dentries from a WIM metadata resource.
1595 * Buffer containing an uncompressed WIM metadata resource.
1598 * Length of the uncompressed metadata resource, in bytes.
1601 * Offset in the metadata resource of the root of the dentry tree.
1604 * On success, either NULL or a pointer to the root dentry is written to
1605 * this location. The former case only occurs in the unexpected case that
1606 * the tree began with an end-of-directory entry.
1609 * WIMLIB_ERR_SUCCESS (0)
1610 * WIMLIB_ERR_INVALID_METADATA_RESOURCE
/*
 * Entry point for loading an image's dentry tree: reads the root dentry at
 * @root_offset, validates it, then reads its descendants.
 */
1614 read_dentry_tree(const u8 *buf, size_t buf_len,
1615 u64 root_offset, struct wim_dentry **root_ret)
1618 struct wim_dentry *root;
1620 ret = read_dentry(buf, buf_len, &root_offset, &root);
/* root may be NULL here; all validation below is skipped in that case. */
1624 if (likely(root != NULL)) {
/* A named root is tolerated: warn, then clear the name. */
1625 if (unlikely(dentry_has_long_name(root) ||
1626 dentry_has_short_name(root)))
1628 WARNING("The root directory has a nonempty name; "
1630 dentry_set_name(root, NULL);
1633 if (unlikely(!dentry_is_directory(root))) {
1634 ERROR("The root of the WIM image is not a directory!");
1635 ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1636 goto err_free_dentry_tree;
/* A zero subdir offset means there are no children to read. */
1639 if (likely(root->d_subdir_offset != 0)) {
1640 ret = read_dentry_tree_recursive(buf, buf_len, root, 0);
1642 goto err_free_dentry_tree;
1645 WARNING("The metadata resource has no directory entries; "
1646 "treating as an empty image.");
/* Error path: release the root and everything linked under it so far. */
1651 err_free_dentry_tree:
1652 free_dentry_tree(root, NULL);
/*
 * Write one extra stream entry at @p.
 *
 * @p:    Output position.
 * @name: Stream name, or NO_STREAM_NAME for an unnamed stream.
 * @hash: Hash to store in the entry.
 *
 * The fixed part gets reserved = 0, the hash and the name length in bytes.
 * A named entry is followed by the name plus 2 further bytes taken from
 * @name (name_nbytes + 2 are copied).  The length field is filled in last
 * from (p - orig_p), after the alignment loop below.
 *
 * Callers assign the result back to their output pointer.
 */
1657 write_extra_stream_entry(u8 * restrict p, const utf16lechar * restrict name,
1658 const u8 * restrict hash)
1660 struct wim_extra_stream_entry_on_disk *disk_strm =
1661 (struct wim_extra_stream_entry_on_disk *)p;
/* NO_STREAM_NAME is a sentinel compared by pointer, not by content. */
1665 if (name == NO_STREAM_NAME)
1668 name_nbytes = utf16le_len_bytes(name);
1670 disk_strm->reserved = 0;
1671 copy_hash(disk_strm->hash, hash);
1672 disk_strm->name_nbytes = cpu_to_le16(name_nbytes);
1673 p += sizeof(struct wim_extra_stream_entry_on_disk);
1674 if (name_nbytes != 0)
1675 p = mempcpy(p, name, name_nbytes + 2);
1676 /* Align to 8-byte boundary */
1677 while ((uintptr_t)p & 7)
1679 disk_strm->length = cpu_to_le64(p - orig_p);
1684 * Write the stream references for a WIM dentry. To be compatible with DISM, we
1685 * follow the below rules:
1687 * 1. If the file has FILE_ATTRIBUTE_ENCRYPTED, then only the EFSRPC_RAW_DATA
1688 * stream is stored. Otherwise, the streams that are stored are:
1689 * - Reparse stream if the file has FILE_ATTRIBUTE_REPARSE_POINT
1690 * - Unnamed data stream if the file doesn't have FILE_ATTRIBUTE_DIRECTORY
1691 * - Named data streams
1693 * 2. If only one stream is being stored and it is the EFSRPC_RAW_DATA, unnamed
1694 * data, or reparse stream, then its hash goes in main_hash, and no extra
1695 * stream entries are stored. Otherwise, *all* streams go in the extra
1696 * stream entries, and main_hash is left zeroed!
1698 * 3. If both the reparse stream and unnamed data stream are being stored, then
1699 * the reparse stream comes first.
1701 * 4. The unnamed stream(s) come before the named stream(s). (Actually, DISM
1702 * puts the named streams between the first and second unnamed streams, but
1703 * this is incompatible with itself... Tested with DISM 10.0.20348.681.)
1705 * wimlib v1.14.1 and earlier behaved slightly differently for directories.
1706 * First, wimlib always put the hash of the reparse stream in an extra stream
1707 * entry, never in main_hash. This difference vs. DISM went unnoticed for a
1708 * long time, but eventually it was found that it broke the Windows 8 setup
1709 * wizard. Second, when a directory had any extra streams, wimlib created an
1710 * extra stream entry to represent the (empty) unnamed data stream. However,
1711 * DISM now rejects that (though I think it used to accept it). There isn't
1712 * really any such thing as "unnamed data stream" for a directory.
1714 * Keep this in sync with dentry_out_total_length()!
/*
 * @inode:       Inode whose streams are being written.
 * @disk_dentry: On-disk dentry whose main_hash and num_extra_streams fields
 *               are filled in here.
 * @p:           Output position for any extra stream entries.
 */
1717 write_dentry_streams(const struct wim_inode *inode,
1718 struct wim_dentry_on_disk *disk_dentry, u8 *p)
1720 const u8 *unnamed_data_stream_hash = zero_hash;
1721 const u8 *reparse_stream_hash = zero_hash;
1722 const u8 *efsrpc_stream_hash = zero_hash;
/* Only slot 0 is preset to zero_hash: it is what main_hash receives below
 * when no unnamed stream gets queued. */
1723 const u8 *unnamed_stream_hashes[2] = { zero_hash };
1724 unsigned num_unnamed_streams = 0;
1725 unsigned num_named_streams = 0;
/* First pass: count named data streams and note the hash of each kind of
 * unnamed stream. */
1727 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1728 const struct wim_inode_stream *strm = &inode->i_streams[i];
1730 switch (strm->stream_type) {
1731 case STREAM_TYPE_DATA:
1732 if (stream_is_named(strm))
1733 num_named_streams++;
1735 unnamed_data_stream_hash = stream_hash(strm);
1737 case STREAM_TYPE_REPARSE_POINT:
1738 reparse_stream_hash = stream_hash(strm);
1740 case STREAM_TYPE_EFSRPC_RAW_DATA:
1741 efsrpc_stream_hash = stream_hash(strm);
/* Queue the unnamed streams to store, selected by file attributes rather
 * than by which stream types were seen above. */
1746 if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
1747 unnamed_stream_hashes[num_unnamed_streams++] = efsrpc_stream_hash;
/* Named streams are not stored for encrypted files. */
1748 num_named_streams = 0;
1750 if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT)
1751 unnamed_stream_hashes[num_unnamed_streams++] = reparse_stream_hash;
1752 if (!(inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY))
1753 unnamed_stream_hashes[num_unnamed_streams++] = unnamed_data_stream_hash;
1756 if (num_unnamed_streams <= 1 && num_named_streams == 0) {
1757 /* No extra stream entries are needed. */
1758 copy_hash(disk_dentry->main_hash, unnamed_stream_hashes[0]);
1759 disk_dentry->num_extra_streams = 0;
1763 /* Else, all streams go in extra stream entries. */
1764 copy_hash(disk_dentry->main_hash, zero_hash);
/* num_extra_streams is stored with cpu_to_le16, hence the 0xFFFF limit. */
1765 wimlib_assert(num_unnamed_streams + num_named_streams <= 0xFFFF);
1766 disk_dentry->num_extra_streams = cpu_to_le16(num_unnamed_streams +
/* Unnamed entries are emitted first, then the named data streams. */
1768 for (unsigned i = 0; i < num_unnamed_streams; i++)
1769 p = write_extra_stream_entry(p, NO_STREAM_NAME,
1770 unnamed_stream_hashes[i]);
1771 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1772 const struct wim_inode_stream *strm = &inode->i_streams[i];
1774 if (stream_is_named_data_stream(strm)) {
1775 p = write_extra_stream_entry(p, strm->stream_name,
1783 * Write a WIM dentry to an output buffer.
1785 * This includes any extra stream entries that may follow the dentry itself.
1788 * The dentry to write.
1791 * The memory location to which to write the data.
1793 * Returns a pointer to the byte following the last written.
/*
 * Layout produced, in order: the fixed on-disk dentry, the long name, the
 * short name, padding to an 8-byte boundary, the optional extra data with its
 * own padding, and finally whatever write_dentry_streams() emits.
 */
1796 write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
1798 const struct wim_inode *inode;
1799 struct wim_dentry_on_disk *disk_dentry;
1802 wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */
1805 inode = dentry->d_inode;
1806 disk_dentry = (struct wim_dentry_on_disk*)p;
1808 disk_dentry->attributes = cpu_to_le32(inode->i_attributes);
1809 disk_dentry->security_id = cpu_to_le32(inode->i_security_id);
1810 disk_dentry->subdir_offset = cpu_to_le64(dentry->d_subdir_offset);
1812 disk_dentry->unused_1 = cpu_to_le64(0);
1813 disk_dentry->unused_2 = cpu_to_le64(0);
1815 disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time);
1816 disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time);
1817 disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time);
1818 disk_dentry->unknown_0x54 = cpu_to_le32(inode->i_unknown_0x54);
/* Reparse fields and the hard link group ID are alternatives: which one is
 * written depends on the reparse-point attribute. */
1819 if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
1820 disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag);
1821 disk_dentry->reparse.rp_reserved = cpu_to_le16(inode->i_rp_reserved);
1822 disk_dentry->reparse.rp_flags = cpu_to_le16(inode->i_rp_flags);
/* An inode with a single link is written with group ID 0. */
1824 disk_dentry->nonreparse.hard_link_group_id =
1825 cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino);
1828 disk_dentry->short_name_nbytes = cpu_to_le16(dentry->d_short_name_nbytes);
1829 disk_dentry->name_nbytes = cpu_to_le16(dentry->d_name_nbytes);
1830 p += sizeof(struct wim_dentry_on_disk);
/* The root, and only the root, has no long name. */
1832 wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry));
/* Each name present is copied with 2 extra bytes beyond its length. */
1834 if (dentry_has_long_name(dentry))
1835 p = mempcpy(p, dentry->d_name, (u32)dentry->d_name_nbytes + 2);
1837 if (dentry_has_short_name(dentry))
1838 p = mempcpy(p, dentry->d_short_name, (u32)dentry->d_short_name_nbytes + 2);
1840 /* Align to 8-byte boundary */
1841 while ((uintptr_t)p & 7)
1844 if (inode->i_extra) {
1845 /* Extra tagged items --- not usually present. */
1846 p = mempcpy(p, inode->i_extra->data, inode->i_extra->size);
1848 /* Align to 8-byte boundary */
1849 while ((uintptr_t)p & 7)
/* The length is fixed before the stream entries are written, so it does
 * not include them. */
1853 disk_dentry->length = cpu_to_le64(p - orig_p);
1856 * Set disk_dentry->main_hash and disk_dentry->num_extra_streams,
1857 * and write any extra stream entries that are needed.
1859 return write_dentry_streams(inode, disk_dentry, p);
/*
 * Per-dentry callback handed to for_dentry_in_tree() by write_dentry_tree().
 *
 * @dir: Dentry being visited; nothing is written unless its d_subdir_offset
 *       is nonzero.
 * @_pp: Opaque callback argument.  write_dentry_tree() passes the address of
 *       its u8 * output pointer here.
 */
1863 write_dir_dentries(struct wim_dentry *dir, void *_pp)
1865 if (dir->d_subdir_offset != 0) {
1868 struct wim_dentry *child;
1870 /* write child dentries */
1871 for_dentry_child(child, dir)
1872 p = write_dentry(child, p);
1874 /* write end of directory entry */
1883 * Write a directory tree to the metadata resource.
1886 * The root of a dentry tree on which calculate_subdir_offsets() has been
1887 * called. This cannot be NULL; if the dentry tree is empty, the caller is
1888 * expected to first generate a dummy root directory.
1891 * Pointer to a buffer with enough space for the dentry tree. This size
1892 * must have been obtained by calculate_subdir_offsets().
1894 * Returns a pointer to the byte following the last written.
/*
 * The root dentry is written on its own first; every directory's children
 * are then written by write_dir_dentries() as the tree is walked, with the
 * address of the output pointer passed as the callback argument.
 */
1897 write_dentry_tree(struct wim_dentry *root, u8 *p)
1899 /* write root dentry and end-of-directory entry following it */
1900 p = write_dentry(root, p);
1904 /* write the rest of the dentry tree */
1905 for_dentry_in_tree(root, write_dir_dentries, &p);