2 * dentry.c - see description below
6 * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
8 * This file is free software; you can redistribute it and/or modify it under
9 * the terms of the GNU Lesser General Public License as published by the Free
10 * Software Foundation; either version 3 of the License, or (at your option) any
13 * This file is distributed in the hope that it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
15 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public License
19 * along with this file; if not, see http://www.gnu.org/licenses/.
23 * This file contains logic to deal with WIM directory entries, or "dentries":
25 * - Reading a dentry tree from a metadata resource in a WIM file
26 * - Writing a dentry tree to a metadata resource in a WIM file
27 * - Iterating through a tree of WIM dentries
28 * - Path lookup: translating a path into a WIM dentry or inode
29 * - Creating, modifying, and deleting WIM dentries
33 * - A WIM file can contain multiple images, each of which has an independent
34 * tree of dentries. "On disk", the dentry tree for an image is stored in
35 * the "metadata resource" for that image.
37 * - Multiple dentries in an image may correspond to the same inode, or "file".
38 * When this occurs, it means that the file has multiple names, or "hard
39 * links". A dentry is not a file, but rather the name of a file!
41 * - Inodes are not represented explicitly in the WIM file format. Instead,
42 * the metadata resource provides a "hard link group ID" for each dentry.
43 * wimlib handles pulling out actual inodes from this information, but this
44 * occurs in inode_fixup.c and not in this file.
46 * - wimlib does not allow *directory* hard links, so a WIM image really does
47 * have a *tree* of dentries (and not an arbitrary graph of dentries).
49 * - wimlib indexes dentries both case-insensitively and case-sensitively,
50 * allowing either behavior to be used for path lookup.
52 * - Multiple dentries in a directory might have the same case-insensitive
53 * name. But wimlib enforces that at most one dentry in a directory can have
54 * a given case-sensitive name.
63 #include "wimlib/assert.h"
64 #include "wimlib/dentry.h"
65 #include "wimlib/inode.h"
66 #include "wimlib/encoding.h"
67 #include "wimlib/endianness.h"
68 #include "wimlib/metadata.h"
69 #include "wimlib/paths.h"
71 /* On-disk format of a WIM dentry (directory entry), located in the metadata
72 * resource for a WIM image. */
73 struct wim_dentry_on_disk {
75 /* Length of this directory entry in bytes, not including any extra
76 * stream entries. Should be a multiple of 8 so that the following
77 * dentry or extra stream entry is aligned on an 8-byte boundary. (If
78 * not, wimlib will round it up.) It must be at least as long as the
79 * fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE), plus the
80 * lengths of the file name and/or short name if present, plus the size
81 * of any "extra" data.
83 * It is also possible for this field to be 0. This case indicates the
84 * end of a list of sibling entries in a directory. It also means the
85 * real length is 8, because the dentry included only the length field,
86 * but that takes up 8 bytes. */
89 /* File attributes for the file or directory. This is a bitwise OR of
90 * the FILE_ATTRIBUTE_* constants and should correspond to the value
91 * retrieved by GetFileAttributes() on Windows. */
94 /* A value that specifies the security descriptor for this file or
95 * directory. If -1, the file or directory has no security descriptor.
96 * Otherwise, it is a 0-based index into the WIM image's table of
97 * security descriptors (see: `struct wim_security_data') */
100 /* Offset, in bytes, from the start of the uncompressed metadata
101 * resource of this directory's child directory entries, or 0 if this
102 * directory entry does not correspond to a directory or otherwise does
103 * not have any children. */
106 /* Reserved fields */
110 /* Creation time, last access time, and last write time, in
111 * 100-nanosecond intervals since 12:00 a.m. UTC January 1, 1601. They
112 * should correspond to the times gotten by calling GetFileTime() on
115 le64 last_access_time;
116 le64 last_write_time;
119 * Usually this is the SHA-1 message digest of the file's "contents"
120 * (the unnamed data stream).
122 * If the file has FILE_ATTRIBUTE_REPARSE_POINT set, then this is
123 * instead usually the SHA-1 message digest of the uncompressed reparse
126 * However, there are some special rules that need to be applied to
127 * interpret this field correctly when extra stream entries are present.
128 * See the code for details.
130 u8 default_hash[SHA1_HASH_SIZE];
132 /* The format of the following data is not yet completely known and they
133 * do not correspond to Microsoft's documentation.
135 * If this directory entry is for a reparse point (has
136 * FILE_ATTRIBUTE_REPARSE_POINT set in the 'attributes' field), then the
137 * version of the following fields containing the reparse tag is valid.
138 * Furthermore, the field notated as not_rpfixed, as far as I can tell,
139 * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the
140 * targets of absolute symbolic links) were *not* done, and otherwise 0.
142 * If this directory entry is not for a reparse point, then the version
143 * of the following fields containing the hard_link_group_id is valid.
144 * All MS says about this field is that "If this file is part of a hard
145 * link set, all the directory entries in the set will share the same
146 * value in this field.". However, more specifically I have observed
148 * - If the file is part of a hard link set of size 1, then the
149 * hard_link_group_id should be set to either 0, which is treated
150 * specially as indicating "not hardlinked", or any unique value.
151 * - The specific nonzero values used to identify hard link sets do
152 * not matter, as long as they are unique.
153 * - However, due to bugs in Microsoft's software, it is actually NOT
154 * guaranteed that directory entries that share the same hard link
155 * group ID are actually hard linked to each other. See
156 * inode_fixup.c for the code that handles this.
164 } _packed_attribute reparse;
167 le64 hard_link_group_id;
168 } _packed_attribute nonreparse;
171 /* Number of extra stream entries that directly follow this dentry
173 le16 num_extra_streams;
175 /* If nonzero, this is the length, in bytes, of this dentry's UTF-16LE
176 * encoded short name (8.3 DOS-compatible name), excluding the null
177 * terminator. If zero, then the long name of this dentry does not have
178 * a corresponding short name (but this does not exclude the possibility
179 * that another dentry for the same file has a short name). */
180 le16 short_name_nbytes;
182 /* If nonzero, this is the length, in bytes, of this dentry's UTF-16LE
183 * encoded "long" name, excluding the null terminator. If zero, then
184 * this file has no long name. The root dentry should not have a long
185 * name, but all other dentries in the image should have long names. */
186 le16 file_name_nbytes;
188 /* Beginning of optional, variable-length fields */
190 /* If file_name_nbytes != 0, the next field will be the UTF-16LE encoded
191 * long file name. This will be null-terminated, so the size of this
192 * field will really be file_name_nbytes + 2. */
193 /*utf16lechar file_name[];*/
195 /* If short_name_nbytes != 0, the next field will be the UTF-16LE
196 * encoded short name. This will be null-terminated, so the size of
197 * this field will really be short_name_nbytes + 2. */
198 /*utf16lechar short_name[];*/
200 /* If there is still space in the dentry (according to the 'length'
201 * field) after 8-byte alignment, then the remaining space will be a
202 * variable-length list of tagged metadata items. See tagged_items.c
203 * for more information. */
204 /* u8 tagged_items[] _aligned_attribute(8); */
207 /* If num_extra_streams != 0, then there are that many extra stream
208 * entries following the dentry, starting on the next 8-byte aligned
209 * boundary. They are not counted in the 'length' field of the dentry.
212 /* On-disk format of an extra stream entry. This represents an extra NTFS-style
213 * "stream" associated with the file, such as a named data stream. */
214 struct wim_extra_stream_entry_on_disk {
216 /* Length of this extra stream entry, in bytes. This includes all
217 * fixed-length fields, plus the name and null terminator if present,
218 * and any needed padding such that the length is a multiple of 8. */
224 /* SHA-1 message digest of this stream's uncompressed data, or all
225 * zeroes if this stream's data is of zero length. */
226 u8 hash[SHA1_HASH_SIZE];
228 /* Length of this stream's name, in bytes and excluding the null
229 * terminator; or 0 if this stream is unnamed. */
232 /* Stream name in UTF-16LE. It is @name_nbytes bytes long, excluding
233 * the null terminator. There is a null terminator character if
234 * @name_nbytes != 0; i.e., if this stream is named. */
239 do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *file_name,
240 size_t file_name_nbytes)
242 FREE(dentry->file_name);
243 dentry->file_name = file_name;
244 dentry->file_name_nbytes = file_name_nbytes;
246 if (dentry_has_short_name(dentry)) {
247 FREE(dentry->short_name);
248 dentry->short_name = NULL;
249 dentry->short_name_nbytes = 0;
254 * Set the name of a WIM dentry from a UTF-16LE string.
256 * This sets the long name of the dentry. The short name will automatically be
257 * removed, since it may not be appropriate for the new long name.
259 * The @name string need not be null-terminated, since its length is specified
262 * If @name_nbytes is 0, both the long and short names of the dentry will be
265 * Only use this function on unlinked dentries, since it doesn't update the name
266 * indices. For dentries that are currently linked into the tree, use
269 * Returns 0 or WIMLIB_ERR_NOMEM.
272 dentry_set_name_utf16le(struct wim_dentry *dentry, const utf16lechar *name,
275 utf16lechar *dup = NULL;
278 dup = utf16le_dupz(name, name_nbytes);
280 return WIMLIB_ERR_NOMEM;
282 do_dentry_set_name(dentry, dup, name_nbytes);
288 * Set the name of a WIM dentry from a 'tchar' string.
290 * This sets the long name of the dentry. The short name will automatically be
291 * removed, since it may not be appropriate for the new long name.
293 * If @name is NULL or empty, both the long and short names of the dentry will
296 * Only use this function on unlinked dentries, since it doesn't update the name
297 * indices. For dentries that are currently linked into the tree, use
300 * Returns 0 or an error code resulting from a failed string conversion.
303 dentry_set_name(struct wim_dentry *dentry, const tchar *name)
305 utf16lechar *name_utf16le = NULL;
306 size_t name_utf16le_nbytes = 0;
310 ret = tstr_to_utf16le(name, tstrlen(name) * sizeof(tchar),
311 &name_utf16le, &name_utf16le_nbytes);
316 do_dentry_set_name(dentry, name_utf16le, name_utf16le_nbytes);
320 /* Calculate the minimum unaligned length, in bytes, of an on-disk WIM dentry
321 * that has names of the specified lengths. (Zero length means the
322 * corresponding name actually does not exist.) The returned value excludes
323 * tagged metadata items as well as any extra stream entries that may need to
324 * follow the dentry. */
326 dentry_min_len_with_names(u16 file_name_nbytes, u16 short_name_nbytes)
328 size_t length = sizeof(struct wim_dentry_on_disk);
329 if (file_name_nbytes)
330 length += (u32)file_name_nbytes + 2;
331 if (short_name_nbytes)
332 length += (u32)short_name_nbytes + 2;
337 /* Return the length, in bytes, required for the specified stream on-disk, when
338 * represented as an extra stream entry. */
340 stream_out_total_length(const struct wim_inode_stream *strm)
342 /* Account for the fixed length portion */
343 size_t len = sizeof(struct wim_extra_stream_entry_on_disk);
345 /* For named streams, account for the variable-length name. */
346 if (stream_is_named(strm))
347 len += utf16le_len_bytes(strm->stream_name) + 2;
349 /* Account for any necessary padding to the next 8-byte boundary. */
350 return (len + 7) & ~7;
354 * Calculate the total number of bytes that will be consumed when a dentry is
355 * written. This includes the fixed-length portion of the dentry, the name
356 * fields, any tagged metadata items, and any extra stream entries. This also
357 * includes all alignment bytes.
360 dentry_out_total_length(const struct wim_dentry *dentry)
362 const struct wim_inode *inode = dentry->d_inode;
365 len = dentry_min_len_with_names(dentry->file_name_nbytes,
366 dentry->short_name_nbytes);
367 len = (len + 7) & ~7;
369 if (inode->i_extra_size) {
370 len += inode->i_extra_size;
371 len = (len + 7) & ~7;
374 if (!(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
376 * Extra stream entries:
378 * - Use one extra stream entry for each named data stream
379 * - Use one extra stream entry for the unnamed data stream when there is either:
380 * - a reparse point stream
381 * - at least one named data stream (for Windows PE bug workaround)
382 * - Use one extra stream entry for the reparse point stream if there is one
384 bool have_named_data_stream = false;
385 bool have_reparse_point_stream = false;
386 for (unsigned i = 0; i < inode->i_num_streams; i++) {
387 const struct wim_inode_stream *strm = &inode->i_streams[i];
388 if (stream_is_named_data_stream(strm)) {
389 len += stream_out_total_length(strm);
390 have_named_data_stream = true;
391 } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) {
392 wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
393 have_reparse_point_stream = true;
397 if (have_named_data_stream || have_reparse_point_stream) {
398 if (have_reparse_point_stream)
399 len += (sizeof(struct wim_extra_stream_entry_on_disk) + 7) & ~7;
400 len += (sizeof(struct wim_extra_stream_entry_on_disk) + 7) & ~7;
407 /* Internal version of for_dentry_in_tree() that omits the NULL check */
409 do_for_dentry_in_tree(struct wim_dentry *dentry,
410 int (*visitor)(struct wim_dentry *, void *), void *arg)
413 struct wim_dentry *child;
415 ret = (*visitor)(dentry, arg);
419 for_dentry_child(child, dentry) {
420 ret = do_for_dentry_in_tree(child, visitor, arg);
427 /* Internal version of for_dentry_in_tree_depth() that omits the NULL check */
429 do_for_dentry_in_tree_depth(struct wim_dentry *dentry,
430 int (*visitor)(struct wim_dentry *, void *), void *arg)
433 struct wim_dentry *child;
435 for_dentry_child_postorder(child, dentry) {
436 ret = do_for_dentry_in_tree_depth(child, visitor, arg);
440 return unlikely((*visitor)(dentry, arg));
/*
 * Call @visitor on every dentry in the tree rooted at @root, passing @arg as
 * the second argument of each call.
 *
 * The traversal is pre-order: a parent is visited before its children, and
 * siblings are visited in order of case-sensitive filename.  Equivalently,
 * the whole tree is visited in the case-sensitive lexicographic order of the
 * full paths.
 *
 * @root may be NULL, meaning the tree is empty; nothing is done in that case.
 *
 * @visitor must not modify the structure of the dentry tree during the
 * traversal.
 *
 * Returns 0 if every call to @visitor returned 0; otherwise returns the first
 * nonzero value returned by @visitor.
 */
int
for_dentry_in_tree(struct wim_dentry *root,
		   int (*visitor)(struct wim_dentry *, void *), void *arg)
{
	if (likely(root != NULL))
		return do_for_dentry_in_tree(root, visitor, arg);

	return 0;
}
/* Same contract as for_dentry_in_tree(), except that @visitor is called on a
 * dentry's children before it is called on the dentry itself.  Because of
 * that ordering it is safe for @visitor to free the dentry it is given.  A
 * NULL @root is an empty tree and results in 0.  */
int
for_dentry_in_tree_depth(struct wim_dentry *root,
			 int (*visitor)(struct wim_dentry *, void *), void *arg)
{
	if (likely(root != NULL))
		return do_for_dentry_in_tree_depth(root, visitor, arg);

	return 0;
}
484 * Calculate the full path to @dentry within the WIM image, if not already done.
486 * The full name will be saved in the cached value 'dentry->_full_path'.
488 * Whenever possible, use dentry_full_path() instead of calling this and
489 * accessing _full_path directly.
491 * Returns 0 or an error code resulting from a failed string conversion.
494 calculate_dentry_full_path(struct wim_dentry *dentry)
498 const struct wim_dentry *d;
500 if (dentry->_full_path)
506 ulen += d->file_name_nbytes / sizeof(utf16lechar);
508 d = d->d_parent; /* assumes d == d->d_parent for root */
509 } while (!dentry_is_root(d));
511 utf16lechar ubuf[ulen];
512 utf16lechar *p = &ubuf[ulen];
516 p -= d->file_name_nbytes / sizeof(utf16lechar);
517 memcpy(p, d->file_name, d->file_name_nbytes);
518 *--p = cpu_to_le16(WIM_PATH_SEPARATOR);
519 d = d->d_parent; /* assumes d == d->d_parent for root */
520 } while (!dentry_is_root(d));
522 wimlib_assert(p == ubuf);
524 return utf16le_to_tstr(ubuf, ulen * sizeof(utf16lechar),
525 &dentry->_full_path, &dummy);
529 * Return the full path to the @dentry within the WIM image, or NULL if the full
530 * path could not be determined due to a string conversion error.
532 * The returned memory will be cached in the dentry, so the caller is not
533 * responsible for freeing it.
536 dentry_full_path(struct wim_dentry *dentry)
538 calculate_dentry_full_path(dentry);
539 return dentry->_full_path;
543 dentry_calculate_subdir_offset(struct wim_dentry *dentry, void *_subdir_offset_p)
545 if (dentry_is_directory(dentry)) {
546 u64 *subdir_offset_p = _subdir_offset_p;
547 struct wim_dentry *child;
549 /* Set offset of directory's child dentries */
550 dentry->subdir_offset = *subdir_offset_p;
552 /* Account for child dentries */
553 for_dentry_child(child, dentry)
554 *subdir_offset_p += dentry_out_total_length(child);
556 /* Account for end-of-directory entry */
557 *subdir_offset_p += 8;
559 /* Not a directory; set subdir_offset to 0 */
560 dentry->subdir_offset = 0;
566 * Calculate the subdir offsets for a dentry tree, in preparation of writing
567 * that dentry tree to a metadata resource.
569 * The subdir offset of each dentry is the offset in the uncompressed metadata
570 * resource at which its child dentries begin, or 0 if that dentry has no
573 * The caller must initialize *subdir_offset_p to the first subdir offset that
574 * is available to use after the root dentry is written.
576 * When this function returns, *subdir_offset_p will have been advanced past the
577 * size needed for the dentry tree within the uncompressed metadata resource.
580 calculate_subdir_offsets(struct wim_dentry *root, u64 *subdir_offset_p)
582 for_dentry_in_tree(root, dentry_calculate_subdir_offset, subdir_offset_p);
585 /* Compare the UTF-16LE long filenames of two dentries case insensitively. */
587 dentry_compare_names_case_insensitive(const struct wim_dentry *d1,
588 const struct wim_dentry *d2)
590 return cmp_utf16le_strings(d1->file_name,
591 d1->file_name_nbytes / 2,
593 d2->file_name_nbytes / 2,
597 /* Compare the UTF-16LE long filenames of two dentries case sensitively. */
599 dentry_compare_names_case_sensitive(const struct wim_dentry *d1,
600 const struct wim_dentry *d2)
602 return cmp_utf16le_strings(d1->file_name,
603 d1->file_name_nbytes / 2,
605 d2->file_name_nbytes / 2,
610 _avl_dentry_compare_names_ci(const struct avl_tree_node *n1,
611 const struct avl_tree_node *n2)
613 const struct wim_dentry *d1, *d2;
615 d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node_ci);
616 d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node_ci);
617 return dentry_compare_names_case_insensitive(d1, d2);
621 _avl_dentry_compare_names(const struct avl_tree_node *n1,
622 const struct avl_tree_node *n2)
624 const struct wim_dentry *d1, *d2;
626 d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node);
627 d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node);
628 return dentry_compare_names_case_sensitive(d1, d2);
/* Default case sensitivity behavior for searches with
 * WIMLIB_CASE_PLATFORM_DEFAULT specified.  This can be modified by passing
 * WIMLIB_INIT_FLAG_DEFAULT_CASE_SENSITIVE or
 * WIMLIB_INIT_FLAG_DEFAULT_CASE_INSENSITIVE to wimlib_global_init().
 *
 * NOTE(review): the initializer is platform-conditional; confirm the
 * preprocessor test below against the project's platform macro.  */
#ifdef __WIN32__
bool default_ignore_case = true;
#else
bool default_ignore_case = false;
#endif
643 /* Case-sensitive dentry lookup. Only @file_name and @file_name_nbytes of
644 * @dummy must be valid. */
645 static struct wim_dentry *
646 dir_lookup(const struct wim_inode *dir, const struct wim_dentry *dummy)
648 struct avl_tree_node *node;
650 node = avl_tree_lookup_node(dir->i_children,
651 &dummy->d_index_node,
652 _avl_dentry_compare_names);
655 return avl_tree_entry(node, struct wim_dentry, d_index_node);
658 /* Case-insensitive dentry lookup. Only @file_name and @file_name_nbytes of
659 * @dummy must be valid. */
660 static struct wim_dentry *
661 dir_lookup_ci(const struct wim_inode *dir, const struct wim_dentry *dummy)
663 struct avl_tree_node *node;
665 node = avl_tree_lookup_node(dir->i_children_ci,
666 &dummy->d_index_node_ci,
667 _avl_dentry_compare_names_ci);
670 return avl_tree_entry(node, struct wim_dentry, d_index_node_ci);
673 /* Given a UTF-16LE filename and a directory, look up the dentry for the file.
674 * Return it if found, otherwise NULL. This has configurable case sensitivity,
675 * and @name need not be null-terminated. */
677 get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
678 const utf16lechar *name,
680 CASE_SENSITIVITY_TYPE case_ctype)
682 const struct wim_inode *dir = dentry->d_inode;
683 bool ignore_case = will_ignore_case(case_ctype);
684 struct wim_dentry dummy;
685 struct wim_dentry *child;
687 dummy.file_name = (utf16lechar*)name;
688 dummy.file_name_nbytes = name_nbytes;
691 /* Case-sensitive lookup. */
692 return dir_lookup(dir, &dummy);
694 /* Case-insensitive lookup. */
696 child = dir_lookup_ci(dir, &dummy);
700 if (likely(list_empty(&child->d_ci_conflict_list)))
701 /* Only one dentry has this case-insensitive name; return it */
704 /* Multiple dentries have the same case-insensitive name. Choose the
705 * dentry with the same case-sensitive name, if one exists; otherwise
706 * print a warning and choose one of the possible dentries arbitrarily.
708 struct wim_dentry *alt = child;
713 if (!dentry_compare_names_case_sensitive(&dummy, alt))
715 alt = list_entry(alt->d_ci_conflict_list.next,
716 struct wim_dentry, d_ci_conflict_list);
717 } while (alt != child);
719 WARNING("Result of case-insensitive lookup is ambiguous\n"
720 " (returning \"%"TS"\" of %zu "
721 "possible files, including \"%"TS"\")",
722 dentry_full_path(child),
724 dentry_full_path(list_entry(child->d_ci_conflict_list.next,
726 d_ci_conflict_list)));
730 /* Given a 'tchar' filename and a directory, look up the dentry for the file.
731 * If the filename was successfully converted to UTF-16LE and the dentry was
732 * found, return it; otherwise return NULL. This has configurable case
735 get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name,
736 CASE_SENSITIVITY_TYPE case_type)
739 const utf16lechar *name_utf16le;
740 size_t name_utf16le_nbytes;
741 struct wim_dentry *child;
743 ret = tstr_get_utf16le_and_len(name, &name_utf16le,
744 &name_utf16le_nbytes);
748 child = get_dentry_child_with_utf16le_name(dentry,
752 tstr_put_utf16le(name_utf16le);
756 /* This is the UTF-16LE version of get_dentry(), currently private to this file
757 * because no one needs it besides get_dentry(). */
758 static struct wim_dentry *
759 get_dentry_utf16le(WIMStruct *wim, const utf16lechar *path,
760 CASE_SENSITIVITY_TYPE case_type)
762 struct wim_dentry *cur_dentry;
763 const utf16lechar *name_start, *name_end;
765 /* Start with the root directory of the image. Note: this will be NULL
766 * if an image has been added directly with wimlib_add_empty_image() but
767 * no files have been added yet; in that case we fail with ENOENT. */
768 cur_dentry = wim_get_current_root_dentry(wim);
772 if (cur_dentry == NULL) {
777 if (*name_start && !dentry_is_directory(cur_dentry)) {
782 while (*name_start == cpu_to_le16(WIM_PATH_SEPARATOR))
788 name_end = name_start;
791 } while (*name_end != cpu_to_le16(WIM_PATH_SEPARATOR) && *name_end);
793 cur_dentry = get_dentry_child_with_utf16le_name(cur_dentry,
795 (u8*)name_end - (u8*)name_start,
797 name_start = name_end;
802 * WIM path lookup: translate a path in the currently selected WIM image to the
803 * corresponding dentry, if it exists.
806 * The WIMStruct for the WIM. The search takes place in the currently
810 * The path to look up, given relative to the root of the WIM image.
811 * Characters with value WIM_PATH_SEPARATOR are taken to be path
812 * separators. Leading path separators are ignored, whereas one or more
813 * trailing path separators cause the path to only match a directory.
816 * The case-sensitivity behavior of this function, as one of the following
819 * - WIMLIB_CASE_SENSITIVE: Perform the search case sensitively. This means
820 * that names must match exactly.
822 * - WIMLIB_CASE_INSENSITIVE: Perform the search case insensitively. This
823 * means that names are considered to match if they are equal when
824 * transformed to upper case. If a path component matches multiple names
825 * case-insensitively, the name that matches the path component
826 * case-sensitively is chosen, if existent; otherwise one
827 * case-insensitively matching name is chosen arbitrarily.
829 * - WIMLIB_CASE_PLATFORM_DEFAULT: Perform either case-sensitive or
830 * case-insensitive search, depending on the value of the global variable
831 * default_ignore_case.
833 * In any case, no Unicode normalization is done before comparing strings.
835 * Returns a pointer to the dentry that is the result of the lookup, or NULL if
836 * no such dentry exists. If NULL is returned, errno is set to one of the
839 * ENOTDIR if one of the path components used as a directory existed but
840 * was not, in fact, a directory.
846 * - This function does not consider a reparse point to be a directory, even
847 * if it has FILE_ATTRIBUTE_DIRECTORY set.
849 * - This function does not dereference symbolic links or junction points
850 * when performing the search.
852 * - Since this function ignores leading slashes, the empty path is valid and
853 * names the root directory of the WIM image.
855 * - An image added with wimlib_add_empty_image() does not have a root
856 * directory yet, and this function will fail with ENOENT for any path on
860 get_dentry(WIMStruct *wim, const tchar *path, CASE_SENSITIVITY_TYPE case_type)
863 const utf16lechar *path_utf16le;
864 struct wim_dentry *dentry;
866 ret = tstr_get_utf16le(path, &path_utf16le);
869 dentry = get_dentry_utf16le(wim, path_utf16le, case_type);
870 tstr_put_utf16le(path_utf16le);
874 /* Modify @path, which is a null-terminated string @len 'tchars' in length,
875 * in-place to produce the path to its parent directory. */
877 to_parent_name(tchar *path, size_t len)
879 ssize_t i = (ssize_t)len - 1;
880 while (i >= 0 && path[i] == WIM_PATH_SEPARATOR)
882 while (i >= 0 && path[i] != WIM_PATH_SEPARATOR)
884 while (i >= 0 && path[i] == WIM_PATH_SEPARATOR)
886 path[i + 1] = T('\0');
889 /* Similar to get_dentry(), but returns the dentry named by @path with the last
890 * component stripped off.
892 * Note: The returned dentry is NOT guaranteed to be a directory. */
894 get_parent_dentry(WIMStruct *wim, const tchar *path,
895 CASE_SENSITIVITY_TYPE case_type)
897 size_t path_len = tstrlen(path);
898 tchar buf[path_len + 1];
900 tmemcpy(buf, path, path_len + 1);
901 to_parent_name(buf, path_len);
902 return get_dentry(wim, buf, case_type);
906 * Create an unlinked dentry.
908 * @name specifies the long name to give the new dentry. If NULL or empty, the
909 * new dentry will be given no long name.
911 * The new dentry will have no short name and no associated inode.
913 * On success, returns 0 and a pointer to the new, allocated dentry is stored in
914 * *dentry_ret. On failure, returns WIMLIB_ERR_NOMEM or an error code resulting
915 * from a failed string conversion.
918 new_dentry(const tchar *name, struct wim_dentry **dentry_ret)
920 struct wim_dentry *dentry;
923 dentry = CALLOC(1, sizeof(struct wim_dentry));
925 return WIMLIB_ERR_NOMEM;
928 ret = dentry_set_name(dentry, name);
934 dentry->d_parent = dentry;
935 *dentry_ret = dentry;
939 /* Like new_dentry(), but also allocate an inode and associate it with the
940 * dentry. If set_timestamps=true, the timestamps for the inode will be set to
941 * the current time; otherwise, they will be left 0. */
943 new_dentry_with_new_inode(const tchar *name, bool set_timestamps,
944 struct wim_dentry **dentry_ret)
946 struct wim_dentry *dentry;
947 struct wim_inode *inode;
950 ret = new_dentry(name, &dentry);
954 inode = new_inode(dentry, set_timestamps);
957 return WIMLIB_ERR_NOMEM;
960 *dentry_ret = dentry;
964 /* Like new_dentry(), but also associate the new dentry with the specified inode
965 * and acquire a reference to each of the inode's blobs. */
967 new_dentry_with_existing_inode(const tchar *name, struct wim_inode *inode,
968 struct wim_dentry **dentry_ret)
970 int ret = new_dentry(name, dentry_ret);
973 d_associate(*dentry_ret, inode);
974 inode_ref_blobs(inode);
978 /* Create an unnamed dentry with a new inode for a directory with the default
981 new_filler_directory(struct wim_dentry **dentry_ret)
984 struct wim_dentry *dentry;
986 ret = new_dentry_with_new_inode(NULL, true, &dentry);
989 /* Leave the inode number as 0; this is allowed for non
990 * hard-linked files. */
991 dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
992 *dentry_ret = dentry;
997 dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore)
999 dentry->d_inode->i_visited = 0;
1004 dentry_tree_clear_inode_visited(struct wim_dentry *root)
1006 for_dentry_in_tree(root, dentry_clear_inode_visited, NULL);
1010 * Free a WIM dentry.
1012 * In addition to freeing the dentry itself, this disassociates the dentry from
1013 * its inode. If the inode is no longer in use, it will be freed as well.
1016 free_dentry(struct wim_dentry *dentry)
1019 d_disassociate(dentry);
1020 FREE(dentry->file_name);
1021 FREE(dentry->short_name);
1022 FREE(dentry->_full_path);
/* Tree visitor adapter around free_dentry(); the second argument is unused.
 * Always returns 0 so the traversal continues.  */
static int
do_free_dentry(struct wim_dentry *dentry, void *_ignore)
{
	free_dentry(dentry);

	return 0;
}
1035 do_free_dentry_and_unref_blobs(struct wim_dentry *dentry, void *blob_table)
1037 inode_unref_blobs(dentry->d_inode, blob_table);
1038 free_dentry(dentry);
1043 * Free all dentries in a tree.
1046 * The root of the dentry tree to free. If NULL, this function has no
1050 * A pointer to the blob table for the WIM, or NULL if not specified. If
1051 * specified, this function will decrement the reference counts of the
1052 * blobs referenced by the dentries.
1054 * This function also releases references to the corresponding inodes.
1056 * This function does *not* unlink @root from its parent directory, if it has
1057 * one. If @root has a parent, the caller must unlink @root before calling this
1061 free_dentry_tree(struct wim_dentry *root, struct blob_table *blob_table)
1063 int (*f)(struct wim_dentry *, void *);
1066 f = do_free_dentry_and_unref_blobs;
1070 for_dentry_in_tree_depth(root, f, blob_table);
1073 /* Insert the @child dentry into the case sensitive index of the @dir directory.
1074 * Return NULL if successfully inserted, otherwise a pointer to the
1075 * already-inserted duplicate. */
1076 static struct wim_dentry *
1077 dir_index_child(struct wim_inode *dir, struct wim_dentry *child)
1079 struct avl_tree_node *duplicate;
1081 duplicate = avl_tree_insert(&dir->i_children,
1082 &child->d_index_node,
1083 _avl_dentry_compare_names);
1086 return avl_tree_entry(duplicate, struct wim_dentry, d_index_node);
1089 /* Insert the @child dentry into the case insensitive index of the @dir
1090 * directory. Return NULL if successfully inserted, otherwise a pointer to the
1091 * already-inserted duplicate. */
1092 static struct wim_dentry *
1093 dir_index_child_ci(struct wim_inode *dir, struct wim_dentry *child)
1095 struct avl_tree_node *duplicate;
/* Same scheme as the case-sensitive index, but using the separate
 * i_children_ci tree, the d_index_node_ci node and the
 * _avl_dentry_compare_names_ci ordering. */
1097 duplicate = avl_tree_insert(&dir->i_children_ci,
1098 &child->d_index_node_ci,
1099 _avl_dentry_compare_names_ci);
/* Convert the colliding tree node back into the dentry that embeds it. */
1102 return avl_tree_entry(duplicate, struct wim_dentry, d_index_node_ci);
1105 /* Remove the specified dentry from its directory's case-sensitive index. */
/* @dir is the directory inode whose i_children tree holds the d_index_node
 * of @child. */
1107 dir_unindex_child(struct wim_inode *dir, struct wim_dentry *child)
1109 avl_tree_remove(&dir->i_children, &child->d_index_node);
1112 /* Remove the specified dentry from its directory's case-insensitive index. */
/* @dir is the directory inode whose i_children_ci tree holds the
 * d_index_node_ci of @child. */
1114 dir_unindex_child_ci(struct wim_inode *dir, struct wim_dentry *child)
1116 avl_tree_remove(&dir->i_children_ci, &child->d_index_node_ci);
1119 /* Return true iff the specified dentry is in its parent directory's
1120 * case-insensitive index. */
1122 dentry_in_ci_index(const struct wim_dentry *dentry)
/* Membership is encoded in the node itself: dentry_add_child() marks
 * d_index_node_ci as unlinked for a dentry that is chained onto the
 * conflict list of an existing case-insensitive entry. */
1124 return !avl_tree_node_is_unlinked(&dentry->d_index_node_ci);
1128 * Link a dentry into the tree.
1131 * The dentry that will be the parent of @child. It must name a directory.
1134 * The dentry to link. It must be currently unlinked.
1136 * Returns NULL if successful. If @parent already contains a dentry with the
1137 * same case-sensitive name as @child, returns a pointer to this duplicate dentry.
/* Links @child under @parent: indexes it by name in the directory inode of
 * the parent and records @parent as its parent dentry.
 * NOTE(review): the branch structure of this function (embedded lines
 * 1153-1155, 1157, 1160, 1162 and 1164 onward) is missing from this listing,
 * so the comments below describe the visible statements only. */
1141 dentry_add_child(struct wim_dentry *parent, struct wim_dentry *child)
1143 struct wim_dentry *duplicate;
1144 struct wim_inode *dir;
/* A dentry must not be linked beneath itself. */
1146 wimlib_assert(parent != child);
/* Both name indexes live in the inode of the parent. */
1148 dir = parent->d_inode;
1150 wimlib_assert(inode_is_directory(dir));
/* Case-sensitive index first. */
1152 duplicate = dir_index_child(dir, child);
/* Then the case-insensitive index. */
1156 duplicate = dir_index_child_ci(dir, child);
/* These two statements use the duplicate, so they presumably belong to the
 * branch where a case-insensitive match already exists: @child is chained
 * onto the conflict list of that entry and its own case-insensitive node is
 * marked as unlinked. */
1158 list_add(&child->d_ci_conflict_list, &duplicate->d_ci_conflict_list);
1159 avl_tree_node_set_unlinked(&child->d_index_node_ci);
/* Presumably the other branch: @child starts with an empty conflict list. */
1161 INIT_LIST_HEAD(&child->d_ci_conflict_list);
1163 child->d_parent = parent;
1167 /* Unlink a dentry from the tree. */
/* Removes @dentry from the name indexes of its parent directory.  If it was
 * the entry representing a group of case-insensitively equal names, another
 * member of that group is inserted into the case-insensitive index in its
 * place. */
1169 unlink_dentry(struct wim_dentry *dentry)
1171 struct wim_inode *dir;
1173 /* Do nothing if the dentry is root or it's already unlinked. Not
1174 * actually necessary based on the current callers, but we do the check
1175 * here to be safe. */
1176 if (unlikely(dentry->d_parent == dentry))
/* Both name indexes live in the inode of the parent. */
1179 dir = dentry->d_parent->d_inode;
1181 dir_unindex_child(dir, dentry);
/* Only a dentry that is actually present in the case-insensitive tree has to
 * be removed from it. */
1183 if (dentry_in_ci_index(dentry)) {
1185 dir_unindex_child_ci(dir, dentry);
1187 if (!list_empty(&dentry->d_ci_conflict_list)) {
1188 /* Make a different case-insensitively-the-same dentry
1189 * be the "representative" in the search index. */
1190 struct list_head *next;
1191 struct wim_dentry *other;
1192 struct wim_dentry *existing;
/* The next entry on the conflict list is the one that gets promoted. */
1194 next = dentry->d_ci_conflict_list.next;
1195 other = list_entry(next, struct wim_dentry, d_ci_conflict_list);
1196 existing = dir_index_child_ci(dir, other);
/* The slot was vacated just above, so no duplicate is expected. */
1197 wimlib_assert(existing == NULL);
/* Leave the list of case-insensitive name conflicts. */
1200 list_del(&dentry->d_ci_conflict_list);
1202 /* Not actually necessary, but to be safe don't retain the now-obsolete
1203 * parent pointer. */
1204 dentry->d_parent = dentry;
/* Captures the extra data that lies between @p and @end: whatever remains
 * once @p has reached an 8-byte-aligned address is duplicated into
 * inode->i_extra and its size stored in inode->i_extra_size.
 * Returns WIMLIB_ERR_NOMEM if the copy cannot be allocated. */
1208 read_extra_data(const u8 *p, const u8 *end, struct wim_inode *inode)
/* Runs while p is not 8-byte aligned and has not reached the end; the loop
 * body is not visible in this listing.
 * NOTE(review): alignment is judged from the pointer value itself, so this
 * presumably relies on the metadata buffer being 8-byte aligned -- confirm. */
1210 while (((uintptr_t)p & 7) && p < end)
/* Extra data is optional; nothing is stored when no bytes remain. */
1213 if (unlikely(p < end)) {
1214 inode->i_extra = memdup(p, end - p);
1215 if (!inode->i_extra)
1216 return WIMLIB_ERR_NOMEM;
1217 inode->i_extra_size = end - p;
1223 * Set the type of each stream for an encrypted file.
1225 * All data streams of the encrypted file should have been packed into a single
1226 * stream in the format provided by ReadEncryptedFileRaw() on Windows. We
1227 * assign this stream type STREAM_TYPE_EFSRPC_RAW_DATA.
1229 * Encrypted files can't have a reparse point stream. In the on-disk NTFS
1230 * format they can, but as far as I know the reparse point stream of an
1231 * encrypted file can't be stored in the WIM format in a way that's compatible
1232 * with WIMGAPI, nor is there even any way for it to be read or written on
1233 * Windows when the process does not have access to the file encryption key.
/* Stream typing for an inode with FILE_ATTRIBUTE_ENCRYPTED: an unnamed
 * stream with a nonzero hash is tagged STREAM_TYPE_EFSRPC_RAW_DATA.
 * NOTE(review): whatever follows the assignment inside the loop (embedded
 * lines 1243 onward) is not visible here, so it cannot be told from this
 * listing whether more than one stream can be tagged. */
1236 assign_stream_types_encrypted(struct wim_inode *inode)
1238 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1239 struct wim_inode_stream *strm = &inode->i_streams[i];
/* Candidate: no stream name and a hash that is not all zeroes. */
1240 if (!stream_is_named(strm) && !is_zero_hash(strm->_stream_hash))
1242 strm->stream_type = STREAM_TYPE_EFSRPC_RAW_DATA;
1249 * Set the type of each stream for an unencrypted file.
1251 * There will be an unnamed data stream, a reparse point stream, or both an
1252 * unnamed data stream and a reparse point stream. In addition, there may be
1253 * named data streams.
/* Stream typing for an inode without FILE_ATTRIBUTE_ENCRYPTED.  Named
 * streams become data streams.  Unnamed streams with a nonzero hash fill the
 * reparse point slot first (only when the inode has the reparse point
 * attribute) and then the unnamed data slot.  An unnamed stream with a zero
 * hash is used as the unnamed data stream only as a fallback. */
1256 assign_stream_types_unencrypted(struct wim_inode *inode)
/* Each of these two roles is handed out at most once. */
1258 bool found_reparse_point_stream = false;
1259 bool found_unnamed_data_stream = false;
/* Fallback candidate for the unnamed data stream. */
1260 struct wim_inode_stream *unnamed_stream_with_zero_hash = NULL;
1262 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1263 struct wim_inode_stream *strm = &inode->i_streams[i];
1265 if (stream_is_named(strm)) {
1266 /* Named data stream */
1267 strm->stream_type = STREAM_TYPE_DATA;
1268 } else if (!is_zero_hash(strm->_stream_hash)) {
/* Unnamed with real content: on a reparse point inode the first such stream
 * is taken as the reparse point stream. */
1269 if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
1270 !found_reparse_point_stream) {
1271 found_reparse_point_stream = true;
1272 strm->stream_type = STREAM_TYPE_REPARSE_POINT;
/* The next one (or the first, on other inodes) is the unnamed data stream. */
1273 } else if (!found_unnamed_data_stream) {
1274 found_unnamed_data_stream = true;
1275 strm->stream_type = STREAM_TYPE_DATA;
1278 /* If no stream name is specified and the hash is zero,
1279 * then remember this stream for later so that we can
1280 * assign it to the unnamed data stream if we don't find
1281 * a better candidate. */
1282 unnamed_stream_with_zero_hash = strm;
/* Apply the fallback only when no nonzero-hash stream took the role. */
1286 if (!found_unnamed_data_stream && unnamed_stream_with_zero_hash != NULL)
1287 unnamed_stream_with_zero_hash->stream_type = STREAM_TYPE_DATA;
1291 * Read and interpret the collection of streams for the specified inode.
/* Builds the stream array of @inode.  Slot 0 takes @default_hash and no
 * name; each of the @num_extra_streams following slots is parsed from an
 * on-disk extra stream entry located between @p and @end.  Afterwards every
 * stream is assigned a type and the number of bytes consumed is added to
 * *offset_p.  Failure codes visible here are WIMLIB_ERR_NOMEM and
 * WIMLIB_ERR_INVALID_METADATA_RESOURCE.
 * NOTE(review): several short lines are missing from this listing, among
 * them the last parameter line, the local declarations, the second half of
 * the bounds check on the entry length and the statement advancing p. */
1294 setup_inode_streams(const u8 *p, const u8 *end, struct wim_inode *inode,
1295 unsigned num_extra_streams, const u8 *default_hash,
/* Starting position, kept so that the consumed byte count can be reported. */
1298 const u8 *orig_p = p;
1300 inode->i_num_streams = 1 + num_extra_streams;
/* A zeroed heap array is allocated only when the stream count exceeds the
 * capacity of the array embedded in the inode. */
1302 if (unlikely(inode->i_num_streams > ARRAY_LEN(inode->i_embedded_streams))) {
1303 inode->i_streams = CALLOC(inode->i_num_streams,
1304 sizeof(inode->i_streams[0]));
1305 if (!inode->i_streams)
1306 return WIMLIB_ERR_NOMEM;
1309 /* Use the default hash field for the first stream */
1310 inode->i_streams[0].stream_name = (utf16lechar *)NO_STREAM_NAME;
1311 copy_hash(inode->i_streams[0]._stream_hash, default_hash);
1312 inode->i_streams[0].stream_type = STREAM_TYPE_UNKNOWN;
1313 inode->i_streams[0].stream_id = 0;
1315 /* Read the extra stream entries */
1316 for (unsigned i = 1; i < inode->i_num_streams; i++) {
1317 struct wim_inode_stream *strm;
1318 const struct wim_extra_stream_entry_on_disk *disk_strm;
1322 strm = &inode->i_streams[i];
/* Stream IDs simply follow the order of the entries. */
1324 strm->stream_id = i;
1326 /* Do we have at least the size of the fixed-length data we know
1328 if ((end - p) < sizeof(struct wim_extra_stream_entry_on_disk))
1329 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1331 disk_strm = (const struct wim_extra_stream_entry_on_disk *)p;
1333 /* Read the length field */
1334 length = le64_to_cpu(disk_strm->length);
1336 /* 8-byte align the length */
1337 length = (length + 7) & ~7;
1339 /* Make sure the length field is neither so small it doesn't
1340 * include all the fixed-length data nor so large it overflows
1341 * the metadata resource buffer. */
1342 if (length < sizeof(struct wim_extra_stream_entry_on_disk) ||
1344 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1346 /* Read the rest of the fixed-length data. */
1348 copy_hash(strm->_stream_hash, disk_strm->hash);
1349 name_nbytes = le16_to_cpu(disk_strm->name_nbytes);
1351 /* If stream_name_nbytes != 0, the stream is named. */
1352 if (name_nbytes != 0) {
1353 /* The name is encoded in UTF16-LE, which uses 2-byte
1354 * coding units, so the length of the name had better be
1355 * an even number of bytes. */
1356 if (name_nbytes & 1)
1357 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1359 /* Add the length of the stream name to get the length
1360 * we actually need to read. Make sure this isn't more
1361 * than the specified length of the entry. */
1362 if (sizeof(struct wim_extra_stream_entry_on_disk) +
1363 name_nbytes > length)
1364 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
/* Named streams get their own duplicated copy of the name. */
1366 strm->stream_name = utf16le_dupz(disk_strm->name,
1368 if (!strm->stream_name)
1369 return WIMLIB_ERR_NOMEM;
/* Unnamed entries point at the shared NO_STREAM_NAME sentinel instead. */
1371 strm->stream_name = (utf16lechar *)NO_STREAM_NAME;
/* Types are decided in a second pass, once all streams are known. */
1374 strm->stream_type = STREAM_TYPE_UNKNOWN;
/* IDs 0 through i_num_streams - 1 were assigned above. */
1379 inode->i_next_stream_id = inode->i_num_streams;
1381 /* Now, assign a type to each stream. Unfortunately this requires
1382 * various hacks because stream types aren't explicitly provided in the
1383 * WIM on-disk format. */
1385 if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED))
1386 assign_stream_types_encrypted(inode);
1388 assign_stream_types_unencrypted(inode);
/* Report how far the parse position moved. */
1390 *offset_p += p - orig_p;
1394 /* Read a dentry, including all extra stream entries that follow it, from an
1395 * uncompressed metadata resource buffer. */
/* @buf, @buf_len: the uncompressed metadata resource.
 * @offset_p: on entry, the offset of the dentry to read; on success it is
 *	set to the offset of the next dentry in the directory.
 * @dentry_ret: on success, receives the newly allocated dentry.
 * Failure codes visible here are WIMLIB_ERR_INVALID_METADATA_RESOURCE and
 * WIMLIB_ERR_NOMEM.
 * NOTE(review): short lines are missing from this listing, among them some
 * local declarations, the end-of-directory branch, the check of the
 * allocation result and the statement that advances offset past the dentry. */
1397 read_dentry(const u8 * restrict buf, size_t buf_len,
1398 u64 *offset_p, struct wim_dentry **dentry_ret)
1400 u64 offset = *offset_p;
1403 const struct wim_dentry_on_disk *disk_dentry;
1404 struct wim_dentry *dentry;
1405 struct wim_inode *inode;
1406 u16 short_name_nbytes;
1407 u16 file_name_nbytes;
1408 u64 calculated_size;
/* Compile-time check that the on-disk structure has the expected size. */
1411 BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE);
1413 /* Before reading the whole dentry, we need to read just the length.
1414 * This is because a dentry of length 8 (that is, just the length field)
1415 * terminates the list of sibling directory entries. */
1417 /* Check for buffer overrun. */
/* The second comparison catches wraparound of the 64-bit addition. */
1418 if (unlikely(offset + sizeof(u64) > buf_len ||
1419 offset + sizeof(u64) < offset))
1420 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1422 /* Get pointer to the dentry data. */
1424 disk_dentry = (const struct wim_dentry_on_disk*)p;
1426 /* Get dentry length. */
/* The stored length is rounded up to a multiple of 8. */
1427 length = (le64_to_cpu(disk_dentry->length) + 7) & ~7;
1429 /* Check for end-of-directory. */
1435 /* Validate dentry length. */
1436 if (unlikely(length < sizeof(struct wim_dentry_on_disk)))
1437 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1439 /* Check for buffer overrun. */
/* Same pattern as above: range check plus wraparound check. */
1440 if (unlikely(offset + length > buf_len ||
1441 offset + length < offset))
1442 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1444 /* Allocate new dentry structure, along with a preliminary inode. */
1445 ret = new_dentry_with_new_inode(NULL, false, &dentry);
1449 inode = dentry->d_inode;
1451 /* Read more fields: some into the dentry, and some into the inode. */
1452 inode->i_attributes = le32_to_cpu(disk_dentry->attributes);
1453 inode->i_security_id = le32_to_cpu(disk_dentry->security_id);
1454 dentry->subdir_offset = le64_to_cpu(disk_dentry->subdir_offset);
1455 inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time);
1456 inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time);
1457 inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time);
1459 /* I don't know what's going on here. It seems like M$ screwed up the
1460 * reparse points, then put the fields in the same place and didn't
1461 * document it. So we have some fields we read for reparse points, and
1462 * some fields in the same place for non-reparse-points. */
1463 if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
1464 inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->reparse.rp_unknown_1);
1465 inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag);
1466 inode->i_rp_unknown_2 = le16_to_cpu(disk_dentry->reparse.rp_unknown_2);
1467 inode->i_not_rpfixed = le16_to_cpu(disk_dentry->reparse.not_rpfixed);
1468 /* Leave inode->i_ino at 0. Note: this means that WIM cannot
1469 * represent multiple hard links to a reparse point file. */
/* Non-reparse-point layout: the same area holds the hard link group ID,
 * which is stored as the inode number. */
1471 inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->nonreparse.rp_unknown_1);
1472 inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id);
1475 /* Now onto reading the names. There are two of them: the (long) file
1476 * name, and the short name. */
1478 short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes);
1479 file_name_nbytes = le16_to_cpu(disk_dentry->file_name_nbytes);
/* Both byte counts must be even; a single bitwise OR of the low bits tests
 * the two of them at once. */
1481 if (unlikely((short_name_nbytes & 1) | (file_name_nbytes & 1))) {
1482 ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1483 goto err_free_dentry;
1486 /* We now know the length of the file name and short name. Make sure
1487 * the length of the dentry is large enough to actually hold them. */
1488 calculated_size = dentry_min_len_with_names(file_name_nbytes,
1491 if (unlikely(length < calculated_size)) {
1492 ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1493 goto err_free_dentry;
1496 /* Advance p to point past the base dentry, to the first name. */
1497 p += sizeof(struct wim_dentry_on_disk);
1499 /* Read the filename if present. Note: if the filename is empty, there
1500 * is no null terminator following it. */
1501 if (file_name_nbytes) {
1502 dentry->file_name = utf16le_dupz(p, file_name_nbytes);
1503 if (dentry->file_name == NULL) {
1504 ret = WIMLIB_ERR_NOMEM;
1505 goto err_free_dentry;
1507 dentry->file_name_nbytes = file_name_nbytes;
/* The extra 2 bytes step over the null terminator of a nonempty name. */
1508 p += (u32)file_name_nbytes + 2;
1511 /* Read the short filename if present. Note: if there is no short
1512 * filename, there is no null terminator following it. */
1513 if (short_name_nbytes) {
1514 dentry->short_name = utf16le_dupz(p, short_name_nbytes);
1515 if (dentry->short_name == NULL) {
1516 ret = WIMLIB_ERR_NOMEM;
1517 goto err_free_dentry;
1519 dentry->short_name_nbytes = short_name_nbytes;
/* Same as for the long name: skip the name and its null terminator. */
1520 p += (u32)short_name_nbytes + 2;
1523 /* Read extra data at end of dentry (but before extra stream entries).
1524 * This may contain tagged metadata items. */
/* The extra data is bounded by the end of this dentry, as given by its
 * aligned length. */
1525 ret = read_extra_data(p, &buf[offset + length], inode);
1527 goto err_free_dentry;
1531 /* Set up the inode's collection of streams. */
1532 ret = setup_inode_streams(&buf[offset],
1535 le16_to_cpu(disk_dentry->num_extra_streams),
1536 disk_dentry->default_hash,
1539 goto err_free_dentry;
1541 *offset_p = offset; /* Sets offset of next dentry in directory */
1542 *dentry_ret = dentry;
/* Failure path: release the partially built dentry. */
1546 free_dentry(dentry);
1550 /* Is the dentry named "." or ".." ? */
1552 dentry_is_dot_or_dotdot(const struct wim_dentry *dentry)
/* file_name_nbytes counts bytes and each file_name element is compared
 * against a 16-bit value, so 4 bytes means two characters and 2 bytes means
 * one.  Longer names are rejected by this first test without looking at the
 * characters. */
1554 if (dentry->file_name_nbytes <= 4) {
1555 if (dentry->file_name_nbytes == 4) {
/* Two characters: both must be dots. */
1556 if (dentry->file_name[0] == cpu_to_le16('.') &&
1557 dentry->file_name[1] == cpu_to_le16('.'))
1559 } else if (dentry->file_name_nbytes == 2) {
/* One character: it must be a dot. */
1560 if (dentry->file_name[0] == cpu_to_le16('.'))
/* Reads the children of @dir from the metadata buffer, beginning at
 * dir->subdir_offset, links each accepted child under @dir, and calls itself
 * for children that are directories with a nonzero subdir_offset.
 * NOTE(review): the loop header, the statements that discard rejected
 * children and the return statements are missing from this listing. */
1568 read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len,
1569 struct wim_dentry * restrict dir)
/* Parse position; read_dentry() receives its address and updates it. */
1571 u64 cur_offset = dir->subdir_offset;
1573 /* Check for cyclic directory structure, which would cause infinite
1574 * recursion if not handled. */
/* Walks up the chain of ancestors until the root is reached. */
1575 for (struct wim_dentry *d = dir->d_parent;
1576 !dentry_is_root(d); d = d->d_parent)
/* An ancestor whose children start at the same offset means the structure
 * loops back on itself. */
1578 if (unlikely(d->subdir_offset == cur_offset)) {
1579 ERROR("Cyclic directory structure detected: children "
1580 "of \"%"TS"\" coincide with children of \"%"TS"\"",
1581 dentry_full_path(dir), dentry_full_path(d));
1582 return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1587 struct wim_dentry *child;
1588 struct wim_dentry *duplicate;
1591 /* Read next child of @dir. */
1592 ret = read_dentry(buf, buf_len, &cur_offset, &child);
1596 /* Check for end of directory. */
1600 /* All dentries except the root should be named. */
1601 if (unlikely(!dentry_has_long_name(child))) {
1602 WARNING("Ignoring unnamed dentry in "
1603 "directory \"%"TS"\"", dentry_full_path(dir));
1608 /* Don't allow files named "." or "..". */
1609 if (unlikely(dentry_is_dot_or_dotdot(child))) {
1610 WARNING("Ignoring file named \".\" or \"..\"; "
1611 "potentially malicious archive!!!");
1616 /* Link the child into the directory. */
1617 duplicate = dentry_add_child(dir, child);
1618 if (unlikely(duplicate)) {
1619 /* We already found a dentry with this same
1620 * case-sensitive long name. Only keep the first one.
1622 WARNING("Ignoring duplicate file \"%"TS"\" "
1623 "(the WIM image already contains a file "
1624 "at that path with the exact same name)",
1625 dentry_full_path(duplicate));
1630 /* If this child is a directory that itself has children, call
1631 * this procedure recursively. */
1632 if (child->subdir_offset != 0) {
1633 if (likely(dentry_is_directory(child))) {
1634 ret = read_dentry_tree_recursive(buf,
/* A nonzero subdir_offset on a non-directory only produces a warning. */
1640 WARNING("Ignoring children of "
1641 "non-directory file \"%"TS"\"",
1642 dentry_full_path(child));
1649 * Read a tree of dentries from a WIM metadata resource.
1652 * Buffer containing an uncompressed WIM metadata resource.
1655 * Length of the uncompressed metadata resource, in bytes.
1658 * Offset in the metadata resource of the root of the dentry tree.
1661 * On success, either NULL or a pointer to the root dentry is written to
1662 * this location. The former case only occurs in the unexpected case that
1663 * the tree began with an end-of-directory entry.
1666 * WIMLIB_ERR_SUCCESS (0)
1667 * WIMLIB_ERR_INVALID_METADATA_RESOURCE
/* Reads the root dentry at @root_offset and, if its subdir_offset is
 * nonzero, the whole tree beneath it.  A root that is not a directory is
 * rejected with WIMLIB_ERR_INVALID_METADATA_RESOURCE.  When a failure occurs
 * after the root was read, the partially built tree is freed.
 * NOTE(review): the checks of ret, part of one warning string, the store
 * through @root_ret and the return statements are missing from this
 * listing. */
1671 read_dentry_tree(const u8 *buf, size_t buf_len,
1672 u64 root_offset, struct wim_dentry **root_ret)
1675 struct wim_dentry *root;
1677 DEBUG("Reading dentry tree (root_offset=%"PRIu64")", root_offset);
/* The address of the by-value parameter is passed, so the offset update made
 * by read_dentry() stays local to this function. */
1679 ret = read_dentry(buf, buf_len, &root_offset, &root);
1683 if (likely(root != NULL)) {
/* A root that carries a long or short name gets a warning, and its name is
 * reset with dentry_set_name(root, NULL). */
1684 if (unlikely(dentry_has_long_name(root) ||
1685 dentry_has_short_name(root)))
1687 WARNING("The root directory has a nonempty name; "
1689 dentry_set_name(root, NULL);
1692 if (unlikely(!dentry_is_directory(root))) {
1693 ERROR("The root of the WIM image is not a directory!");
1694 ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
1695 goto err_free_dentry_tree;
/* Only descend when the root says where its children start. */
1698 if (likely(root->subdir_offset != 0)) {
1699 ret = read_dentry_tree_recursive(buf, buf_len, root);
1701 goto err_free_dentry_tree;
1704 WARNING("The metadata resource has no directory entries; "
1705 "treating as an empty image.");
1710 err_free_dentry_tree:
/* NULL is passed as the blob table, which selects freeing the dentries
 * without touching blob reference counts. */
1711 free_dentry_tree(root, NULL);
/* Serializes one extra stream entry at @p: the fixed-size header carrying
 * @hash and the byte length of the name, then the name itself when there is
 * one, then alignment to an 8-byte boundary.  The total size of the entry is
 * stored in its length field.
 * NOTE(review): the local declarations, the branch taken for
 * NO_STREAM_NAME, the body of the alignment loop and the return statement
 * are missing from this listing. */
1716 write_extra_stream_entry(u8 * restrict p, const utf16lechar * restrict name,
1717 const u8 * restrict hash)
/* View the output position as an on-disk entry header. */
1719 struct wim_extra_stream_entry_on_disk *disk_strm =
1720 (struct wim_extra_stream_entry_on_disk *)p;
/* NO_STREAM_NAME is a sentinel recognized by pointer identity. */
1724 if (name == NO_STREAM_NAME)
1727 name_nbytes = utf16le_len_bytes(name);
1729 disk_strm->reserved = 0;
1730 copy_hash(disk_strm->hash, hash);
1731 disk_strm->name_nbytes = cpu_to_le16(name_nbytes);
1732 p += sizeof(struct wim_extra_stream_entry_on_disk);
/* Two bytes beyond the name are copied as well, presumably its UTF-16 null
 * terminator -- confirm. */
1733 if (name_nbytes != 0)
1734 p = mempcpy(p, name, name_nbytes + 2);
1735 /* Align to 8-byte boundary */
1736 while ((uintptr_t)p & 7)
/* Stored last, once the final aligned end position is known. */
1738 disk_strm->length = cpu_to_le64(p - orig_p);
1743 * Write a WIM dentry to an output buffer.
1745 * This includes any extra stream entries that may follow the dentry itself.
1748 * The dentry to write.
1751 * The memory location to which to write the data.
1753 * Returns a pointer to the byte following the last written.
/* Serializes @dentry at @p, which must be 8-byte aligned: the fixed-size
 * fields first, then the names, optional extra data, and finally any extra
 * stream entries.
 * NOTE(review): short lines are missing from this listing, among them some
 * local declarations, the bodies of the alignment loops, the else branches
 * and the return statement. */
1756 write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
1758 const struct wim_inode *inode;
1759 struct wim_dentry_on_disk *disk_dentry;
1762 wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */
1765 inode = dentry->d_inode;
1766 disk_dentry = (struct wim_dentry_on_disk*)p;
/* Fixed-size fields, converted to little-endian. */
1768 disk_dentry->attributes = cpu_to_le32(inode->i_attributes);
1769 disk_dentry->security_id = cpu_to_le32(inode->i_security_id);
1770 disk_dentry->subdir_offset = cpu_to_le64(dentry->subdir_offset);
1772 disk_dentry->unused_1 = cpu_to_le64(0);
1773 disk_dentry->unused_2 = cpu_to_le64(0);
1775 disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time);
1776 disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time);
1777 disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time);
/* The same on-disk area has two layouts, chosen by the reparse point
 * attribute. */
1778 if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
1779 disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
1780 disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag);
1781 disk_dentry->reparse.rp_unknown_2 = cpu_to_le16(inode->i_rp_unknown_2);
1782 disk_dentry->reparse.not_rpfixed = cpu_to_le16(inode->i_not_rpfixed);
/* Other layout: an inode with exactly one link is written with a hard link
 * group ID of 0; otherwise the inode number is used. */
1784 disk_dentry->nonreparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
1785 disk_dentry->nonreparse.hard_link_group_id =
1786 cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino);
1789 disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes);
1790 disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes);
1791 p += sizeof(struct wim_dentry_on_disk);
/* Being the root and having a long name are mutually exclusive, and exactly
 * one of the two must hold. */
1793 wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry));
/* Each name is copied together with two further bytes, presumably its null
 * terminator -- confirm. */
1795 if (dentry_has_long_name(dentry))
1796 p = mempcpy(p, dentry->file_name, (u32)dentry->file_name_nbytes + 2);
1798 if (dentry_has_short_name(dentry))
1799 p = mempcpy(p, dentry->short_name, (u32)dentry->short_name_nbytes + 2);
1801 /* Align to 8-byte boundary */
1802 while ((uintptr_t)p & 7)
1805 if (inode->i_extra_size) {
1806 /* Extra tagged items --- not usually present. */
1807 p = mempcpy(p, inode->i_extra, inode->i_extra_size);
1809 /* Align to 8-byte boundary */
1810 while ((uintptr_t)p & 7)
/* The length field is set before any extra stream entry is emitted, so it
 * covers the dentry proper only. */
1814 disk_dentry->length = cpu_to_le64(p - orig_p);
/* Encrypted inode: the default hash field takes the hash of the unnamed
 * EFSRPC raw data stream, or the zero hash when there is none, and no extra
 * stream entries are written. */
1818 if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
1819 const struct wim_inode_stream *efs_strm;
1822 efs_strm = inode_get_unnamed_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA);
1823 efs_hash = efs_strm ? stream_hash(efs_strm) : zero_hash;
1824 copy_hash(disk_dentry->default_hash, efs_hash);
1825 disk_dentry->num_extra_streams = cpu_to_le16(0);
1828 * Extra stream entries:
1830 * - Use one extra stream entry for each named data stream
1831 * - Use one extra stream entry for the unnamed data stream when there is either:
1832 * - a reparse point stream
1833 * - at least one named data stream (for Windows PE bug workaround)
1834 * - Use one extra stream entry for the reparse point stream if there is one
/* First pass over the streams: find out which kinds are present and remember
 * the hashes that will be needed. */
1836 bool have_named_data_stream = false;
1837 bool have_reparse_point_stream = false;
1838 const u8 *unnamed_data_stream_hash = zero_hash;
/* Declared without an initializer: it is assigned only when a reparse point
 * stream is found and read only under have_reparse_point_stream. */
1839 const u8 *reparse_point_hash;
1840 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1841 const struct wim_inode_stream *strm = &inode->i_streams[i];
1842 if (strm->stream_type == STREAM_TYPE_DATA) {
1843 if (stream_is_named(strm))
1844 have_named_data_stream = true;
1846 unnamed_data_stream_hash = stream_hash(strm);
1847 } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) {
1848 have_reparse_point_stream = true;
1849 reparse_point_hash = stream_hash(strm);
1853 if (unlikely(have_reparse_point_stream || have_named_data_stream)) {
1855 unsigned num_extra_streams = 0;
/* With extra stream entries in use, the default hash field is zeroed and the
 * hashes go into the entries. */
1857 copy_hash(disk_dentry->default_hash, zero_hash);
1859 if (have_reparse_point_stream) {
1860 p = write_extra_stream_entry(p, NO_STREAM_NAME,
1861 reparse_point_hash);
1862 num_extra_streams++;
/* Entry for the unnamed data stream. */
1865 p = write_extra_stream_entry(p, NO_STREAM_NAME,
1866 unnamed_data_stream_hash);
1867 num_extra_streams++;
/* Second pass: one entry per named data stream. */
1869 for (unsigned i = 0; i < inode->i_num_streams; i++) {
1870 const struct wim_inode_stream *strm = &inode->i_streams[i];
1871 if (stream_is_named_data_stream(strm)) {
1872 p = write_extra_stream_entry(p, strm->stream_name,
1874 num_extra_streams++;
/* The count is stored as a 16-bit value below. */
1877 wimlib_assert(num_extra_streams <= 0xFFFF);
1879 disk_dentry->num_extra_streams = cpu_to_le16(num_extra_streams);
/* Presumably the branch for an inode with neither a reparse point stream nor
 * a named data stream: the unnamed data stream hash goes directly into the
 * default hash field, with no extra entries. */
1881 copy_hash(disk_dentry->default_hash, unnamed_data_stream_hash);
1882 disk_dentry->num_extra_streams = cpu_to_le16(0);
/* Per-dentry callback that write_dentry_tree() passes to
 * for_dentry_in_tree(): for a dentry whose subdir_offset is nonzero, it
 * writes each child dentry in turn with write_dentry().
 * NOTE(review): the lines that unpack @_pp into the write position, write the
 * end-of-directory entry and store the position back are missing from this
 * listing. */
1890 write_dir_dentries(struct wim_dentry *dir, void *_pp)
/* A zero subdir_offset means there is nothing to write for this dentry. */
1892 if (dir->subdir_offset != 0) {
1895 struct wim_dentry *child;
1897 /* write child dentries */
1898 for_dentry_child(child, dir)
1899 p = write_dentry(child, p);
1901 /* write end of directory entry */
1910 * Write a directory tree to the metadata resource.
1913 * The root of a dentry tree on which calculate_subdir_offsets() has been
1914 * called. This cannot be NULL; if the dentry tree is empty, the caller is
1915 * expected to first generate a dummy root directory.
1918 * Pointer to a buffer with enough space for the dentry tree. This size
1919 * must have been obtained by calculate_subdir_offsets().
1921 * Returns a pointer to the byte following the last written.
/* Serializes a whole dentry tree at @p: the root dentry first, then the
 * children of each directory through the write_dir_dentries() callback,
 * which receives the address of the write position.
 * NOTE(review): the statement that writes the end-of-directory entry after
 * the root and the end of this function are not visible in this listing. */
1924 write_dentry_tree(struct wim_dentry *root, u8 *p)
1926 DEBUG("Writing dentry tree.");
1928 wimlib_assert(root != NULL);
1930 /* write root dentry and end-of-directory entry following it */
1931 p = write_dentry(root, p);
1935 /* write the rest of the dentry tree */
1936 for_dentry_in_tree(root, write_dir_dentries, &p);