Rearrange struct wim_inode and improve comments
[wimlib] / src / dentry.c
1 /*
2  * dentry.c
3  *
4  * In the WIM file format, the dentries are stored in the "metadata resource"
5  * section right after the security data.  Each image in the WIM file has its
6  * own metadata resource with its own security data and dentry tree.  Dentries
7  * in different images may share file resources by referring to the same lookup
8  * table entries.
9  */
10
11 /*
12  * Copyright (C) 2012, 2013 Eric Biggers
13  *
14  * This file is part of wimlib, a library for working with WIM files.
15  *
16  * wimlib is free software; you can redistribute it and/or modify it under the
17  * terms of the GNU General Public License as published by the Free Software
18  * Foundation; either version 3 of the License, or (at your option) any later
19  * version.
20  *
21  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
22  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
23  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
24  *
25  * You should have received a copy of the GNU General Public License along with
26  * wimlib; if not, see http://www.gnu.org/licenses/.
27  */
28
29 #ifdef HAVE_CONFIG_H
30 #  include "config.h"
31 #endif
32
33 #include "wimlib.h"
34 #include "wimlib/dentry.h"
35 #include "wimlib/encoding.h"
36 #include "wimlib/endianness.h"
37 #include "wimlib/error.h"
38 #include "wimlib/lookup_table.h"
39 #include "wimlib/metadata.h"
40 #include "wimlib/resource.h"
41 #include "wimlib/sha1.h"
42 #include "wimlib/timestamp.h"
43
44 #include <errno.h>
45
46 /* WIM alternate data stream entry (on-disk format) */
47 struct wim_ads_entry_on_disk {
48         /*  Length of the entry, in bytes.  This apparently includes all
49          *  fixed-length fields, plus the stream name and null terminator if
50          *  present, and the padding up to an 8 byte boundary.  wimlib is a
51          *  little less strict when reading the entries, and only requires that
52          *  the number of bytes from this field is at least as large as the size
53          *  of the fixed length fields and stream name without null terminator.
54          *  */
55         le64  length;
56
57         le64  reserved;
58
59         /* SHA1 message digest of the uncompressed stream; or, alternatively,
60          * can be all zeroes if the stream has zero length. */
61         u8 hash[SHA1_HASH_SIZE];
62
63         /* Length of the stream name, in bytes.  0 if the stream is unnamed.  */
64         le16 stream_name_nbytes;
65
66         /* Stream name in UTF-16LE.  It is @stream_name_nbytes bytes long,
67          * excluding the null terminator.  There is a null terminator
68          * character if @stream_name_nbytes != 0; i.e., if this stream is named.
69          * */
70         utf16lechar stream_name[];
71 } _packed_attribute;
72 /* Size, in bytes, of the fixed-length fields of struct wim_ads_entry_on_disk (everything before stream_name); equals 8 + 8 + SHA1_HASH_SIZE + 2 assuming SHA1_HASH_SIZE is 20 -- TODO confirm. */
73 #define WIM_ADS_ENTRY_DISK_SIZE 38
74
75 /* On-disk format of a WIM dentry (directory entry), located in the metadata
76  * resource for a WIM image.  */
77 struct wim_dentry_on_disk {
78
79         /* Length of this directory entry in bytes, not including any alternate
80          * data stream entries.  Should be a multiple of 8 so that the following
81          * dentry or alternate data stream entry is aligned on an 8-byte
82          * boundary.  (If not, wimlib will round it up.)  It must be at least as
83          * long as the fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE),
84          * plus the lengths of the file name and/or short name if present.
85          *
86          * It is also possible for this field to be 0.  This situation, which is
87          * undocumented, indicates the end of a list of sibling nodes in a
88          * directory.  It also means the real length is 8, because the dentry
89          * included only the length field, but that takes up 8 bytes.  */
90         le64 length;
91
92         /* Attributes of the file or directory.  This is a bitwise OR of the
93          * FILE_ATTRIBUTE_* constants and should correspond to the value
94          * retrieved by GetFileAttributes() on Windows. */
95         le32 attributes;
96
97         /* A value that specifies the security descriptor for this file or
98          * directory.  If -1, the file or directory has no security descriptor.
99          * Otherwise, it is a 0-based index into the WIM image's table of
100          * security descriptors (see: `struct wim_security_data') */
101         sle32 security_id;
102
103         /* Offset, in bytes, from the start of the uncompressed metadata
104          * resource of this directory's child directory entries, or 0 if this
105          * directory entry does not correspond to a directory or otherwise does
106          * not have any children. */
107         le64 subdir_offset;
108
109         /* Reserved fields */
110         le64 unused_1;
111         le64 unused_2;
112
113
114         /* Creation time, last access time, and last write time, in
115          * 100-nanosecond intervals since 12:00 a.m UTC January 1, 1601.  They
116          * should correspond to the times gotten by calling GetFileTime() on
117          * Windows. */
118         le64 creation_time;
119         le64 last_access_time;
120         le64 last_write_time;
121
122         /* Vaguely, the SHA-1 message digest ("hash") of the file's contents.
123          * More specifically, this is for the "unnamed data stream" rather than
124          * any "alternate data streams".  This hash value is used to look up the
125          * corresponding entry in the WIM's stream lookup table to actually find
126          * the file contents within the WIM.
127          *
128          * If the file has no unnamed data stream (e.g. is a directory), then
129          * this field will be all zeroes.  If the unnamed data stream is empty
130          * (i.e. an "empty file"), then this field is also expected to be all
131          * zeroes.  (It will be if wimlib created the WIM image, at least;
132          * otherwise it can't be ruled out that the SHA-1 message digest of 0
133          * bytes of data is given explicitly.)
134          *
135          * If the file has reparse data, then this field will instead specify
136          * the SHA-1 message digest of the reparse data.  If it is somehow
137          * possible for a file to have both an unnamed data stream and reparse
138          * data, then this is not handled by wimlib.
139          *
140          * As a further special case, if this field is all zeroes but there is
141          * an alternate data stream entry with no name and a nonzero SHA-1
142          * message digest field, then that hash must be used instead of this
143          * one.  (wimlib does not use this quirk on WIM images it creates.)
144          */
145         u8 unnamed_stream_hash[SHA1_HASH_SIZE];
146
147         /* The format of the following data is not yet completely known and they
148          * do not correspond to Microsoft's documentation.
149          *
150          * If this directory entry is for a reparse point (has
151          * FILE_ATTRIBUTE_REPARSE_POINT set in the attributes field), then the
152          * version of the following fields containing the reparse tag is valid.
153          * Furthermore, the field notated as not_rpfixed, as far as I can tell,
154          * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the
155          * targets of absolute symbolic links) were *not* done, and otherwise 0.
156          *
157          * If this directory entry is not for a reparse point, then the version
158          * of the following fields containing the hard_link_group_id is valid.
159          * All MS says about this field is that "If this file is part of a hard
160          * link set, all the directory entries in the set will share the same
161          * value in this field.".  However, more specifically I have observed
162          * the following:
163          *    - If the file is part of a hard link set of size 1, then the
164          *    hard_link_group_id should be set to either 0, which is treated
165          *    specially as indicating "not hardlinked", or any unique value.
166          *    - The specific nonzero values used to identify hard link sets do
167          *    not matter, as long as they are unique.
168          *    - However, due to bugs in Microsoft's software, it is actually NOT
169          *    guaranteed that directory entries that share the same hard link
170          *    group ID are actually hard linked to each other.  We have to
171          *    handle this by using special code to use distinguishing features
172          *    (which is possible because some information about the underlying
173          *    inode is repeated in each dentry) to split up these fake hard link
174          *    groups into what they actually are supposed to be.
175          */
176         union {
177                 struct {
178                         le32 rp_unknown_1;
179                         le32 reparse_tag;
180                         le16 rp_unknown_2;
181                         le16 not_rpfixed;
182                 } _packed_attribute reparse;
183                 struct {
184                         le32 rp_unknown_1;
185                         le64 hard_link_group_id;
186                 } _packed_attribute nonreparse;
187         };
188
189         /* Number of alternate data stream entries that directly follow this
190          * dentry on-disk. */
191         le16 num_alternate_data_streams;
192
193         /* Length of this file's UTF-16LE encoded short name (8.3 DOS-compatible
194          * name), if present, in bytes, excluding the null terminator.  If this
195          * file has no short name, then this field should be 0.  */
196         le16 short_name_nbytes;
197
198         /* Length of this file's UTF-16LE encoded "long" name, in bytes,
199          * excluding the null terminator.  If this file has no long name, then
200          * this field should be 0.  It's expected that only the root dentry has
201          * this field set to 0.  */
202         le16 file_name_nbytes;
203
204         /* Followed by variable length file name, in UTF16-LE, if
205          * file_name_nbytes != 0.  Includes null terminator. */
206         /*utf16lechar file_name[];*/
207
208         /* Followed by variable length short name, in UTF16-LE, if
209          * short_name_nbytes != 0.  Includes null terminator. */
210         /*utf16lechar short_name[];*/
211 } _packed_attribute;
212 /* Size, in bytes, of the fixed-length fields of struct wim_dentry_on_disk; equals the sum of the field sizes above assuming SHA1_HASH_SIZE is 20 -- TODO confirm. */
213 #define WIM_DENTRY_DISK_SIZE 102
214
215 /* Calculates the unaligned length, in bytes, of an on-disk WIM dentry that has
216  * a file name and short name that take the specified numbers of bytes.  This
217  * excludes any alternate data stream entries that may follow the dentry. */
218 static u64
219 _dentry_correct_length_unaligned(u16 file_name_nbytes, u16 short_name_nbytes)
220 {
221         u64 length = sizeof(struct wim_dentry_on_disk);
222         if (file_name_nbytes)
223                 length += file_name_nbytes + 2;
224         if (short_name_nbytes)
225                 length += short_name_nbytes + 2;
226         return length;
227 }
228
229 /* Calculates the unaligned length, in bytes, of an on-disk WIM dentry, based on
230  * the file name length and short name length.  Note that dentry->length is
231  * ignored; also, this excludes any alternate data stream entries that may
232  * follow the dentry. */
233 static u64
234 dentry_correct_length_unaligned(const struct wim_dentry *dentry)
235 {
236         return _dentry_correct_length_unaligned(dentry->file_name_nbytes,
237                                                 dentry->short_name_nbytes);
238 }
239
240 /* Duplicates a string of system-dependent encoding into a UTF-16LE string and
241  * returns the string and its length, in bytes, in the pointer arguments.  Frees
242  * any existing string at the return location before overwriting it. */
243 static int
244 get_utf16le_name(const tchar *name, utf16lechar **name_utf16le_ret,
245                  u16 *name_utf16le_nbytes_ret)
246 {
247         utf16lechar *name_utf16le;
248         size_t name_utf16le_nbytes;
249         int ret;
250 #if TCHAR_IS_UTF16LE
251         name_utf16le_nbytes = tstrlen(name) * sizeof(utf16lechar);
252         name_utf16le = MALLOC(name_utf16le_nbytes + sizeof(utf16lechar));
253         if (!name_utf16le)
254                 return WIMLIB_ERR_NOMEM;
255         memcpy(name_utf16le, name, name_utf16le_nbytes + sizeof(utf16lechar));
256         ret = 0;
257 #else
258
259         ret = tstr_to_utf16le(name, tstrlen(name), &name_utf16le,
260                               &name_utf16le_nbytes);
261         if (ret == 0) {
262                 if (name_utf16le_nbytes > 0xffff) {
263                         FREE(name_utf16le);
264                         ERROR("Multibyte string \"%"TS"\" is too long!", name);
265                         ret = WIMLIB_ERR_INVALID_UTF8_STRING;
266                 }
267         }
268 #endif
269         if (ret == 0) {
270                 FREE(*name_utf16le_ret);
271                 *name_utf16le_ret = name_utf16le;
272                 *name_utf16le_nbytes_ret = name_utf16le_nbytes;
273         }
274         return ret;
275 }
276
277 /* Sets the name of a WIM dentry from a multibyte string. */
278 int
279 set_dentry_name(struct wim_dentry *dentry, const tchar *new_name)
280 {
281         int ret;
282         ret = get_utf16le_name(new_name, &dentry->file_name,
283                                &dentry->file_name_nbytes);
284         if (ret == 0) {
285                 /* Clear the short name and recalculate the dentry length */
286                 if (dentry_has_short_name(dentry)) {
287                         FREE(dentry->short_name);
288                         dentry->short_name = NULL;
289                         dentry->short_name_nbytes = 0;
290                 }
291         }
292         return ret;
293 }
294
295 /* Returns the total length of a WIM alternate data stream entry on-disk,
296  * including the stream name, the null terminator, AND the padding after the
297  * entry to align the next ADS entry or dentry on an 8-byte boundary. */
298 static u64
299 ads_entry_total_length(const struct wim_ads_entry *entry)
300 {
301         u64 len = sizeof(struct wim_ads_entry_on_disk);
302         if (entry->stream_name_nbytes)
303                 len += entry->stream_name_nbytes + 2;
304         return (len + 7) & ~7;
305 }
306
307
308 static u64
309 _dentry_total_length(const struct wim_dentry *dentry, u64 length)
310 {
311         const struct wim_inode *inode = dentry->d_inode;
312         for (u16 i = 0; i < inode->i_num_ads; i++)
313                 length += ads_entry_total_length(&inode->i_ads_entries[i]);
314         return (length + 7) & ~7;
315 }
316
317 /* Calculate the aligned *total* length of an on-disk WIM dentry.  This includes
318  * all alternate data streams. */
319 u64
320 dentry_correct_total_length(const struct wim_dentry *dentry)
321 {
322         return _dentry_total_length(dentry,
323                                     dentry_correct_length_unaligned(dentry));
324 }
325
326 /* Like dentry_correct_total_length(), but use the existing dentry->length field
327  * instead of calculating its "correct" value. */
328 static u64
329 dentry_total_length(const struct wim_dentry *dentry)
330 {
331         return _dentry_total_length(dentry, dentry->length);
332 }
333
334 int
335 for_dentry_in_rbtree(struct rb_node *root,
336                      int (*visitor)(struct wim_dentry *, void *),
337                      void *arg)
338 {
339         int ret;
340         struct rb_node *node = root;
341         LIST_HEAD(stack);
342         while (1) {
343                 if (node) {
344                         list_add(&rbnode_dentry(node)->tmp_list, &stack);
345                         node = node->rb_left;
346                 } else {
347                         struct list_head *next;
348                         struct wim_dentry *dentry;
349
350                         next = stack.next;
351                         if (next == &stack)
352                                 return 0;
353                         dentry = container_of(next, struct wim_dentry, tmp_list);
354                         list_del(next);
355                         ret = visitor(dentry, arg);
356                         if (ret != 0)
357                                 return ret;
358                         node = dentry->rb_node.rb_right;
359                 }
360         }
361 }
362
363 static int
364 for_dentry_tree_in_rbtree_depth(struct rb_node *node,
365                                 int (*visitor)(struct wim_dentry*, void*),
366                                 void *arg)
367 {
368         int ret;
369         if (node) {
370                 ret = for_dentry_tree_in_rbtree_depth(node->rb_left,
371                                                       visitor, arg);
372                 if (ret != 0)
373                         return ret;
374                 ret = for_dentry_tree_in_rbtree_depth(node->rb_right,
375                                                       visitor, arg);
376                 if (ret != 0)
377                         return ret;
378                 ret = for_dentry_in_tree_depth(rbnode_dentry(node), visitor, arg);
379                 if (ret != 0)
380                         return ret;
381         }
382         return 0;
383 }
384
385 static int
386 for_dentry_tree_in_rbtree(struct rb_node *node,
387                           int (*visitor)(struct wim_dentry*, void*),
388                           void *arg)
389 {
390         int ret;
391         if (node) {
392                 ret = for_dentry_tree_in_rbtree(node->rb_left, visitor, arg);
393                 if (ret)
394                         return ret;
395                 ret = for_dentry_in_tree(rbnode_dentry(node), visitor, arg);
396                 if (ret)
397                         return ret;
398                 ret = for_dentry_tree_in_rbtree(node->rb_right, visitor, arg);
399                 if (ret)
400                         return ret;
401         }
402         return 0;
403 }
404
405 /* Calls a function on all directory entries in a WIM dentry tree.  Logically,
406  * this is a pre-order traversal (the function is called on a parent dentry
407  * before its children), but sibling dentries will be visited in order as well.
408  * */
409 int
410 for_dentry_in_tree(struct wim_dentry *root,
411                    int (*visitor)(struct wim_dentry*, void*), void *arg)
412 {
413         int ret;
414
415         if (!root)
416                 return 0;
417         ret = (*visitor)(root, arg);
418         if (ret)
419                 return ret;
420         return for_dentry_tree_in_rbtree(root->d_inode->i_children.rb_node,
421                                          visitor,
422                                          arg);
423 }
424
425 /* Like for_dentry_in_tree(), but the visitor function is always called on a
426  * dentry's children before on itself. */
427 int
428 for_dentry_in_tree_depth(struct wim_dentry *root,
429                          int (*visitor)(struct wim_dentry*, void*), void *arg)
430 {
431         int ret;
432
433         if (!root)
434                 return 0;
435         ret = for_dentry_tree_in_rbtree_depth(root->d_inode->i_children.rb_node,
436                                               visitor, arg);
437         if (ret)
438                 return ret;
439         return (*visitor)(root, arg);
440 }
441
442 /* Calculate the full path of @dentry.  The full path of its parent must have
443  * already been calculated, or it must be the root dentry. */
444 int
445 calculate_dentry_full_path(struct wim_dentry *dentry)
446 {
447         tchar *full_path;
448         u32 full_path_nbytes;
449         int ret;
450
451         if (dentry->_full_path)
452                 return 0;
453
454         if (dentry_is_root(dentry)) {
455                 static const tchar _root_path[] = {WIM_PATH_SEPARATOR, T('\0')};
456                 full_path = TSTRDUP(_root_path);
457                 if (!full_path)
458                         return WIMLIB_ERR_NOMEM;
459                 full_path_nbytes = 1 * sizeof(tchar);
460         } else {
461                 struct wim_dentry *parent;
462                 tchar *parent_full_path;
463                 u32 parent_full_path_nbytes;
464                 size_t filename_nbytes;
465
466                 parent = dentry->parent;
467                 if (dentry_is_root(parent)) {
468                         parent_full_path = T("");
469                         parent_full_path_nbytes = 0;
470                 } else {
471                         if (!parent->_full_path) {
472                                 ret = calculate_dentry_full_path(parent);
473                                 if (ret)
474                                         return ret;
475                         }
476                         parent_full_path = parent->_full_path;
477                         parent_full_path_nbytes = parent->full_path_nbytes;
478                 }
479
480                 /* Append this dentry's name as a tchar string to the full path
481                  * of the parent followed by the path separator */
482         #if TCHAR_IS_UTF16LE
483                 filename_nbytes = dentry->file_name_nbytes;
484         #else
485                 {
486                         int ret = utf16le_to_tstr_nbytes(dentry->file_name,
487                                                          dentry->file_name_nbytes,
488                                                          &filename_nbytes);
489                         if (ret)
490                                 return ret;
491                 }
492         #endif
493
494                 full_path_nbytes = parent_full_path_nbytes + sizeof(tchar) +
495                                    filename_nbytes;
496                 full_path = MALLOC(full_path_nbytes + sizeof(tchar));
497                 if (!full_path)
498                         return WIMLIB_ERR_NOMEM;
499                 memcpy(full_path, parent_full_path, parent_full_path_nbytes);
500                 full_path[parent_full_path_nbytes / sizeof(tchar)] = WIM_PATH_SEPARATOR;
501         #if TCHAR_IS_UTF16LE
502                 memcpy(&full_path[parent_full_path_nbytes / sizeof(tchar) + 1],
503                        dentry->file_name,
504                        filename_nbytes + sizeof(tchar));
505         #else
506                 utf16le_to_tstr_buf(dentry->file_name,
507                                     dentry->file_name_nbytes,
508                                     &full_path[parent_full_path_nbytes /
509                                                sizeof(tchar) + 1]);
510         #endif
511         }
512         dentry->_full_path = full_path;
513         dentry->full_path_nbytes= full_path_nbytes;
514         return 0;
515 }
516
/* Visitor-shaped adapter around calculate_dentry_full_path(); the second
 * argument is not used. */
static int
do_calculate_dentry_full_path(struct wim_dentry *dentry, void *_ignore)
{
        (void)_ignore;
        return calculate_dentry_full_path(dentry);
}
522
523 int
524 calculate_dentry_tree_full_paths(struct wim_dentry *root)
525 {
526         return for_dentry_in_tree(root, do_calculate_dentry_full_path, NULL);
527 }
528
529 tchar *
530 dentry_full_path(struct wim_dentry *dentry)
531 {
532         calculate_dentry_full_path(dentry);
533         return dentry->_full_path;
534 }
535
536 static int
537 increment_subdir_offset(struct wim_dentry *dentry, void *subdir_offset_p)
538 {
539         *(u64*)subdir_offset_p += dentry_correct_total_length(dentry);
540         return 0;
541 }
542
543 static int
544 call_calculate_subdir_offsets(struct wim_dentry *dentry, void *subdir_offset_p)
545 {
546         calculate_subdir_offsets(dentry, subdir_offset_p);
547         return 0;
548 }
549
550 /*
551  * Recursively calculates the subdir offsets for a directory tree.
552  *
553  * @dentry:  The root of the directory tree.
554  * @subdir_offset_p:  The current subdirectory offset; i.e., the subdirectory
555  *                    offset for @dentry.
556  */
557 void
558 calculate_subdir_offsets(struct wim_dentry *dentry, u64 *subdir_offset_p)
559 {
560         struct rb_node *node;
561
562         dentry->subdir_offset = *subdir_offset_p;
563         node = dentry->d_inode->i_children.rb_node;
564         if (node) {
565                 /* Advance the subdir offset by the amount of space the children
566                  * of this dentry take up. */
567                 for_dentry_in_rbtree(node, increment_subdir_offset, subdir_offset_p);
568
569                 /* End-of-directory dentry on disk. */
570                 *subdir_offset_p += 8;
571
572                 /* Recursively call calculate_subdir_offsets() on all the
573                  * children. */
574                 for_dentry_in_rbtree(node, call_calculate_subdir_offsets, subdir_offset_p);
575         } else {
576                 /* On disk, childless directories have a valid subdir_offset
577                  * that points to an 8-byte end-of-directory dentry.  Regular
578                  * files or reparse points have a subdir_offset of 0. */
579                 if (dentry_is_directory(dentry))
580                         *subdir_offset_p += 8;
581                 else
582                         dentry->subdir_offset = 0;
583         }
584 }
585
586 /* Case-sensitive UTF-16LE dentry or stream name comparison.  Used on both UNIX
587  * (always) and Windows (sometimes) */
588 static int
589 compare_utf16le_names_case_sensitive(const utf16lechar *name1, size_t nbytes1,
590                                      const utf16lechar *name2, size_t nbytes2)
591 {
592         /* Return the result if the strings differ up to their minimum length.
593          * Note that we cannot use strcmp() or strncmp() here, as the strings
594          * are in UTF-16LE format. */
595         int result = memcmp(name1, name2, min(nbytes1, nbytes2));
596         if (result)
597                 return result;
598
599         /* The strings are the same up to their minimum length, so return a
600          * result based on their lengths. */
601         if (nbytes1 < nbytes2)
602                 return -1;
603         else if (nbytes1 > nbytes2)
604                 return 1;
605         else
606                 return 0;
607 }
608
#ifdef __WIN32__
/* Windows only:  case-insensitive comparison of two UTF-16LE dentry or stream
 * names, given with their lengths in bytes.
 *
 * The names are compared with _wcsnicmp() over their common length in 16-bit
 * characters.  If that comparison is nonzero, its result is returned.
 * Otherwise the shorter name sorts first:  the result is -1, 1 or 0 depending
 * on whether @nbytes1 is less than, greater than or equal to @nbytes2. */
static int
compare_utf16le_names_case_insensitive(const utf16lechar *name1, size_t nbytes1,
                                       const utf16lechar *name2, size_t nbytes2)
{
        const size_t common_nchars = min(nbytes1 / 2, nbytes2 / 2);
        const int cmp = _wcsnicmp((const wchar_t*)name1,
                                  (const wchar_t*)name2,
                                  common_nchars);

        if (cmp != 0)
                return cmp;

        /* Equal over the common prefix:  decide by length. */
        return (nbytes1 > nbytes2) - (nbytes1 < nbytes2);
}
#endif /* __WIN32__ */
632
/* Platform default for name comparison:  case-sensitive everywhere except
 * Windows, where it is case-insensitive. */
#ifndef __WIN32__
#  define compare_utf16le_names compare_utf16le_names_case_sensitive
#else
#  define compare_utf16le_names compare_utf16le_names_case_insensitive
#endif
638
639
#ifdef __WIN32__
/* Windows only:  case-insensitive comparison of the file names of the dentries
 * @d1 and @d2.  The result has the same meaning as that of
 * compare_utf16le_names_case_insensitive(). */
static int
dentry_compare_names_case_insensitive(const struct wim_dentry *d1,
                                      const struct wim_dentry *d2)
{
        const utf16lechar *name1 = d1->file_name;
        const utf16lechar *name2 = d2->file_name;

        return compare_utf16le_names_case_insensitive(name1,
                                                      d1->file_name_nbytes,
                                                      name2,
                                                      d2->file_name_nbytes);
}
#endif /* __WIN32__ */
651
652 static int
653 dentry_compare_names_case_sensitive(const struct wim_dentry *d1,
654                                     const struct wim_dentry *d2)
655 {
656         return compare_utf16le_names_case_sensitive(d1->file_name,
657                                                     d1->file_name_nbytes,
658                                                     d2->file_name,
659                                                     d2->file_name_nbytes);
660 }
661
/* Platform default for comparing dentries by name:  case-sensitive everywhere
 * except Windows, where it is case-insensitive. */
#ifndef __WIN32__
#  define dentry_compare_names dentry_compare_names_case_sensitive
#else
#  define dentry_compare_names dentry_compare_names_case_insensitive
#endif
667
668 /* Return %true iff the alternate data stream entry @entry has the UTF-16LE
669  * stream name @name that has length @name_nbytes bytes. */
670 static inline bool
671 ads_entry_has_name(const struct wim_ads_entry *entry,
672                    const utf16lechar *name, size_t name_nbytes)
673 {
674         return !compare_utf16le_names(name, name_nbytes,
675                                       entry->stream_name,
676                                       entry->stream_name_nbytes);
677 }
678
679 /* Given a UTF-16LE filename and a directory, look up the dentry for the file.
680  * Return it if found, otherwise NULL.  This is case-sensitive on UNIX and
681  * case-insensitive on Windows. */
682 struct wim_dentry *
683 get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
684                                    const utf16lechar *name,
685                                    size_t name_nbytes)
686 {
687         struct rb_node *node;
688
689 #ifdef __WIN32__
690         node = dentry->d_inode->i_children_case_insensitive.rb_node;
691 #else
692         node = dentry->d_inode->i_children.rb_node;
693 #endif
694
695         struct wim_dentry *child;
696         while (node) {
697         #ifdef __WIN32__
698                 child = rb_entry(node, struct wim_dentry, rb_node_case_insensitive);
699         #else
700                 child = rbnode_dentry(node);
701         #endif
702                 int result = compare_utf16le_names(name, name_nbytes,
703                                                    child->file_name,
704                                                    child->file_name_nbytes);
705                 if (result < 0)
706                         node = node->rb_left;
707                 else if (result > 0)
708                         node = node->rb_right;
709                 else {
710                 #ifdef __WIN32__
711                         if (!list_empty(&child->case_insensitive_conflict_list))
712                         {
713                                 WARNING("Result of case-insensitive lookup is ambiguous "
714                                         "(returning \"%ls\" instead of \"%ls\")",
715                                         child->file_name,
716                                         container_of(child->case_insensitive_conflict_list.next,
717                                                      struct wim_dentry,
718                                                      case_insensitive_conflict_list)->file_name);
719                         }
720                 #endif
721                         return child;
722                 }
723         }
724         return NULL;
725 }
726
727 /* Returns the child of @dentry that has the file name @name.  Returns NULL if
728  * no child has the name. */
729 struct wim_dentry *
730 get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name)
731 {
732 #if TCHAR_IS_UTF16LE
733         return get_dentry_child_with_utf16le_name(dentry, name,
734                                                   tstrlen(name) * sizeof(tchar));
735 #else
736         utf16lechar *utf16le_name;
737         size_t utf16le_name_nbytes;
738         int ret;
739         struct wim_dentry *child;
740
741         ret = tstr_to_utf16le(name, tstrlen(name) * sizeof(tchar),
742                               &utf16le_name, &utf16le_name_nbytes);
743         if (ret) {
744                 child = NULL;
745         } else {
746                 child = get_dentry_child_with_utf16le_name(dentry,
747                                                            utf16le_name,
748                                                            utf16le_name_nbytes);
749                 FREE(utf16le_name);
750         }
751         return child;
752 #endif
753 }
754
755 static struct wim_dentry *
756 get_dentry_utf16le(WIMStruct *wim, const utf16lechar *path)
757 {
758         struct wim_dentry *cur_dentry, *parent_dentry;
759         const utf16lechar *p, *pp;
760
761         cur_dentry = parent_dentry = wim_root_dentry(wim);
762         if (!cur_dentry) {
763                 errno = ENOENT;
764                 return NULL;
765         }
766         p = path;
767         while (1) {
768                 while (*p == cpu_to_le16(WIM_PATH_SEPARATOR))
769                         p++;
770                 if (*p == cpu_to_le16('\0'))
771                         break;
772                 pp = p;
773                 while (*pp != cpu_to_le16(WIM_PATH_SEPARATOR) &&
774                        *pp != cpu_to_le16('\0'))
775                         pp++;
776
777                 cur_dentry = get_dentry_child_with_utf16le_name(parent_dentry, p,
778                                                                 (void*)pp - (void*)p);
779                 if (cur_dentry == NULL)
780                         break;
781                 p = pp;
782                 parent_dentry = cur_dentry;
783         }
784         if (cur_dentry == NULL) {
785                 if (dentry_is_directory(parent_dentry))
786                         errno = ENOENT;
787                 else
788                         errno = ENOTDIR;
789         }
790         return cur_dentry;
791 }
792
793 /* Returns the dentry corresponding to the @path, or NULL if there is no such
794  * dentry. */
795 struct wim_dentry *
796 get_dentry(WIMStruct *wim, const tchar *path)
797 {
798 #if TCHAR_IS_UTF16LE
799         return get_dentry_utf16le(wim, path);
800 #else
801         utf16lechar *path_utf16le;
802         size_t path_utf16le_nbytes;
803         int ret;
804         struct wim_dentry *dentry;
805
806         ret = tstr_to_utf16le(path, tstrlen(path) * sizeof(tchar),
807                               &path_utf16le, &path_utf16le_nbytes);
808         if (ret)
809                 return NULL;
810         dentry = get_dentry_utf16le(wim, path_utf16le);
811         FREE(path_utf16le);
812         return dentry;
813 #endif
814 }
815
816 struct wim_inode *
817 wim_pathname_to_inode(WIMStruct *wim, const tchar *path)
818 {
819         struct wim_dentry *dentry;
820         dentry = get_dentry(wim, path);
821         if (dentry)
822                 return dentry->d_inode;
823         else
824                 return NULL;
825 }
826
827 /* Takes in a path of length @len in @buf, and transforms it into a string for
828  * the path of its parent directory. */
829 static void
830 to_parent_name(tchar *buf, size_t len)
831 {
832         ssize_t i = (ssize_t)len - 1;
833         while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR)
834                 i--;
835         while (i >= 0 && buf[i] != WIM_PATH_SEPARATOR)
836                 i--;
837         while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR)
838                 i--;
839         buf[i + 1] = T('\0');
840 }
841
842 /* Returns the dentry that corresponds to the parent directory of @path, or NULL
843  * if the dentry is not found. */
844 struct wim_dentry *
845 get_parent_dentry(WIMStruct *wim, const tchar *path)
846 {
847         size_t path_len = tstrlen(path);
848         tchar buf[path_len + 1];
849
850         tmemcpy(buf, path, path_len + 1);
851         to_parent_name(buf, path_len);
852         return get_dentry(wim, buf);
853 }
854
855 /* Prints the full path of a dentry. */
856 int
857 print_dentry_full_path(struct wim_dentry *dentry, void *_ignore)
858 {
859         int ret = calculate_dentry_full_path(dentry);
860         if (ret)
861                 return ret;
862         tprintf(T("%"TS"\n"), dentry->_full_path);
863         return 0;
864 }
865
866 /* We want to be able to show the names of the file attribute flags that are
867  * set. */
868 struct file_attr_flag {
869         u32 flag;
870         const tchar *name;
871 };
872 struct file_attr_flag file_attr_flags[] = {
873         {FILE_ATTRIBUTE_READONLY,           T("READONLY")},
874         {FILE_ATTRIBUTE_HIDDEN,             T("HIDDEN")},
875         {FILE_ATTRIBUTE_SYSTEM,             T("SYSTEM")},
876         {FILE_ATTRIBUTE_DIRECTORY,          T("DIRECTORY")},
877         {FILE_ATTRIBUTE_ARCHIVE,            T("ARCHIVE")},
878         {FILE_ATTRIBUTE_DEVICE,             T("DEVICE")},
879         {FILE_ATTRIBUTE_NORMAL,             T("NORMAL")},
880         {FILE_ATTRIBUTE_TEMPORARY,          T("TEMPORARY")},
881         {FILE_ATTRIBUTE_SPARSE_FILE,        T("SPARSE_FILE")},
882         {FILE_ATTRIBUTE_REPARSE_POINT,      T("REPARSE_POINT")},
883         {FILE_ATTRIBUTE_COMPRESSED,         T("COMPRESSED")},
884         {FILE_ATTRIBUTE_OFFLINE,            T("OFFLINE")},
885         {FILE_ATTRIBUTE_NOT_CONTENT_INDEXED,T("NOT_CONTENT_INDEXED")},
886         {FILE_ATTRIBUTE_ENCRYPTED,          T("ENCRYPTED")},
887         {FILE_ATTRIBUTE_VIRTUAL,            T("VIRTUAL")},
888 };
889
890 /* Prints a directory entry.  @lookup_table is a pointer to the lookup table, if
891  * available.  If the dentry is unresolved and the lookup table is NULL, the
892  * lookup table entries will not be printed.  Otherwise, they will be. */
893 int
894 print_dentry(struct wim_dentry *dentry, void *lookup_table)
895 {
896         const u8 *hash;
897         struct wim_lookup_table_entry *lte;
898         const struct wim_inode *inode = dentry->d_inode;
899         tchar buf[50];
900
901         tprintf(T("[DENTRY]\n"));
902         tprintf(T("Length            = %"PRIu64"\n"), dentry->length);
903         tprintf(T("Attributes        = 0x%x\n"), inode->i_attributes);
904         for (size_t i = 0; i < ARRAY_LEN(file_attr_flags); i++)
905                 if (file_attr_flags[i].flag & inode->i_attributes)
906                         tprintf(T("    FILE_ATTRIBUTE_%"TS" is set\n"),
907                                 file_attr_flags[i].name);
908         tprintf(T("Security ID       = %d\n"), inode->i_security_id);
909         tprintf(T("Subdir offset     = %"PRIu64"\n"), dentry->subdir_offset);
910
911         wim_timestamp_to_str(inode->i_creation_time, buf, sizeof(buf));
912         tprintf(T("Creation Time     = %"TS"\n"), buf);
913
914         wim_timestamp_to_str(inode->i_last_access_time, buf, sizeof(buf));
915         tprintf(T("Last Access Time  = %"TS"\n"), buf);
916
917         wim_timestamp_to_str(inode->i_last_write_time, buf, sizeof(buf));
918         tprintf(T("Last Write Time   = %"TS"\n"), buf);
919
920         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
921                 tprintf(T("Reparse Tag       = 0x%"PRIx32"\n"), inode->i_reparse_tag);
922                 tprintf(T("Reparse Point Flags = 0x%"PRIx16"\n"),
923                         inode->i_not_rpfixed);
924                 tprintf(T("Reparse Point Unknown 2 = 0x%"PRIx32"\n"),
925                         inode->i_rp_unknown_2);
926         }
927         tprintf(T("Reparse Point Unknown 1 = 0x%"PRIx32"\n"),
928                 inode->i_rp_unknown_1);
929         tprintf(T("Hard Link Group   = 0x%"PRIx64"\n"), inode->i_ino);
930         tprintf(T("Hard Link Group Size = %"PRIu32"\n"), inode->i_nlink);
931         tprintf(T("Number of Alternate Data Streams = %hu\n"), inode->i_num_ads);
932         if (dentry_has_long_name(dentry))
933                 wimlib_printf(T("Filename = \"%"WS"\"\n"), dentry->file_name);
934         if (dentry_has_short_name(dentry))
935                 wimlib_printf(T("Short Name \"%"WS"\"\n"), dentry->short_name);
936         if (dentry->_full_path)
937                 tprintf(T("Full Path = \"%"TS"\"\n"), dentry->_full_path);
938
939         lte = inode_stream_lte(dentry->d_inode, 0, lookup_table);
940         if (lte) {
941                 print_lookup_table_entry(lte, stdout);
942         } else {
943                 hash = inode_stream_hash(inode, 0);
944                 if (hash) {
945                         tprintf(T("Hash              = 0x"));
946                         print_hash(hash, stdout);
947                         tputchar(T('\n'));
948                         tputchar(T('\n'));
949                 }
950         }
951         for (u16 i = 0; i < inode->i_num_ads; i++) {
952                 tprintf(T("[Alternate Stream Entry %u]\n"), i);
953                 wimlib_printf(T("Name = \"%"WS"\"\n"),
954                               inode->i_ads_entries[i].stream_name);
955                 tprintf(T("Name Length (UTF16 bytes) = %hu\n"),
956                        inode->i_ads_entries[i].stream_name_nbytes);
957                 hash = inode_stream_hash(inode, i + 1);
958                 if (hash) {
959                         tprintf(T("Hash              = 0x"));
960                         print_hash(hash, stdout);
961                         tputchar(T('\n'));
962                 }
963                 print_lookup_table_entry(inode_stream_lte(inode, i + 1, lookup_table),
964                                          stdout);
965         }
966         return 0;
967 }
968
969 /* Initializations done on every `struct wim_dentry'. */
970 static void
971 dentry_common_init(struct wim_dentry *dentry)
972 {
973         memset(dentry, 0, sizeof(struct wim_dentry));
974 }
975
976 struct wim_inode *
977 new_timeless_inode(void)
978 {
979         struct wim_inode *inode = CALLOC(1, sizeof(struct wim_inode));
980         if (inode) {
981                 inode->i_security_id = -1;
982                 inode->i_nlink = 1;
983                 inode->i_next_stream_id = 1;
984                 inode->i_not_rpfixed = 1;
985                 INIT_LIST_HEAD(&inode->i_list);
986         #ifdef WITH_FUSE
987                 if (pthread_mutex_init(&inode->i_mutex, NULL) != 0) {
988                         ERROR_WITH_ERRNO("Error initializing mutex");
989                         FREE(inode);
990                         return NULL;
991                 }
992         #endif
993                 INIT_LIST_HEAD(&inode->i_dentry);
994         }
995         return inode;
996 }
997
998 static struct wim_inode *
999 new_inode(void)
1000 {
1001         struct wim_inode *inode = new_timeless_inode();
1002         if (inode) {
1003                 u64 now = get_wim_timestamp();
1004                 inode->i_creation_time = now;
1005                 inode->i_last_access_time = now;
1006                 inode->i_last_write_time = now;
1007         }
1008         return inode;
1009 }
1010
1011 /* Creates an unlinked directory entry. */
1012 int
1013 new_dentry(const tchar *name, struct wim_dentry **dentry_ret)
1014 {
1015         struct wim_dentry *dentry;
1016         int ret;
1017
1018         dentry = MALLOC(sizeof(struct wim_dentry));
1019         if (!dentry)
1020                 return WIMLIB_ERR_NOMEM;
1021
1022         dentry_common_init(dentry);
1023         ret = set_dentry_name(dentry, name);
1024         if (ret == 0) {
1025                 dentry->parent = dentry;
1026                 *dentry_ret = dentry;
1027         } else {
1028                 FREE(dentry);
1029                 ERROR("Failed to set name on new dentry with name \"%"TS"\"",
1030                       name);
1031         }
1032         return ret;
1033 }
1034
1035
1036 static int
1037 _new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret,
1038                         bool timeless)
1039 {
1040         struct wim_dentry *dentry;
1041         int ret;
1042
1043         ret = new_dentry(name, &dentry);
1044         if (ret)
1045                 return ret;
1046
1047         if (timeless)
1048                 dentry->d_inode = new_timeless_inode();
1049         else
1050                 dentry->d_inode = new_inode();
1051         if (!dentry->d_inode) {
1052                 free_dentry(dentry);
1053                 return WIMLIB_ERR_NOMEM;
1054         }
1055
1056         inode_add_dentry(dentry, dentry->d_inode);
1057         *dentry_ret = dentry;
1058         return 0;
1059 }
1060
1061 int
1062 new_dentry_with_timeless_inode(const tchar *name, struct wim_dentry **dentry_ret)
1063 {
1064         return _new_dentry_with_inode(name, dentry_ret, true);
1065 }
1066
1067 int
1068 new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret)
1069 {
1070         return _new_dentry_with_inode(name, dentry_ret, false);
1071 }
1072
1073 int
1074 new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret)
1075 {
1076         int ret;
1077         struct wim_dentry *dentry;
1078
1079         DEBUG("Creating filler directory \"%"TS"\"", name);
1080         ret = new_dentry_with_inode(name, &dentry);
1081         if (ret)
1082                 return ret;
1083         /* Leave the inode number as 0; this is allowed for non
1084          * hard-linked files. */
1085         dentry->d_inode->i_resolved = 1;
1086         dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
1087         *dentry_ret = dentry;
1088         return 0;
1089 }
1090
1091 static int
1092 init_ads_entry(struct wim_ads_entry *ads_entry, const void *name,
1093                size_t name_nbytes, bool is_utf16le)
1094 {
1095         int ret = 0;
1096         memset(ads_entry, 0, sizeof(*ads_entry));
1097
1098         if (is_utf16le) {
1099                 utf16lechar *p = MALLOC(name_nbytes + sizeof(utf16lechar));
1100                 if (!p)
1101                         return WIMLIB_ERR_NOMEM;
1102                 memcpy(p, name, name_nbytes);
1103                 p[name_nbytes / 2] = cpu_to_le16(0);
1104                 ads_entry->stream_name = p;
1105                 ads_entry->stream_name_nbytes = name_nbytes;
1106         } else {
1107                 if (name && *(const tchar*)name != T('\0')) {
1108                         ret = get_utf16le_name(name, &ads_entry->stream_name,
1109                                                &ads_entry->stream_name_nbytes);
1110                 }
1111         }
1112         return ret;
1113 }
1114
1115 static void
1116 destroy_ads_entry(struct wim_ads_entry *ads_entry)
1117 {
1118         FREE(ads_entry->stream_name);
1119 }
1120
1121 /* Frees an inode. */
1122 void
1123 free_inode(struct wim_inode *inode)
1124 {
1125         if (inode) {
1126                 if (inode->i_ads_entries) {
1127                         for (u16 i = 0; i < inode->i_num_ads; i++)
1128                                 destroy_ads_entry(&inode->i_ads_entries[i]);
1129                         FREE(inode->i_ads_entries);
1130                 }
1131         #ifdef WITH_FUSE
1132                 wimlib_assert(inode->i_num_opened_fds == 0);
1133                 FREE(inode->i_fds);
1134                 pthread_mutex_destroy(&inode->i_mutex);
1135         #endif
1136                 /* HACK: This may instead delete the inode from i_list, but the
1137                  * hlist_del() behaves the same as list_del(). */
1138                 if (!hlist_unhashed(&inode->i_hlist))
1139                         hlist_del(&inode->i_hlist);
1140                 FREE(inode);
1141         }
1142 }
1143
1144 /* Decrements link count on an inode and frees it if the link count reaches 0.
1145  * */
1146 static void
1147 put_inode(struct wim_inode *inode)
1148 {
1149         wimlib_assert(inode->i_nlink != 0);
1150         if (--inode->i_nlink == 0) {
1151         #ifdef WITH_FUSE
1152                 if (inode->i_num_opened_fds == 0)
1153         #endif
1154                 {
1155                         free_inode(inode);
1156                 }
1157         }
1158 }
1159
1160 /* Frees a WIM dentry.
1161  *
1162  * The corresponding inode (if any) is freed only if its link count is
1163  * decremented to 0.
1164  */
1165 void
1166 free_dentry(struct wim_dentry *dentry)
1167 {
1168         if (dentry) {
1169                 FREE(dentry->file_name);
1170                 FREE(dentry->short_name);
1171                 FREE(dentry->_full_path);
1172                 if (dentry->d_inode)
1173                         put_inode(dentry->d_inode);
1174                 FREE(dentry);
1175         }
1176 }
1177
1178 /* This function is passed as an argument to for_dentry_in_tree_depth() in order
1179  * to free a directory tree. */
1180 static int
1181 do_free_dentry(struct wim_dentry *dentry, void *_lookup_table)
1182 {
1183         struct wim_lookup_table *lookup_table = _lookup_table;
1184
1185         if (lookup_table) {
1186                 struct wim_inode *inode = dentry->d_inode;
1187                 for (unsigned i = 0; i <= inode->i_num_ads; i++) {
1188                         struct wim_lookup_table_entry *lte;
1189
1190                         lte = inode_stream_lte(inode, i, lookup_table);
1191                         if (lte)
1192                                 lte_decrement_refcnt(lte, lookup_table);
1193                 }
1194         }
1195         free_dentry(dentry);
1196         return 0;
1197 }
1198
1199 /*
1200  * Unlinks and frees a dentry tree.
1201  *
1202  * @root:               The root of the tree.
1203  * @lookup_table:       The lookup table for dentries.  If non-NULL, the
1204  *                      reference counts in the lookup table for the lookup
1205  *                      table entries corresponding to the dentries will be
1206  *                      decremented.
1207  */
1208 void
1209 free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table)
1210 {
1211         for_dentry_in_tree_depth(root, do_free_dentry, lookup_table);
1212 }
1213
#ifdef __WIN32__

/* Inserts @child into the case-insensitive index of the directory @parent.
 *
 * The index is a red-black tree ordered by
 * dentry_compare_names_case_insensitive().  When several dentries share the
 * same case-insensitive name, only one of them is in the tree itself; the rest
 * are connected in a list.
 *
 * Returns NULL if @child was inserted into the tree.  If the tree already
 * holds a dentry with the same case-insensitive name, that dentry is returned
 * and the tree is left unchanged.
 */
static struct wim_dentry *
dentry_add_child_case_insensitive(struct wim_dentry *parent,
				  struct wim_dentry *child)
{
	struct rb_root *index = &parent->d_inode->i_children_case_insensitive;
	struct rb_node **link = &index->rb_node;
	struct rb_node *above = NULL;

	/* Descend to the empty slot where @child belongs. */
	while (*link != NULL) {
		struct wim_dentry *occupant;
		int cmp;

		occupant = container_of(*link, struct wim_dentry,
					rb_node_case_insensitive);
		cmp = dentry_compare_names_case_insensitive(child, occupant);
		if (cmp == 0)
			return occupant;

		above = *link;
		if (cmp < 0)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}

	rb_link_node(&child->rb_node_case_insensitive, above, link);
	rb_insert_color(&child->rb_node_case_insensitive, index);
	return NULL;
}
#endif
1252
1253 /*
1254  * Links a dentry into the directory tree.
1255  *
1256  * @parent: The dentry that will be the parent of @child.
1257  * @child: The dentry to link.
1258  *
1259  * Returns NULL if successful.  If @parent already contains a dentry with the
1260  * same case-sensitive name as @child, the pointer to this duplicate dentry is
1261  * returned.
1262  */
1263 struct wim_dentry *
1264 dentry_add_child(struct wim_dentry * restrict parent,
1265                  struct wim_dentry * restrict child)
1266 {
1267         struct rb_root *root;
1268         struct rb_node **new;
1269         struct rb_node *rb_parent;
1270
1271         wimlib_assert(dentry_is_directory(parent));
1272         wimlib_assert(parent != child);
1273
1274         /* Case sensitive child dentry index */
1275         root = &parent->d_inode->i_children;
1276         new = &root->rb_node;
1277         rb_parent = NULL;
1278         while (*new) {
1279                 struct wim_dentry *this = rbnode_dentry(*new);
1280                 int result = dentry_compare_names_case_sensitive(child, this);
1281
1282                 rb_parent = *new;
1283
1284                 if (result < 0)
1285                         new = &((*new)->rb_left);
1286                 else if (result > 0)
1287                         new = &((*new)->rb_right);
1288                 else
1289                         return this;
1290         }
1291         child->parent = parent;
1292         rb_link_node(&child->rb_node, rb_parent, new);
1293         rb_insert_color(&child->rb_node, root);
1294
1295 #ifdef __WIN32__
1296         {
1297                 struct wim_dentry *existing;
1298                 existing = dentry_add_child_case_insensitive(parent, child);
1299                 if (existing) {
1300                         list_add(&child->case_insensitive_conflict_list,
1301                                  &existing->case_insensitive_conflict_list);
1302                         child->rb_node_case_insensitive.__rb_parent_color = 0;
1303                 } else {
1304                         INIT_LIST_HEAD(&child->case_insensitive_conflict_list);
1305                 }
1306         }
1307 #endif
1308         return NULL;
1309 }
1310
1311 /* Unlink a WIM dentry from the directory entry tree. */
1312 void
1313 unlink_dentry(struct wim_dentry *dentry)
1314 {
1315         struct wim_dentry *parent = dentry->parent;
1316
1317         if (parent == dentry)
1318                 return;
1319         rb_erase(&dentry->rb_node, &parent->d_inode->i_children);
1320 #ifdef __WIN32__
1321         if (dentry->rb_node_case_insensitive.__rb_parent_color) {
1322                 /* This dentry was in the case-insensitive red-black tree. */
1323                 rb_erase(&dentry->rb_node_case_insensitive,
1324                          &parent->d_inode->i_children_case_insensitive);
1325                 if (!list_empty(&dentry->case_insensitive_conflict_list)) {
1326                         /* Make a different case-insensitively-the-same dentry
1327                          * be the "representative" in the red-black tree. */
1328                         struct list_head *next;
1329                         struct wim_dentry *other;
1330                         struct wim_dentry *existing;
1331
1332                         next = dentry->case_insensitive_conflict_list.next;
1333                         other = list_entry(next, struct wim_dentry, case_insensitive_conflict_list);
1334                         existing = dentry_add_child_case_insensitive(parent, other);
1335                         wimlib_assert(existing == NULL);
1336                 }
1337         }
1338         list_del(&dentry->case_insensitive_conflict_list);
1339 #endif
1340 }
1341
1342 /*
1343  * Returns the alternate data stream entry belonging to @inode that has the
1344  * stream name @stream_name.
1345  */
1346 struct wim_ads_entry *
1347 inode_get_ads_entry(struct wim_inode *inode, const tchar *stream_name,
1348                     u16 *idx_ret)
1349 {
1350         if (inode->i_num_ads == 0) {
1351                 return NULL;
1352         } else {
1353                 size_t stream_name_utf16le_nbytes;
1354                 u16 i;
1355                 struct wim_ads_entry *result;
1356
1357         #if TCHAR_IS_UTF16LE
1358                 const utf16lechar *stream_name_utf16le;
1359
1360                 stream_name_utf16le = stream_name;
1361                 stream_name_utf16le_nbytes = tstrlen(stream_name) * sizeof(tchar);
1362         #else
1363                 utf16lechar *stream_name_utf16le;
1364
1365                 {
1366                         int ret = tstr_to_utf16le(stream_name,
1367                                                   tstrlen(stream_name) *
1368                                                       sizeof(tchar),
1369                                                   &stream_name_utf16le,
1370                                                   &stream_name_utf16le_nbytes);
1371                         if (ret)
1372                                 return NULL;
1373                 }
1374         #endif
1375                 i = 0;
1376                 result = NULL;
1377                 do {
1378                         if (ads_entry_has_name(&inode->i_ads_entries[i],
1379                                                stream_name_utf16le,
1380                                                stream_name_utf16le_nbytes))
1381                         {
1382                                 if (idx_ret)
1383                                         *idx_ret = i;
1384                                 result = &inode->i_ads_entries[i];
1385                                 break;
1386                         }
1387                 } while (++i != inode->i_num_ads);
1388         #if !TCHAR_IS_UTF16LE
1389                 FREE(stream_name_utf16le);
1390         #endif
1391                 return result;
1392         }
1393 }
1394
1395 static struct wim_ads_entry *
1396 do_inode_add_ads(struct wim_inode *inode, const void *stream_name,
1397                  size_t stream_name_nbytes, bool is_utf16le)
1398 {
1399         u16 num_ads;
1400         struct wim_ads_entry *ads_entries;
1401         struct wim_ads_entry *new_entry;
1402
1403         if (inode->i_num_ads >= 0xfffe) {
1404                 ERROR("Too many alternate data streams in one inode!");
1405                 return NULL;
1406         }
1407         num_ads = inode->i_num_ads + 1;
1408         ads_entries = REALLOC(inode->i_ads_entries,
1409                               num_ads * sizeof(inode->i_ads_entries[0]));
1410         if (!ads_entries) {
1411                 ERROR("Failed to allocate memory for new alternate data stream");
1412                 return NULL;
1413         }
1414         inode->i_ads_entries = ads_entries;
1415
1416         new_entry = &inode->i_ads_entries[num_ads - 1];
1417         if (init_ads_entry(new_entry, stream_name, stream_name_nbytes, is_utf16le))
1418                 return NULL;
1419         new_entry->stream_id = inode->i_next_stream_id++;
1420         inode->i_num_ads = num_ads;
1421         return new_entry;
1422 }
1423
1424 struct wim_ads_entry *
1425 inode_add_ads_utf16le(struct wim_inode *inode,
1426                       const utf16lechar *stream_name,
1427                       size_t stream_name_nbytes)
1428 {
1429         DEBUG("Add alternate data stream \"%"WS"\"", stream_name);
1430         return do_inode_add_ads(inode, stream_name, stream_name_nbytes, true);
1431 }
1432
1433 /*
1434  * Add an alternate stream entry to a WIM inode and return a pointer to it, or
1435  * NULL if memory could not be allocated.
1436  */
1437 struct wim_ads_entry *
1438 inode_add_ads(struct wim_inode *inode, const tchar *stream_name)
1439 {
1440         DEBUG("Add alternate data stream \"%"TS"\"", stream_name);
1441         return do_inode_add_ads(inode, stream_name,
1442                                 tstrlen(stream_name) * sizeof(tchar),
1443                                 TCHAR_IS_UTF16LE);
1444 }
1445
1446 static struct wim_lookup_table_entry *
1447 add_stream_from_data_buffer(const void *buffer, size_t size,
1448                             struct wim_lookup_table *lookup_table)
1449 {
1450         u8 hash[SHA1_HASH_SIZE];
1451         struct wim_lookup_table_entry *lte, *existing_lte;
1452
1453         sha1_buffer(buffer, size, hash);
1454         existing_lte = __lookup_resource(lookup_table, hash);
1455         if (existing_lte) {
1456                 wimlib_assert(wim_resource_size(existing_lte) == size);
1457                 lte = existing_lte;
1458                 lte->refcnt++;
1459         } else {
1460                 void *buffer_copy;
1461                 lte = new_lookup_table_entry();
1462                 if (!lte)
1463                         return NULL;
1464                 buffer_copy = memdup(buffer, size);
1465                 if (!buffer_copy) {
1466                         free_lookup_table_entry(lte);
1467                         return NULL;
1468                 }
1469                 lte->resource_location            = RESOURCE_IN_ATTACHED_BUFFER;
1470                 lte->attached_buffer              = buffer_copy;
1471                 lte->resource_entry.original_size = size;
1472                 copy_hash(lte->hash, hash);
1473                 lookup_table_insert(lookup_table, lte);
1474         }
1475         return lte;
1476 }
1477
1478 int
1479 inode_add_ads_with_data(struct wim_inode *inode, const tchar *name,
1480                         const void *value, size_t size,
1481                         struct wim_lookup_table *lookup_table)
1482 {
1483         struct wim_ads_entry *new_ads_entry;
1484
1485         wimlib_assert(inode->i_resolved);
1486
1487         new_ads_entry = inode_add_ads(inode, name);
1488         if (!new_ads_entry)
1489                 return WIMLIB_ERR_NOMEM;
1490
1491         new_ads_entry->lte = add_stream_from_data_buffer(value, size,
1492                                                          lookup_table);
1493         if (!new_ads_entry->lte) {
1494                 inode_remove_ads(inode, new_ads_entry - inode->i_ads_entries,
1495                                  lookup_table);
1496                 return WIMLIB_ERR_NOMEM;
1497         }
1498         return 0;
1499 }
1500
1501 /* Set the unnamed stream of a WIM inode, given a data buffer containing the
1502  * stream contents. */
1503 int
1504 inode_set_unnamed_stream(struct wim_inode *inode, const void *data, size_t len,
1505                          struct wim_lookup_table *lookup_table)
1506 {
1507         inode->i_lte = add_stream_from_data_buffer(data, len, lookup_table);
1508         if (!inode->i_lte)
1509                 return WIMLIB_ERR_NOMEM;
1510         inode->i_resolved = 1;
1511         return 0;
1512 }
1513
1514 /* Remove an alternate data stream from a WIM inode  */
1515 void
1516 inode_remove_ads(struct wim_inode *inode, u16 idx,
1517                  struct wim_lookup_table *lookup_table)
1518 {
1519         struct wim_ads_entry *ads_entry;
1520         struct wim_lookup_table_entry *lte;
1521
1522         wimlib_assert(idx < inode->i_num_ads);
1523         wimlib_assert(inode->i_resolved);
1524
1525         ads_entry = &inode->i_ads_entries[idx];
1526
1527         DEBUG("Remove alternate data stream \"%"WS"\"", ads_entry->stream_name);
1528
1529         lte = ads_entry->lte;
1530         if (lte)
1531                 lte_decrement_refcnt(lte, lookup_table);
1532
1533         destroy_ads_entry(ads_entry);
1534
1535         memmove(&inode->i_ads_entries[idx],
1536                 &inode->i_ads_entries[idx + 1],
1537                 (inode->i_num_ads - idx - 1) * sizeof(inode->i_ads_entries[0]));
1538         inode->i_num_ads--;
1539 }
1540
1541 #ifndef __WIN32__
1542 int
1543 inode_get_unix_data(const struct wim_inode *inode,
1544                     struct wimlib_unix_data *unix_data,
1545                     u16 *stream_idx_ret)
1546 {
1547         const struct wim_ads_entry *ads_entry;
1548         const struct wim_lookup_table_entry *lte;
1549         size_t size;
1550         int ret;
1551
1552         wimlib_assert(inode->i_resolved);
1553
1554         ads_entry = inode_get_ads_entry((struct wim_inode*)inode,
1555                                         WIMLIB_UNIX_DATA_TAG, NULL);
1556         if (!ads_entry)
1557                 return NO_UNIX_DATA;
1558
1559         if (stream_idx_ret)
1560                 *stream_idx_ret = ads_entry - inode->i_ads_entries;
1561
1562         lte = ads_entry->lte;
1563         if (!lte)
1564                 return NO_UNIX_DATA;
1565
1566         size = wim_resource_size(lte);
1567         if (size != sizeof(struct wimlib_unix_data))
1568                 return BAD_UNIX_DATA;
1569
1570         ret = read_full_resource_into_buf(lte, unix_data);
1571         if (ret)
1572                 return ret;
1573
1574         if (unix_data->version != 0)
1575                 return BAD_UNIX_DATA;
1576         return 0;
1577 }
1578
1579 int
1580 inode_set_unix_data(struct wim_inode *inode, uid_t uid, gid_t gid, mode_t mode,
1581                     struct wim_lookup_table *lookup_table, int which)
1582 {
1583         struct wimlib_unix_data unix_data;
1584         int ret;
1585         bool have_good_unix_data = false;
1586         bool have_unix_data = false;
1587         u16 stream_idx;
1588
1589         if (!(which & UNIX_DATA_CREATE)) {
1590                 ret = inode_get_unix_data(inode, &unix_data, &stream_idx);
1591                 if (ret == 0 || ret == BAD_UNIX_DATA || ret > 0)
1592                         have_unix_data = true;
1593                 if (ret == 0)
1594                         have_good_unix_data = true;
1595         }
1596         unix_data.version = 0;
1597         if (which & UNIX_DATA_UID || !have_good_unix_data)
1598                 unix_data.uid = uid;
1599         if (which & UNIX_DATA_GID || !have_good_unix_data)
1600                 unix_data.gid = gid;
1601         if (which & UNIX_DATA_MODE || !have_good_unix_data)
1602                 unix_data.mode = mode;
1603         ret = inode_add_ads_with_data(inode, WIMLIB_UNIX_DATA_TAG,
1604                                       &unix_data,
1605                                       sizeof(struct wimlib_unix_data),
1606                                       lookup_table);
1607         if (ret == 0 && have_unix_data)
1608                 inode_remove_ads(inode, stream_idx, lookup_table);
1609         return ret;
1610 }
1611 #endif /* !__WIN32__ */
1612
1613 /*
1614  * Reads the alternate data stream entries of a WIM dentry.
1615  *
1616  * @p:  Pointer to buffer that starts with the first alternate stream entry.
1617  *
1618  * @inode:      Inode to load the alternate data streams into.
1619  *              @inode->i_num_ads must have been set to the number of
1620  *              alternate data streams that are expected.
1621  *
1622  * @remaining_size:     Number of bytes of data remaining in the buffer pointed
1623  *                      to by @p.
1624  *
1625  *
1626  * Return 0 on success or nonzero on failure.  On success, inode->i_ads_entries
1627  * is set to an array of `struct wim_ads_entry's of length inode->i_num_ads.  On
1628  * failure, @inode is not modified.
1629  */
1630 static int
1631 read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode,
1632                  size_t nbytes_remaining)
1633 {
1634         u16 num_ads;
1635         struct wim_ads_entry *ads_entries;
1636         int ret;
1637
1638         BUILD_BUG_ON(sizeof(struct wim_ads_entry_on_disk) != WIM_ADS_ENTRY_DISK_SIZE);
1639
1640         /* Allocate an array for our in-memory representation of the alternate
1641          * data stream entries. */
1642         num_ads = inode->i_num_ads;
1643         ads_entries = CALLOC(num_ads, sizeof(inode->i_ads_entries[0]));
1644         if (!ads_entries)
1645                 goto out_of_memory;
1646
1647         /* Read the entries into our newly allocated buffer. */
1648         for (u16 i = 0; i < num_ads; i++) {
1649                 u64 length;
1650                 struct wim_ads_entry *cur_entry;
1651                 const struct wim_ads_entry_on_disk *disk_entry =
1652                         (const struct wim_ads_entry_on_disk*)p;
1653
1654                 cur_entry = &ads_entries[i];
1655                 ads_entries[i].stream_id = i + 1;
1656
1657                 /* Do we have at least the size of the fixed-length data we know
1658                  * need? */
1659                 if (nbytes_remaining < sizeof(struct wim_ads_entry_on_disk))
1660                         goto out_invalid;
1661
1662                 /* Read the length field */
1663                 length = le64_to_cpu(disk_entry->length);
1664
1665                 /* Make sure the length field is neither so small it doesn't
1666                  * include all the fixed-length data nor so large it overflows
1667                  * the metadata resource buffer. */
1668                 if (length < sizeof(struct wim_ads_entry_on_disk) ||
1669                     length > nbytes_remaining)
1670                         goto out_invalid;
1671
1672                 /* Read the rest of the fixed-length data. */
1673
1674                 cur_entry->reserved = le64_to_cpu(disk_entry->reserved);
1675                 copy_hash(cur_entry->hash, disk_entry->hash);
1676                 cur_entry->stream_name_nbytes = le16_to_cpu(disk_entry->stream_name_nbytes);
1677
1678                 /* If stream_name_nbytes != 0, this is a named stream.
1679                  * Otherwise this is an unnamed stream, or in some cases (bugs
1680                  * in Microsoft's software I guess) a meaningless entry
1681                  * distinguished from the real unnamed stream entry, if any, by
1682                  * the fact that the real unnamed stream entry has a nonzero
1683                  * hash field. */
1684                 if (cur_entry->stream_name_nbytes) {
1685                         /* The name is encoded in UTF16-LE, which uses 2-byte
1686                          * coding units, so the length of the name had better be
1687                          * an even number of bytes... */
1688                         if (cur_entry->stream_name_nbytes & 1)
1689                                 goto out_invalid;
1690
1691                         /* Add the length of the stream name to get the length
1692                          * we actually need to read.  Make sure this isn't more
1693                          * than the specified length of the entry. */
1694                         if (sizeof(struct wim_ads_entry_on_disk) +
1695                             cur_entry->stream_name_nbytes > length)
1696                                 goto out_invalid;
1697
1698                         cur_entry->stream_name = MALLOC(cur_entry->stream_name_nbytes + 2);
1699                         if (!cur_entry->stream_name)
1700                                 goto out_of_memory;
1701
1702                         memcpy(cur_entry->stream_name,
1703                                disk_entry->stream_name,
1704                                cur_entry->stream_name_nbytes);
1705                         cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = cpu_to_le16(0);
1706                 }
1707
1708                 /* It's expected that the size of every ADS entry is a multiple
1709                  * of 8.  However, to be safe, I'm allowing the possibility of
1710                  * an ADS entry at the very end of the metadata resource ending
1711                  * un-aligned.  So although we still need to increment the input
1712                  * pointer by @length to reach the next ADS entry, it's possible
1713                  * that less than @length is actually remaining in the metadata
1714                  * resource. We should set the remaining bytes to 0 if this
1715                  * happens. */
1716                 length = (length + 7) & ~(u64)7;
1717                 p += length;
1718                 if (nbytes_remaining < length)
1719                         nbytes_remaining = 0;
1720                 else
1721                         nbytes_remaining -= length;
1722         }
1723         inode->i_ads_entries = ads_entries;
1724         inode->i_next_stream_id = inode->i_num_ads + 1;
1725         ret = 0;
1726         goto out;
1727 out_of_memory:
1728         ret = WIMLIB_ERR_NOMEM;
1729         goto out_free_ads_entries;
1730 out_invalid:
1731         ERROR("An alternate data stream entry is invalid");
1732         ret = WIMLIB_ERR_INVALID_DENTRY;
1733 out_free_ads_entries:
1734         if (ads_entries) {
1735                 for (u16 i = 0; i < num_ads; i++)
1736                         destroy_ads_entry(&ads_entries[i]);
1737                 FREE(ads_entries);
1738         }
1739 out:
1740         return ret;
1741 }
1742
1743 /*
1744  * Reads a WIM directory entry, including all alternate data stream entries that
1745  * follow it, from the WIM image's metadata resource.
1746  *
1747  * @metadata_resource:
1748  *              Pointer to the metadata resource buffer.
1749  *
1750  * @metadata_resource_len:
1751  *              Length of the metadata resource buffer, in bytes.
1752  *
1753  * @offset:     Offset of the dentry within the metadata resource.
1754  *
1755  * @dentry:     A `struct wim_dentry' that will be filled in by this function.
1756  *
1757  * Return 0 on success or nonzero on failure.  On failure, @dentry will have
1758  * been modified, but it will not be left with pointers to any allocated
1759  * buffers.  On success, the dentry->length field must be examined.  If zero,
1760  * this was a special "end of directory" dentry and not a real dentry.  If
1761  * nonzero, this was a real dentry.
1762  *
1763  * Possible errors include:
1764  *      WIMLIB_ERR_NOMEM
1765  *      WIMLIB_ERR_INVALID_DENTRY
1766  */
1767 int
1768 read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len,
1769             u64 offset, struct wim_dentry * restrict dentry)
1770 {
1771
1772         u64 calculated_size;
1773         utf16lechar *file_name;
1774         utf16lechar *short_name;
1775         u16 short_name_nbytes;
1776         u16 file_name_nbytes;
1777         int ret;
1778         struct wim_inode *inode;
1779         const u8 *p = &metadata_resource[offset];
1780         const struct wim_dentry_on_disk *disk_dentry =
1781                         (const struct wim_dentry_on_disk*)p;
1782
1783         BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE);
1784
1785         if ((uintptr_t)p & 7)
1786                 WARNING("WIM dentry is not 8-byte aligned");
1787
1788         dentry_common_init(dentry);
1789
1790         /* Before reading the whole dentry, we need to read just the length.
1791          * This is because a dentry of length 8 (that is, just the length field)
1792          * terminates the list of sibling directory entries. */
1793         if (offset + sizeof(u64) > metadata_resource_len ||
1794             offset + sizeof(u64) < offset)
1795         {
1796                 ERROR("Directory entry starting at %"PRIu64" ends past the "
1797                       "end of the metadata resource (size %"PRIu64")",
1798                       offset, metadata_resource_len);
1799                 return WIMLIB_ERR_INVALID_DENTRY;
1800         }
1801         dentry->length = le64_to_cpu(disk_dentry->length);
1802
1803         /* A zero length field (really a length of 8, since that's how big the
1804          * directory entry is...) indicates that this is the end of directory
1805          * dentry.  We do not read it into memory as an actual dentry, so just
1806          * return successfully in this case. */
1807         if (dentry->length == 8)
1808                 dentry->length = 0;
1809         if (dentry->length == 0)
1810                 return 0;
1811
1812         /* Now that we have the actual length provided in the on-disk structure,
1813          * again make sure it doesn't overflow the metadata resource buffer. */
1814         if (offset + dentry->length > metadata_resource_len ||
1815             offset + dentry->length < offset)
1816         {
1817                 ERROR("Directory entry at offset %"PRIu64" and with size "
1818                       "%"PRIu64" ends past the end of the metadata resource "
1819                       "(size %"PRIu64")",
1820                       offset, dentry->length, metadata_resource_len);
1821                 return WIMLIB_ERR_INVALID_DENTRY;
1822         }
1823
1824         /* Make sure the dentry length is at least as large as the number of
1825          * fixed-length fields */
1826         if (dentry->length < sizeof(struct wim_dentry_on_disk)) {
1827                 ERROR("Directory entry has invalid length of %"PRIu64" bytes",
1828                       dentry->length);
1829                 return WIMLIB_ERR_INVALID_DENTRY;
1830         }
1831
1832         /* Allocate a `struct wim_inode' for this `struct wim_dentry'. */
1833         inode = new_timeless_inode();
1834         if (!inode)
1835                 return WIMLIB_ERR_NOMEM;
1836
1837         /* Read more fields; some into the dentry, and some into the inode. */
1838
1839         inode->i_attributes = le32_to_cpu(disk_dentry->attributes);
1840         inode->i_security_id = le32_to_cpu(disk_dentry->security_id);
1841         dentry->subdir_offset = le64_to_cpu(disk_dentry->subdir_offset);
1842         dentry->d_unused_1 = le64_to_cpu(disk_dentry->unused_1);
1843         dentry->d_unused_2 = le64_to_cpu(disk_dentry->unused_2);
1844         inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time);
1845         inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time);
1846         inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time);
1847         copy_hash(inode->i_hash, disk_dentry->unnamed_stream_hash);
1848
1849         /* I don't know what's going on here.  It seems like M$ screwed up the
1850          * reparse points, then put the fields in the same place and didn't
1851          * document it.  So we have some fields we read for reparse points, and
1852          * some fields in the same place for non-reparse-point.s */
1853         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
1854                 inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->reparse.rp_unknown_1);
1855                 inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag);
1856                 inode->i_rp_unknown_2 = le16_to_cpu(disk_dentry->reparse.rp_unknown_2);
1857                 inode->i_not_rpfixed = le16_to_cpu(disk_dentry->reparse.not_rpfixed);
1858                 /* Leave inode->i_ino at 0.  Note that this means the WIM file
1859                  * cannot archive hard-linked reparse points.  Such a thing
1860                  * doesn't really make sense anyway, although I believe it's
1861                  * theoretically possible to have them on NTFS. */
1862         } else {
1863                 inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->nonreparse.rp_unknown_1);
1864                 inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id);
1865         }
1866
1867         inode->i_num_ads = le16_to_cpu(disk_dentry->num_alternate_data_streams);
1868
1869         short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes);
1870         file_name_nbytes = le16_to_cpu(disk_dentry->file_name_nbytes);
1871
1872         if ((short_name_nbytes & 1) | (file_name_nbytes & 1))
1873         {
1874                 ERROR("Dentry name is not valid UTF-16LE (odd number of bytes)!");
1875                 ret = WIMLIB_ERR_INVALID_DENTRY;
1876                 goto out_free_inode;
1877         }
1878
1879         /* We now know the length of the file name and short name.  Make sure
1880          * the length of the dentry is large enough to actually hold them.
1881          *
1882          * The calculated length here is unaligned to allow for the possibility
1883          * that the dentry->length names an unaligned length, although this
1884          * would be unexpected. */
1885         calculated_size = _dentry_correct_length_unaligned(file_name_nbytes,
1886                                                            short_name_nbytes);
1887
1888         if (dentry->length < calculated_size) {
1889                 ERROR("Unexpected end of directory entry! (Expected "
1890                       "at least %"PRIu64" bytes, got %"PRIu64" bytes.)",
1891                       calculated_size, dentry->length);
1892                 ret = WIMLIB_ERR_INVALID_DENTRY;
1893                 goto out_free_inode;
1894         }
1895
1896         p += sizeof(struct wim_dentry_on_disk);
1897
1898         /* Read the filename if present.  Note: if the filename is empty, there
1899          * is no null terminator following it. */
1900         if (file_name_nbytes) {
1901                 file_name = MALLOC(file_name_nbytes + 2);
1902                 if (!file_name) {
1903                         ERROR("Failed to allocate %d bytes for dentry file name",
1904                               file_name_nbytes + 2);
1905                         ret = WIMLIB_ERR_NOMEM;
1906                         goto out_free_inode;
1907                 }
1908                 memcpy(file_name, p, file_name_nbytes);
1909                 p += file_name_nbytes + 2;
1910                 file_name[file_name_nbytes / 2] = cpu_to_le16(0);
1911         } else {
1912                 file_name = NULL;
1913         }
1914
1915
1916         /* Read the short filename if present.  Note: if there is no short
1917          * filename, there is no null terminator following it. */
1918         if (short_name_nbytes) {
1919                 short_name = MALLOC(short_name_nbytes + 2);
1920                 if (!short_name) {
1921                         ERROR("Failed to allocate %d bytes for dentry short name",
1922                               short_name_nbytes + 2);
1923                         ret = WIMLIB_ERR_NOMEM;
1924                         goto out_free_file_name;
1925                 }
1926                 memcpy(short_name, p, short_name_nbytes);
1927                 p += short_name_nbytes + 2;
1928                 short_name[short_name_nbytes / 2] = cpu_to_le16(0);
1929         } else {
1930                 short_name = NULL;
1931         }
1932
1933         /* Align the dentry length */
1934         dentry->length = (dentry->length + 7) & ~7;
1935
1936         /*
1937          * Read the alternate data streams, if present.  dentry->num_ads tells
1938          * us how many they are, and they will directly follow the dentry
1939          * on-disk.
1940          *
1941          * Note that each alternate data stream entry begins on an 8-byte
1942          * aligned boundary, and the alternate data stream entries seem to NOT
1943          * be included in the dentry->length field for some reason.
1944          */
1945         if (inode->i_num_ads != 0) {
1946                 ret = WIMLIB_ERR_INVALID_DENTRY;
1947                 if (offset + dentry->length > metadata_resource_len ||
1948                     (ret = read_ads_entries(&metadata_resource[offset + dentry->length],
1949                                             inode,
1950                                             metadata_resource_len - offset - dentry->length)))
1951                 {
1952                         ERROR("Failed to read alternate data stream "
1953                               "entries of WIM dentry \"%"WS"\"", file_name);
1954                         goto out_free_short_name;
1955                 }
1956         }
1957         /* We've read all the data for this dentry.  Set the names and their
1958          * lengths, and we've done. */
1959         dentry->d_inode           = inode;
1960         dentry->file_name         = file_name;
1961         dentry->short_name        = short_name;
1962         dentry->file_name_nbytes  = file_name_nbytes;
1963         dentry->short_name_nbytes = short_name_nbytes;
1964         ret = 0;
1965         goto out;
1966 out_free_short_name:
1967         FREE(short_name);
1968 out_free_file_name:
1969         FREE(file_name);
1970 out_free_inode:
1971         free_inode(inode);
1972 out:
1973         return ret;
1974 }
1975
1976 static const tchar *
1977 dentry_get_file_type_string(const struct wim_dentry *dentry)
1978 {
1979         const struct wim_inode *inode = dentry->d_inode;
1980         if (inode_is_directory(inode))
1981                 return T("directory");
1982         else if (inode_is_symlink(inode))
1983                 return T("symbolic link");
1984         else
1985                 return T("file");
1986 }
1987
1988 /* Reads the children of a dentry, and all their children, ..., etc. from the
1989  * metadata resource and into the dentry tree.
1990  *
1991  * @metadata_resource:  An array that contains the uncompressed metadata
1992  *                      resource for the WIM file.
1993  *
1994  * @metadata_resource_len:  The length of the uncompressed metadata resource, in
1995  *                          bytes.
1996  *
1997  * @dentry:     A pointer to a `struct wim_dentry' that is the root of the directory
1998  *              tree and has already been read from the metadata resource.  It
1999  *              does not need to be the real root because this procedure is
2000  *              called recursively.
2001  *
2002  * Returns zero on success; nonzero on failure.
2003  */
2004 int
2005 read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len,
2006                  struct wim_dentry *dentry)
2007 {
2008         u64 cur_offset = dentry->subdir_offset;
2009         struct wim_dentry *child;
2010         struct wim_dentry *duplicate;
2011         struct wim_dentry *parent;
2012         struct wim_dentry cur_child;
2013         int ret;
2014
2015         /*
2016          * If @dentry has no child dentries, nothing more needs to be done for
2017          * this branch.  This is the case for regular files, symbolic links, and
2018          * *possibly* empty directories (although an empty directory may also
2019          * have one child dentry that is the special end-of-directory dentry)
2020          */
2021         if (cur_offset == 0)
2022                 return 0;
2023
2024         /* Check for cyclic directory structure */
2025         for (parent = dentry->parent; !dentry_is_root(parent); parent = parent->parent)
2026         {
2027                 if (unlikely(parent->subdir_offset == cur_offset)) {
2028                         ERROR("Cyclic directory structure directed: children "
2029                               "of \"%"TS"\" coincide with children of \"%"TS"\"",
2030                               dentry_full_path(dentry),
2031                               dentry_full_path(parent));
2032                         return WIMLIB_ERR_INVALID_DENTRY;
2033                 }
2034         }
2035
2036         /* Find and read all the children of @dentry. */
2037         for (;;) {
2038
2039                 /* Read next child of @dentry into @cur_child. */
2040                 ret = read_dentry(metadata_resource, metadata_resource_len,
2041                                   cur_offset, &cur_child);
2042                 if (ret)
2043                         break;
2044
2045                 /* Check for end of directory. */
2046                 if (cur_child.length == 0)
2047                         break;
2048
2049                 /* Not end of directory.  Allocate this child permanently and
2050                  * link it to the parent and previous child. */
2051                 child = memdup(&cur_child, sizeof(struct wim_dentry));
2052                 if (!child) {
2053                         ERROR("Failed to allocate new dentry!");
2054                         ret = WIMLIB_ERR_NOMEM;
2055                         break;
2056                 }
2057
2058                 /* Advance to the offset of the next child.  Note: We need to
2059                  * advance by the TOTAL length of the dentry, not by the length
2060                  * cur_child.length, which although it does take into account
2061                  * the padding, it DOES NOT take into account alternate stream
2062                  * entries. */
2063                 cur_offset += dentry_total_length(child);
2064
2065                 if (unlikely(!dentry_has_long_name(child))) {
2066                         WARNING("Ignoring unnamed dentry in "
2067                                 "directory \"%"TS"\"",
2068                                 dentry_full_path(dentry));
2069                         free_dentry(child);
2070                         continue;
2071                 }
2072
2073                 duplicate = dentry_add_child(dentry, child);
2074                 if (unlikely(duplicate)) {
2075                         const tchar *child_type, *duplicate_type;
2076                         child_type = dentry_get_file_type_string(child);
2077                         duplicate_type = dentry_get_file_type_string(duplicate);
2078                         WARNING("Ignoring duplicate %"TS" \"%"TS"\" "
2079                                 "(the WIM image already contains a %"TS" "
2080                                 "at that path with the exact same name)",
2081                                 child_type, dentry_full_path(duplicate),
2082                                 duplicate_type);
2083                         free_dentry(child);
2084                         continue;
2085                 }
2086
2087                 inode_add_dentry(child, child->d_inode);
2088                 /* If there are children of this child, call this
2089                  * procedure recursively. */
2090                 if (child->subdir_offset != 0) {
2091                         if (likely(dentry_is_directory(child))) {
2092                                 ret = read_dentry_tree(metadata_resource,
2093                                                        metadata_resource_len,
2094                                                        child);
2095                                 if (ret)
2096                                         break;
2097                         } else {
2098                                 WARNING("Ignoring children of non-directory \"%"TS"\"",
2099                                         dentry_full_path(child));
2100                         }
2101                 }
2102         }
2103         return ret;
2104 }
2105
2106 /*
2107  * Writes a WIM dentry to an output buffer.
2108  *
2109  * @dentry:  The dentry structure.
2110  * @p:       The memory location to write the data to.
2111  *
2112  * Returns the pointer to the byte after the last byte we wrote as part of the
2113  * dentry, including any alternate data stream entries.
2114  */
2115 static u8 *
2116 write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
2117 {
2118         const struct wim_inode *inode;
2119         struct wim_dentry_on_disk *disk_dentry;
2120         const u8 *orig_p;
2121         const u8 *hash;
2122
2123         wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */
2124         orig_p = p;
2125
2126         inode = dentry->d_inode;
2127         disk_dentry = (struct wim_dentry_on_disk*)p;
2128
2129         disk_dentry->attributes = cpu_to_le32(inode->i_attributes);
2130         disk_dentry->security_id = cpu_to_le32(inode->i_security_id);
2131         disk_dentry->subdir_offset = cpu_to_le64(dentry->subdir_offset);
2132         disk_dentry->unused_1 = cpu_to_le64(dentry->d_unused_1);
2133         disk_dentry->unused_2 = cpu_to_le64(dentry->d_unused_2);
2134         disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time);
2135         disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time);
2136         disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time);
2137         hash = inode_stream_hash(inode, 0);
2138         copy_hash(disk_dentry->unnamed_stream_hash, hash);
2139         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
2140                 disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
2141                 disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag);
2142                 disk_dentry->reparse.rp_unknown_2 = cpu_to_le16(inode->i_rp_unknown_2);
2143                 disk_dentry->reparse.not_rpfixed = cpu_to_le16(inode->i_not_rpfixed);
2144         } else {
2145                 disk_dentry->nonreparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
2146                 disk_dentry->nonreparse.hard_link_group_id =
2147                         cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino);
2148         }
2149         disk_dentry->num_alternate_data_streams = cpu_to_le16(inode->i_num_ads);
2150         disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes);
2151         disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes);
2152         p += sizeof(struct wim_dentry_on_disk);
2153
2154         wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry));
2155
2156         if (dentry_has_long_name(dentry))
2157                 p = mempcpy(p, dentry->file_name, dentry->file_name_nbytes + 2);
2158
2159         if (dentry_has_short_name(dentry))
2160                 p = mempcpy(p, dentry->short_name, dentry->short_name_nbytes + 2);
2161
2162         /* Align to 8-byte boundary */
2163         while ((uintptr_t)p & 7)
2164                 *p++ = 0;
2165
2166         /* We calculate the correct length of the dentry ourselves because the
2167          * dentry->length field may been set to an unexpected value from when we
2168          * read the dentry in (for example, there may have been unknown data
2169          * appended to the end of the dentry...).  Furthermore, the dentry may
2170          * have been renamed, thus changing its needed length. */
2171         disk_dentry->length = cpu_to_le64(p - orig_p);
2172
2173         /* Write the alternate data streams entries, if any. */
2174         for (u16 i = 0; i < inode->i_num_ads; i++) {
2175                 const struct wim_ads_entry *ads_entry =
2176                                 &inode->i_ads_entries[i];
2177                 struct wim_ads_entry_on_disk *disk_ads_entry =
2178                                 (struct wim_ads_entry_on_disk*)p;
2179                 orig_p = p;
2180
2181                 disk_ads_entry->reserved = cpu_to_le64(ads_entry->reserved);
2182
2183                 hash = inode_stream_hash(inode, i + 1);
2184                 copy_hash(disk_ads_entry->hash, hash);
2185                 disk_ads_entry->stream_name_nbytes = cpu_to_le16(ads_entry->stream_name_nbytes);
2186                 p += sizeof(struct wim_ads_entry_on_disk);
2187                 if (ads_entry->stream_name_nbytes) {
2188                         p = mempcpy(p, ads_entry->stream_name,
2189                                     ads_entry->stream_name_nbytes + 2);
2190                 }
2191                 /* Align to 8-byte boundary */
2192                 while ((uintptr_t)p & 7)
2193                         *p++ = 0;
2194                 disk_ads_entry->length = cpu_to_le64(p - orig_p);
2195         }
2196         return p;
2197 }
2198
2199 static int
2200 write_dentry_cb(struct wim_dentry *dentry, void *_p)
2201 {
2202         u8 **p = _p;
2203         *p = write_dentry(dentry, *p);
2204         return 0;
2205 }
2206
2207 static u8 *
2208 write_dentry_tree_recursive(const struct wim_dentry *parent, u8 *p);
2209
2210 static int
2211 write_dentry_tree_recursive_cb(struct wim_dentry *dentry, void *_p)
2212 {
2213         u8 **p = _p;
2214         *p = write_dentry_tree_recursive(dentry, *p);
2215         return 0;
2216 }
2217
2218 /* Recursive function that writes a dentry tree rooted at @parent, not including
2219  * @parent itself, which has already been written. */
2220 static u8 *
2221 write_dentry_tree_recursive(const struct wim_dentry *parent, u8 *p)
2222 {
2223         /* Nothing to do if this dentry has no children. */
2224         if (parent->subdir_offset == 0)
2225                 return p;
2226
2227         /* Write child dentries and end-of-directory entry.
2228          *
2229          * Note: we need to write all of this dentry's children before
2230          * recursively writing the directory trees rooted at each of the child
2231          * dentries, since the on-disk dentries for a dentry's children are
2232          * always located at consecutive positions in the metadata resource! */
2233         for_dentry_child(parent, write_dentry_cb, &p);
2234
2235         /* write end of directory entry */
2236         *(le64*)p = cpu_to_le64(0);
2237         p += 8;
2238
2239         /* Recurse on children. */
2240         for_dentry_child(parent, write_dentry_tree_recursive_cb, &p);
2241         return p;
2242 }
2243
2244 /* Writes a directory tree to the metadata resource.
2245  *
2246  * @root:       Root of the dentry tree.
2247  * @p:          Pointer to a buffer with enough space for the dentry tree.
2248  *
2249  * Returns pointer to the byte after the last byte we wrote.
2250  */
2251 u8 *
2252 write_dentry_tree(const struct wim_dentry *root, u8 *p)
2253 {
2254         DEBUG("Writing dentry tree.");
2255         wimlib_assert(dentry_is_root(root));
2256
2257         /* If we're the root dentry, we have no parent that already
2258          * wrote us, so we need to write ourselves. */
2259         p = write_dentry(root, p);
2260
2261         /* Write end of directory entry after the root dentry just to be safe;
2262          * however the root dentry obviously cannot have any siblings. */
2263         *(le64*)p = cpu_to_le64(0);
2264         p += 8;
2265
2266         /* Recursively write the rest of the dentry tree. */
2267         return write_dentry_tree_recursive(root, p);
2268 }