Dentry doc fixes
[wimlib] / src / dentry.c
1 /*
2  * dentry.c
3  *
4  * In the WIM file format, the dentries are stored in the "metadata resource"
5  * section right after the security data.  Each image in the WIM file has its
6  * own metadata resource with its own security data and dentry tree.  Dentries
7  * in different images may share file resources by referring to the same lookup
8  * table entries.
9  */
10
11 /*
12  * Copyright (C) 2012, 2013 Eric Biggers
13  *
14  * This file is part of wimlib, a library for working with WIM files.
15  *
16  * wimlib is free software; you can redistribute it and/or modify it under the
17  * terms of the GNU General Public License as published by the Free Software
18  * Foundation; either version 3 of the License, or (at your option) any later
19  * version.
20  *
21  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
22  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
23  * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
24  *
25  * You should have received a copy of the GNU General Public License along with
26  * wimlib; if not, see http://www.gnu.org/licenses/.
27  */
28
29 #ifdef HAVE_CONFIG_H
30 #  include "config.h"
31 #endif
32
33 #include "wimlib.h"
34 #include "wimlib/dentry.h"
35 #include "wimlib/encoding.h"
36 #include "wimlib/endianness.h"
37 #include "wimlib/error.h"
38 #include "wimlib/lookup_table.h"
39 #include "wimlib/metadata.h"
40 #include "wimlib/resource.h"
41 #include "wimlib/sha1.h"
42 #include "wimlib/timestamp.h"
43
44 #include <errno.h>
45
/* WIM alternate data stream entry (on-disk format).  All multi-byte integer
 * fields are little-endian and the structure is packed (no implicit padding).
 */
struct wim_ads_entry_on_disk {
        /*  Length of the entry, in bytes.  This apparently includes all
         *  fixed-length fields, plus the stream name and null terminator if
         *  present, and the padding up to an 8 byte boundary.  wimlib is a
         *  little less strict when reading the entries, and only requires that
         *  the number of bytes from this field is at least as large as the size
         *  of the fixed length fields and stream name without null terminator.
         *  */
        le64  length;

        /* Reserved field. */
        le64  reserved;

        /* SHA1 message digest of the uncompressed stream; or, alternatively,
         * can be all zeroes if the stream has zero length. */
        u8 hash[SHA1_HASH_SIZE];

        /* Length of the stream name, in bytes.  0 if the stream is unnamed.  */
        le16 stream_name_nbytes;

        /* Stream name in UTF-16LE.  It is @stream_name_nbytes bytes long,
         * excluding the null terminator.  There is a null terminator
         * character if @stream_name_nbytes != 0; i.e., if this stream is named.
         * */
        utf16lechar stream_name[];
} _packed_attribute;

/* Size, in bytes, of the fixed-length fields of `struct
 * wim_ads_entry_on_disk' (8 + 8 + 20 + 2, assuming SHA1_HASH_SIZE is 20). */
#define WIM_ADS_ENTRY_DISK_SIZE 38
74
/* On-disk format of a WIM dentry (directory entry), located in the metadata
 * resource for a WIM image.  */
struct wim_dentry_on_disk {

        /* Length of this directory entry in bytes, not including any alternate
         * data stream entries.  Should be a multiple of 8 so that the following
         * dentry or alternate data stream entry is aligned on an 8-byte
         * boundary.  (If not, wimlib will round it up.)  It must be at least as
         * long as the fixed-length fields of the dentry (WIM_DENTRY_DISK_SIZE),
         * plus the lengths of the file name and/or short name if present.
         *
         * It is also possible for this field to be 0.  This situation, which is
         * undocumented, indicates the end of a list of sibling nodes in a
         * directory.  It also means the real length is 8, because the dentry
         * included only the length field, but that takes up 8 bytes.  */
        le64 length;

        /* Attributes of the file or directory.  This is a bitwise OR of the
         * FILE_ATTRIBUTE_* constants and should correspond to the value
         * retrieved by GetFileAttributes() on Windows. */
        le32 attributes;

        /* A value that specifies the security descriptor for this file or
         * directory.  If -1, the file or directory has no security descriptor.
         * Otherwise, it is a 0-based index into the WIM image's table of
         * security descriptors (see: `struct wim_security_data') */
        sle32 security_id;

        /* Offset, in bytes, from the start of the uncompressed metadata
         * resource of this directory's child directory entries, or 0 if this
         * directory entry does not correspond to a directory or otherwise does
         * not have any children. */
        le64 subdir_offset;

        /* Reserved fields */
        le64 unused_1;
        le64 unused_2;

        /* The following three time fields should correspond to those gotten by
         * calling GetFileTime() on Windows. */

        /* Creation time, in 100-nanosecond intervals since January 1, 1601. */
        le64 creation_time;

        /* Last access time, in 100-nanosecond intervals since January 1, 1601. */
        le64 last_access_time;

        /* Last write time, in 100-nanosecond intervals since January 1, 1601. */
        le64 last_write_time;

        /* Vaguely, the SHA-1 message digest ("hash") of the file's contents.
         * More specifically, this is for the "unnamed data stream" rather than
         * any "alternate data streams".  This hash value is used to look up the
         * corresponding entry in the WIM's stream lookup table to actually find
         * the file contents within the WIM.
         *
         * If the file has no unnamed data stream (e.g. is a directory), then
         * this field will be all zeroes.  If the unnamed data stream is empty
         * (i.e. an "empty file"), then this field is also expected to be all
         * zeroes.  (It will be if wimlib created the WIM image, at least;
         * otherwise it can't be ruled out that the SHA-1 message digest of 0
         * bytes of data is given explicitly.)
         *
         * If the file has reparse data, then this field will instead specify
         * the SHA-1 message digest of the reparse data.  If it is somehow
         * possible for a file to have both an unnamed data stream and reparse
         * data, then this is not handled by wimlib.
         *
         * As a further special case, if this field is all zeroes but there is
         * an alternate data stream entry with no name and a nonzero SHA-1
         * message digest field, then that hash must be used instead of this
         * one.  (wimlib does not use this quirk on WIM images it creates.)
         */
        u8 unnamed_stream_hash[SHA1_HASH_SIZE];

        /* The format of the following data is not yet completely known and they
         * do not correspond to Microsoft's documentation.
         *
         * If this directory entry is for a reparse point (has
         * FILE_ATTRIBUTE_REPARSE_POINT set in the attributes field), then the
         * version of the following fields containing the reparse tag is valid.
         * Furthermore, the field notated as not_rpfixed, as far as I can tell,
         * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the
         * targets of absolute symbolic links) were *not* done, and otherwise 0.
         *
         * If this directory entry is not for a reparse point, then the version
         * of the following fields containing the hard_link_group_id is valid.
         * All MS says about this field is that "If this file is part of a hard
         * link set, all the directory entries in the set will share the same
         * value in this field.".  However, more specifically I have observed
         * the following:
         *    - If the file is part of a hard link set of size 1, then the
         *    hard_link_group_id should be set to either 0, which is treated
         *    specially as indicating "not hardlinked", or any unique value.
         *    - The specific nonzero values used to identify hard link sets do
         *    not matter, as long as they are unique.
         *    - However, due to bugs in Microsoft's software, it is actually NOT
         *    guaranteed that directory entries that share the same hard link
         *    group ID are actually hard linked to each other.  We have to
         *    handle this by using special code to use distinguishing features
         *    (which is possible because some information about the underlying
         *    inode is repeated in each dentry) to split up these fake hard link
         *    groups into what they actually are supposed to be.
         *
         * Both variants of the union occupy 12 bytes.
         */
        union {
                struct {
                        le32 rp_unknown_1;
                        le32 reparse_tag;
                        le16 rp_unknown_2;
                        le16 not_rpfixed;
                } _packed_attribute reparse;
                struct {
                        le32 rp_unknown_1;
                        le64 hard_link_group_id;
                } _packed_attribute nonreparse;
        };

        /* Number of alternate data stream entries that directly follow this
         * dentry on-disk. */
        le16 num_alternate_data_streams;

        /* Length of this file's UTF-16LE encoded short name (8.3 DOS-compatible
         * name), if present, in bytes, excluding the null terminator.  If this
         * file has no short name, then this field should be 0.  */
        le16 short_name_nbytes;

        /* Length of this file's UTF-16LE encoded "long" name, in bytes,
         * excluding the null terminator.  If this file has no long name, then
         * this field should be 0.  It's expected that only the root dentry has
         * this field set to 0.  */
        le16 file_name_nbytes;

        /* Followed by variable length file name, in UTF16-LE, if
         * file_name_nbytes != 0.  Includes null terminator. */
        /*utf16lechar file_name[];*/

        /* Followed by variable length short name, in UTF16-LE, if
         * short_name_nbytes != 0.  Includes null terminator. */
        /*utf16lechar short_name[];*/
} _packed_attribute;

/* Size, in bytes, of the fixed-length fields of `struct wim_dentry_on_disk'
 * (assuming SHA1_HASH_SIZE is 20). */
#define WIM_DENTRY_DISK_SIZE 102
217
218 /* Calculates the unaligned length, in bytes, of an on-disk WIM dentry that has
219  * a file name and short name that take the specified numbers of bytes.  This
220  * excludes any alternate data stream entries that may follow the dentry. */
221 static u64
222 _dentry_correct_length_unaligned(u16 file_name_nbytes, u16 short_name_nbytes)
223 {
224         u64 length = sizeof(struct wim_dentry_on_disk);
225         if (file_name_nbytes)
226                 length += file_name_nbytes + 2;
227         if (short_name_nbytes)
228                 length += short_name_nbytes + 2;
229         return length;
230 }
231
232 /* Calculates the unaligned length, in bytes, of an on-disk WIM dentry, based on
233  * the file name length and short name length.  Note that dentry->length is
234  * ignored; also, this excludes any alternate data stream entries that may
235  * follow the dentry. */
236 static u64
237 dentry_correct_length_unaligned(const struct wim_dentry *dentry)
238 {
239         return _dentry_correct_length_unaligned(dentry->file_name_nbytes,
240                                                 dentry->short_name_nbytes);
241 }
242
243 /* Duplicates a string of system-dependent encoding into a UTF-16LE string and
244  * returns the string and its length, in bytes, in the pointer arguments.  Frees
245  * any existing string at the return location before overwriting it. */
246 static int
247 get_utf16le_name(const tchar *name, utf16lechar **name_utf16le_ret,
248                  u16 *name_utf16le_nbytes_ret)
249 {
250         utf16lechar *name_utf16le;
251         size_t name_utf16le_nbytes;
252         int ret;
253 #if TCHAR_IS_UTF16LE
254         name_utf16le_nbytes = tstrlen(name) * sizeof(utf16lechar);
255         name_utf16le = MALLOC(name_utf16le_nbytes + sizeof(utf16lechar));
256         if (!name_utf16le)
257                 return WIMLIB_ERR_NOMEM;
258         memcpy(name_utf16le, name, name_utf16le_nbytes + sizeof(utf16lechar));
259         ret = 0;
260 #else
261
262         ret = tstr_to_utf16le(name, tstrlen(name), &name_utf16le,
263                               &name_utf16le_nbytes);
264         if (ret == 0) {
265                 if (name_utf16le_nbytes > 0xffff) {
266                         FREE(name_utf16le);
267                         ERROR("Multibyte string \"%"TS"\" is too long!", name);
268                         ret = WIMLIB_ERR_INVALID_UTF8_STRING;
269                 }
270         }
271 #endif
272         if (ret == 0) {
273                 FREE(*name_utf16le_ret);
274                 *name_utf16le_ret = name_utf16le;
275                 *name_utf16le_nbytes_ret = name_utf16le_nbytes;
276         }
277         return ret;
278 }
279
280 /* Sets the name of a WIM dentry from a multibyte string. */
281 int
282 set_dentry_name(struct wim_dentry *dentry, const tchar *new_name)
283 {
284         int ret;
285         ret = get_utf16le_name(new_name, &dentry->file_name,
286                                &dentry->file_name_nbytes);
287         if (ret == 0) {
288                 /* Clear the short name and recalculate the dentry length */
289                 if (dentry_has_short_name(dentry)) {
290                         FREE(dentry->short_name);
291                         dentry->short_name = NULL;
292                         dentry->short_name_nbytes = 0;
293                 }
294         }
295         return ret;
296 }
297
298 /* Returns the total length of a WIM alternate data stream entry on-disk,
299  * including the stream name, the null terminator, AND the padding after the
300  * entry to align the next ADS entry or dentry on an 8-byte boundary. */
301 static u64
302 ads_entry_total_length(const struct wim_ads_entry *entry)
303 {
304         u64 len = sizeof(struct wim_ads_entry_on_disk);
305         if (entry->stream_name_nbytes)
306                 len += entry->stream_name_nbytes + 2;
307         return (len + 7) & ~7;
308 }
309
310
311 static u64
312 _dentry_total_length(const struct wim_dentry *dentry, u64 length)
313 {
314         const struct wim_inode *inode = dentry->d_inode;
315         for (u16 i = 0; i < inode->i_num_ads; i++)
316                 length += ads_entry_total_length(&inode->i_ads_entries[i]);
317         return (length + 7) & ~7;
318 }
319
320 /* Calculate the aligned *total* length of an on-disk WIM dentry.  This includes
321  * all alternate data streams. */
322 u64
323 dentry_correct_total_length(const struct wim_dentry *dentry)
324 {
325         return _dentry_total_length(dentry,
326                                     dentry_correct_length_unaligned(dentry));
327 }
328
329 /* Like dentry_correct_total_length(), but use the existing dentry->length field
330  * instead of calculating its "correct" value. */
331 static u64
332 dentry_total_length(const struct wim_dentry *dentry)
333 {
334         return _dentry_total_length(dentry, dentry->length);
335 }
336
337 int
338 for_dentry_in_rbtree(struct rb_node *root,
339                      int (*visitor)(struct wim_dentry *, void *),
340                      void *arg)
341 {
342         int ret;
343         struct rb_node *node = root;
344         LIST_HEAD(stack);
345         while (1) {
346                 if (node) {
347                         list_add(&rbnode_dentry(node)->tmp_list, &stack);
348                         node = node->rb_left;
349                 } else {
350                         struct list_head *next;
351                         struct wim_dentry *dentry;
352
353                         next = stack.next;
354                         if (next == &stack)
355                                 return 0;
356                         dentry = container_of(next, struct wim_dentry, tmp_list);
357                         list_del(next);
358                         ret = visitor(dentry, arg);
359                         if (ret != 0)
360                                 return ret;
361                         node = dentry->rb_node.rb_right;
362                 }
363         }
364 }
365
366 static int
367 for_dentry_tree_in_rbtree_depth(struct rb_node *node,
368                                 int (*visitor)(struct wim_dentry*, void*),
369                                 void *arg)
370 {
371         int ret;
372         if (node) {
373                 ret = for_dentry_tree_in_rbtree_depth(node->rb_left,
374                                                       visitor, arg);
375                 if (ret != 0)
376                         return ret;
377                 ret = for_dentry_tree_in_rbtree_depth(node->rb_right,
378                                                       visitor, arg);
379                 if (ret != 0)
380                         return ret;
381                 ret = for_dentry_in_tree_depth(rbnode_dentry(node), visitor, arg);
382                 if (ret != 0)
383                         return ret;
384         }
385         return 0;
386 }
387
388 static int
389 for_dentry_tree_in_rbtree(struct rb_node *node,
390                           int (*visitor)(struct wim_dentry*, void*),
391                           void *arg)
392 {
393         int ret;
394         if (node) {
395                 ret = for_dentry_tree_in_rbtree(node->rb_left, visitor, arg);
396                 if (ret)
397                         return ret;
398                 ret = for_dentry_in_tree(rbnode_dentry(node), visitor, arg);
399                 if (ret)
400                         return ret;
401                 ret = for_dentry_tree_in_rbtree(node->rb_right, visitor, arg);
402                 if (ret)
403                         return ret;
404         }
405         return 0;
406 }
407
408 /* Calls a function on all directory entries in a WIM dentry tree.  Logically,
409  * this is a pre-order traversal (the function is called on a parent dentry
410  * before its children), but sibling dentries will be visited in order as well.
411  * */
412 int
413 for_dentry_in_tree(struct wim_dentry *root,
414                    int (*visitor)(struct wim_dentry*, void*), void *arg)
415 {
416         int ret;
417
418         if (!root)
419                 return 0;
420         ret = (*visitor)(root, arg);
421         if (ret)
422                 return ret;
423         return for_dentry_tree_in_rbtree(root->d_inode->i_children.rb_node,
424                                          visitor,
425                                          arg);
426 }
427
428 /* Like for_dentry_in_tree(), but the visitor function is always called on a
429  * dentry's children before on itself. */
430 int
431 for_dentry_in_tree_depth(struct wim_dentry *root,
432                          int (*visitor)(struct wim_dentry*, void*), void *arg)
433 {
434         int ret;
435
436         if (!root)
437                 return 0;
438         ret = for_dentry_tree_in_rbtree_depth(root->d_inode->i_children.rb_node,
439                                               visitor, arg);
440         if (ret)
441                 return ret;
442         return (*visitor)(root, arg);
443 }
444
445 /* Calculate the full path of @dentry.  The full path of its parent must have
446  * already been calculated, or it must be the root dentry. */
447 int
448 calculate_dentry_full_path(struct wim_dentry *dentry)
449 {
450         tchar *full_path;
451         u32 full_path_nbytes;
452         int ret;
453
454         if (dentry->_full_path)
455                 return 0;
456
457         if (dentry_is_root(dentry)) {
458                 static const tchar _root_path[] = {WIM_PATH_SEPARATOR, T('\0')};
459                 full_path = TSTRDUP(_root_path);
460                 if (!full_path)
461                         return WIMLIB_ERR_NOMEM;
462                 full_path_nbytes = 1 * sizeof(tchar);
463         } else {
464                 struct wim_dentry *parent;
465                 tchar *parent_full_path;
466                 u32 parent_full_path_nbytes;
467                 size_t filename_nbytes;
468
469                 parent = dentry->parent;
470                 if (dentry_is_root(parent)) {
471                         parent_full_path = T("");
472                         parent_full_path_nbytes = 0;
473                 } else {
474                         if (!parent->_full_path) {
475                                 ret = calculate_dentry_full_path(parent);
476                                 if (ret)
477                                         return ret;
478                         }
479                         parent_full_path = parent->_full_path;
480                         parent_full_path_nbytes = parent->full_path_nbytes;
481                 }
482
483                 /* Append this dentry's name as a tchar string to the full path
484                  * of the parent followed by the path separator */
485         #if TCHAR_IS_UTF16LE
486                 filename_nbytes = dentry->file_name_nbytes;
487         #else
488                 {
489                         int ret = utf16le_to_tstr_nbytes(dentry->file_name,
490                                                          dentry->file_name_nbytes,
491                                                          &filename_nbytes);
492                         if (ret)
493                                 return ret;
494                 }
495         #endif
496
497                 full_path_nbytes = parent_full_path_nbytes + sizeof(tchar) +
498                                    filename_nbytes;
499                 full_path = MALLOC(full_path_nbytes + sizeof(tchar));
500                 if (!full_path)
501                         return WIMLIB_ERR_NOMEM;
502                 memcpy(full_path, parent_full_path, parent_full_path_nbytes);
503                 full_path[parent_full_path_nbytes / sizeof(tchar)] = WIM_PATH_SEPARATOR;
504         #if TCHAR_IS_UTF16LE
505                 memcpy(&full_path[parent_full_path_nbytes / sizeof(tchar) + 1],
506                        dentry->file_name,
507                        filename_nbytes + sizeof(tchar));
508         #else
509                 utf16le_to_tstr_buf(dentry->file_name,
510                                     dentry->file_name_nbytes,
511                                     &full_path[parent_full_path_nbytes /
512                                                sizeof(tchar) + 1]);
513         #endif
514         }
515         dentry->_full_path = full_path;
516         dentry->full_path_nbytes= full_path_nbytes;
517         return 0;
518 }
519
/* Visitor adapter that lets calculate_dentry_full_path() be used with
 * for_dentry_in_tree(); the extra visitor argument is not used. */
static int
do_calculate_dentry_full_path(struct wim_dentry *dentry, void *_ignore)
{
        (void)_ignore;
        return calculate_dentry_full_path(dentry);
}
525
526 int
527 calculate_dentry_tree_full_paths(struct wim_dentry *root)
528 {
529         return for_dentry_in_tree(root, do_calculate_dentry_full_path, NULL);
530 }
531
532 tchar *
533 dentry_full_path(struct wim_dentry *dentry)
534 {
535         calculate_dentry_full_path(dentry);
536         return dentry->_full_path;
537 }
538
539 static int
540 increment_subdir_offset(struct wim_dentry *dentry, void *subdir_offset_p)
541 {
542         *(u64*)subdir_offset_p += dentry_correct_total_length(dentry);
543         return 0;
544 }
545
546 static int
547 call_calculate_subdir_offsets(struct wim_dentry *dentry, void *subdir_offset_p)
548 {
549         calculate_subdir_offsets(dentry, subdir_offset_p);
550         return 0;
551 }
552
553 /*
554  * Recursively calculates the subdir offsets for a directory tree.
555  *
556  * @dentry:  The root of the directory tree.
557  * @subdir_offset_p:  The current subdirectory offset; i.e., the subdirectory
558  *                    offset for @dentry.
559  */
560 void
561 calculate_subdir_offsets(struct wim_dentry *dentry, u64 *subdir_offset_p)
562 {
563         struct rb_node *node;
564
565         dentry->subdir_offset = *subdir_offset_p;
566         node = dentry->d_inode->i_children.rb_node;
567         if (node) {
568                 /* Advance the subdir offset by the amount of space the children
569                  * of this dentry take up. */
570                 for_dentry_in_rbtree(node, increment_subdir_offset, subdir_offset_p);
571
572                 /* End-of-directory dentry on disk. */
573                 *subdir_offset_p += 8;
574
575                 /* Recursively call calculate_subdir_offsets() on all the
576                  * children. */
577                 for_dentry_in_rbtree(node, call_calculate_subdir_offsets, subdir_offset_p);
578         } else {
579                 /* On disk, childless directories have a valid subdir_offset
580                  * that points to an 8-byte end-of-directory dentry.  Regular
581                  * files or reparse points have a subdir_offset of 0. */
582                 if (dentry_is_directory(dentry))
583                         *subdir_offset_p += 8;
584                 else
585                         dentry->subdir_offset = 0;
586         }
587 }
588
589 /* Case-sensitive UTF-16LE dentry or stream name comparison.  Used on both UNIX
590  * (always) and Windows (sometimes) */
591 static int
592 compare_utf16le_names_case_sensitive(const utf16lechar *name1, size_t nbytes1,
593                                      const utf16lechar *name2, size_t nbytes2)
594 {
595         /* Return the result if the strings differ up to their minimum length.
596          * Note that we cannot use strcmp() or strncmp() here, as the strings
597          * are in UTF-16LE format. */
598         int result = memcmp(name1, name2, min(nbytes1, nbytes2));
599         if (result)
600                 return result;
601
602         /* The strings are the same up to their minimum length, so return a
603          * result based on their lengths. */
604         if (nbytes1 < nbytes2)
605                 return -1;
606         else if (nbytes1 > nbytes2)
607                 return 1;
608         else
609                 return 0;
610 }
611
#ifdef __WIN32__
/* Windows only: case-insensitive comparison of two UTF-16LE dentry or stream
 * names given with their lengths in bytes.
 *
 * The names are compared with _wcsnicmp() over their common length in
 * 2-byte characters.  If they are equal over that length, the shorter name
 * orders first.  Returns a negative, zero, or positive value accordingly. */
static int
compare_utf16le_names_case_insensitive(const utf16lechar *name1, size_t nbytes1,
                                       const utf16lechar *name2, size_t nbytes2)
{
        const size_t nchars1 = nbytes1 / 2;
        const size_t nchars2 = nbytes2 / 2;
        const size_t common_nchars = (nchars1 < nchars2) ? nchars1 : nchars2;
        const int diff = _wcsnicmp((const wchar_t*)name1,
                                   (const wchar_t*)name2,
                                   common_nchars);

        if (diff != 0)
                return diff;

        /* Equal prefixes: yields -1, 0, or 1 from the lengths alone. */
        return (nbytes1 > nbytes2) - (nbytes1 < nbytes2);
}
#endif /* __WIN32__ */
635
/* Default UTF-16LE name comparison for this platform: case-insensitive when
 * __WIN32__ is defined, case-sensitive otherwise. */
#ifdef __WIN32__
#  define compare_utf16le_names compare_utf16le_names_case_insensitive
#else
#  define compare_utf16le_names compare_utf16le_names_case_sensitive
#endif
641
642
#ifdef __WIN32__
/* Windows only: case-insensitive comparison of the file names of the
 * dentries @d1 and @d2.  Returns a negative, zero, or positive value, as
 * compare_utf16le_names_case_insensitive() does. */
static int
dentry_compare_names_case_insensitive(const struct wim_dentry *d1,
                                      const struct wim_dentry *d2)
{
        const utf16lechar *name1 = d1->file_name;
        const utf16lechar *name2 = d2->file_name;

        return compare_utf16le_names_case_insensitive(name1,
                                                      d1->file_name_nbytes,
                                                      name2,
                                                      d2->file_name_nbytes);
}
#endif /* __WIN32__ */
654
655 static int
656 dentry_compare_names_case_sensitive(const struct wim_dentry *d1,
657                                     const struct wim_dentry *d2)
658 {
659         return compare_utf16le_names_case_sensitive(d1->file_name,
660                                                     d1->file_name_nbytes,
661                                                     d2->file_name,
662                                                     d2->file_name_nbytes);
663 }
664
/* Default dentry name comparison for this platform: case-insensitive when
 * __WIN32__ is defined, case-sensitive otherwise. */
#ifdef __WIN32__
#  define dentry_compare_names dentry_compare_names_case_insensitive
#else
#  define dentry_compare_names dentry_compare_names_case_sensitive
#endif
670
671 /* Return %true iff the alternate data stream entry @entry has the UTF-16LE
672  * stream name @name that has length @name_nbytes bytes. */
673 static inline bool
674 ads_entry_has_name(const struct wim_ads_entry *entry,
675                    const utf16lechar *name, size_t name_nbytes)
676 {
677         return !compare_utf16le_names(name, name_nbytes,
678                                       entry->stream_name,
679                                       entry->stream_name_nbytes);
680 }
681
682 /* Given a UTF-16LE filename and a directory, look up the dentry for the file.
683  * Return it if found, otherwise NULL.  This is case-sensitive on UNIX and
684  * case-insensitive on Windows. */
685 struct wim_dentry *
686 get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
687                                    const utf16lechar *name,
688                                    size_t name_nbytes)
689 {
690         struct rb_node *node;
691
692 #ifdef __WIN32__
693         node = dentry->d_inode->i_children_case_insensitive.rb_node;
694 #else
695         node = dentry->d_inode->i_children.rb_node;
696 #endif
697
698         struct wim_dentry *child;
699         while (node) {
700         #ifdef __WIN32__
701                 child = rb_entry(node, struct wim_dentry, rb_node_case_insensitive);
702         #else
703                 child = rbnode_dentry(node);
704         #endif
705                 int result = compare_utf16le_names(name, name_nbytes,
706                                                    child->file_name,
707                                                    child->file_name_nbytes);
708                 if (result < 0)
709                         node = node->rb_left;
710                 else if (result > 0)
711                         node = node->rb_right;
712                 else {
713                 #ifdef __WIN32__
714                         if (!list_empty(&child->case_insensitive_conflict_list))
715                         {
716                                 WARNING("Result of case-insensitive lookup is ambiguous "
717                                         "(returning \"%ls\" instead of \"%ls\")",
718                                         child->file_name,
719                                         container_of(child->case_insensitive_conflict_list.next,
720                                                      struct wim_dentry,
721                                                      case_insensitive_conflict_list)->file_name);
722                         }
723                 #endif
724                         return child;
725                 }
726         }
727         return NULL;
728 }
729
730 /* Returns the child of @dentry that has the file name @name.  Returns NULL if
731  * no child has the name. */
732 struct wim_dentry *
733 get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name)
734 {
735 #if TCHAR_IS_UTF16LE
736         return get_dentry_child_with_utf16le_name(dentry, name,
737                                                   tstrlen(name) * sizeof(tchar));
738 #else
739         utf16lechar *utf16le_name;
740         size_t utf16le_name_nbytes;
741         int ret;
742         struct wim_dentry *child;
743
744         ret = tstr_to_utf16le(name, tstrlen(name) * sizeof(tchar),
745                               &utf16le_name, &utf16le_name_nbytes);
746         if (ret) {
747                 child = NULL;
748         } else {
749                 child = get_dentry_child_with_utf16le_name(dentry,
750                                                            utf16le_name,
751                                                            utf16le_name_nbytes);
752                 FREE(utf16le_name);
753         }
754         return child;
755 #endif
756 }
757
758 static struct wim_dentry *
759 get_dentry_utf16le(WIMStruct *wim, const utf16lechar *path)
760 {
761         struct wim_dentry *cur_dentry, *parent_dentry;
762         const utf16lechar *p, *pp;
763
764         cur_dentry = parent_dentry = wim_root_dentry(wim);
765         if (!cur_dentry) {
766                 errno = ENOENT;
767                 return NULL;
768         }
769         p = path;
770         while (1) {
771                 while (*p == cpu_to_le16(WIM_PATH_SEPARATOR))
772                         p++;
773                 if (*p == cpu_to_le16('\0'))
774                         break;
775                 pp = p;
776                 while (*pp != cpu_to_le16(WIM_PATH_SEPARATOR) &&
777                        *pp != cpu_to_le16('\0'))
778                         pp++;
779
780                 cur_dentry = get_dentry_child_with_utf16le_name(parent_dentry, p,
781                                                                 (void*)pp - (void*)p);
782                 if (cur_dentry == NULL)
783                         break;
784                 p = pp;
785                 parent_dentry = cur_dentry;
786         }
787         if (cur_dentry == NULL) {
788                 if (dentry_is_directory(parent_dentry))
789                         errno = ENOENT;
790                 else
791                         errno = ENOTDIR;
792         }
793         return cur_dentry;
794 }
795
796 /* Returns the dentry corresponding to the @path, or NULL if there is no such
797  * dentry. */
798 struct wim_dentry *
799 get_dentry(WIMStruct *wim, const tchar *path)
800 {
801 #if TCHAR_IS_UTF16LE
802         return get_dentry_utf16le(wim, path);
803 #else
804         utf16lechar *path_utf16le;
805         size_t path_utf16le_nbytes;
806         int ret;
807         struct wim_dentry *dentry;
808
809         ret = tstr_to_utf16le(path, tstrlen(path) * sizeof(tchar),
810                               &path_utf16le, &path_utf16le_nbytes);
811         if (ret)
812                 return NULL;
813         dentry = get_dentry_utf16le(wim, path_utf16le);
814         FREE(path_utf16le);
815         return dentry;
816 #endif
817 }
818
819 struct wim_inode *
820 wim_pathname_to_inode(WIMStruct *wim, const tchar *path)
821 {
822         struct wim_dentry *dentry;
823         dentry = get_dentry(wim, path);
824         if (dentry)
825                 return dentry->d_inode;
826         else
827                 return NULL;
828 }
829
830 /* Takes in a path of length @len in @buf, and transforms it into a string for
831  * the path of its parent directory. */
832 static void
833 to_parent_name(tchar *buf, size_t len)
834 {
835         ssize_t i = (ssize_t)len - 1;
836         while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR)
837                 i--;
838         while (i >= 0 && buf[i] != WIM_PATH_SEPARATOR)
839                 i--;
840         while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR)
841                 i--;
842         buf[i + 1] = T('\0');
843 }
844
845 /* Returns the dentry that corresponds to the parent directory of @path, or NULL
846  * if the dentry is not found. */
847 struct wim_dentry *
848 get_parent_dentry(WIMStruct *wim, const tchar *path)
849 {
850         size_t path_len = tstrlen(path);
851         tchar buf[path_len + 1];
852
853         tmemcpy(buf, path, path_len + 1);
854         to_parent_name(buf, path_len);
855         return get_dentry(wim, buf);
856 }
857
858 /* Prints the full path of a dentry. */
859 int
860 print_dentry_full_path(struct wim_dentry *dentry, void *_ignore)
861 {
862         int ret = calculate_dentry_full_path(dentry);
863         if (ret)
864                 return ret;
865         tprintf(T("%"TS"\n"), dentry->_full_path);
866         return 0;
867 }
868
869 /* We want to be able to show the names of the file attribute flags that are
870  * set. */
871 struct file_attr_flag {
872         u32 flag;
873         const tchar *name;
874 };
875 struct file_attr_flag file_attr_flags[] = {
876         {FILE_ATTRIBUTE_READONLY,           T("READONLY")},
877         {FILE_ATTRIBUTE_HIDDEN,             T("HIDDEN")},
878         {FILE_ATTRIBUTE_SYSTEM,             T("SYSTEM")},
879         {FILE_ATTRIBUTE_DIRECTORY,          T("DIRECTORY")},
880         {FILE_ATTRIBUTE_ARCHIVE,            T("ARCHIVE")},
881         {FILE_ATTRIBUTE_DEVICE,             T("DEVICE")},
882         {FILE_ATTRIBUTE_NORMAL,             T("NORMAL")},
883         {FILE_ATTRIBUTE_TEMPORARY,          T("TEMPORARY")},
884         {FILE_ATTRIBUTE_SPARSE_FILE,        T("SPARSE_FILE")},
885         {FILE_ATTRIBUTE_REPARSE_POINT,      T("REPARSE_POINT")},
886         {FILE_ATTRIBUTE_COMPRESSED,         T("COMPRESSED")},
887         {FILE_ATTRIBUTE_OFFLINE,            T("OFFLINE")},
888         {FILE_ATTRIBUTE_NOT_CONTENT_INDEXED,T("NOT_CONTENT_INDEXED")},
889         {FILE_ATTRIBUTE_ENCRYPTED,          T("ENCRYPTED")},
890         {FILE_ATTRIBUTE_VIRTUAL,            T("VIRTUAL")},
891 };
892
893 /* Prints a directory entry.  @lookup_table is a pointer to the lookup table, if
894  * available.  If the dentry is unresolved and the lookup table is NULL, the
895  * lookup table entries will not be printed.  Otherwise, they will be. */
896 int
897 print_dentry(struct wim_dentry *dentry, void *lookup_table)
898 {
899         const u8 *hash;
900         struct wim_lookup_table_entry *lte;
901         const struct wim_inode *inode = dentry->d_inode;
902         tchar buf[50];
903
904         tprintf(T("[DENTRY]\n"));
905         tprintf(T("Length            = %"PRIu64"\n"), dentry->length);
906         tprintf(T("Attributes        = 0x%x\n"), inode->i_attributes);
907         for (size_t i = 0; i < ARRAY_LEN(file_attr_flags); i++)
908                 if (file_attr_flags[i].flag & inode->i_attributes)
909                         tprintf(T("    FILE_ATTRIBUTE_%"TS" is set\n"),
910                                 file_attr_flags[i].name);
911         tprintf(T("Security ID       = %d\n"), inode->i_security_id);
912         tprintf(T("Subdir offset     = %"PRIu64"\n"), dentry->subdir_offset);
913
914         wim_timestamp_to_str(inode->i_creation_time, buf, sizeof(buf));
915         tprintf(T("Creation Time     = %"TS"\n"), buf);
916
917         wim_timestamp_to_str(inode->i_last_access_time, buf, sizeof(buf));
918         tprintf(T("Last Access Time  = %"TS"\n"), buf);
919
920         wim_timestamp_to_str(inode->i_last_write_time, buf, sizeof(buf));
921         tprintf(T("Last Write Time   = %"TS"\n"), buf);
922
923         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
924                 tprintf(T("Reparse Tag       = 0x%"PRIx32"\n"), inode->i_reparse_tag);
925                 tprintf(T("Reparse Point Flags = 0x%"PRIx16"\n"),
926                         inode->i_not_rpfixed);
927                 tprintf(T("Reparse Point Unknown 2 = 0x%"PRIx32"\n"),
928                         inode->i_rp_unknown_2);
929         }
930         tprintf(T("Reparse Point Unknown 1 = 0x%"PRIx32"\n"),
931                 inode->i_rp_unknown_1);
932         tprintf(T("Hard Link Group   = 0x%"PRIx64"\n"), inode->i_ino);
933         tprintf(T("Hard Link Group Size = %"PRIu32"\n"), inode->i_nlink);
934         tprintf(T("Number of Alternate Data Streams = %hu\n"), inode->i_num_ads);
935         if (dentry_has_long_name(dentry))
936                 wimlib_printf(T("Filename = \"%"WS"\"\n"), dentry->file_name);
937         if (dentry_has_short_name(dentry))
938                 wimlib_printf(T("Short Name \"%"WS"\"\n"), dentry->short_name);
939         if (dentry->_full_path)
940                 tprintf(T("Full Path = \"%"TS"\"\n"), dentry->_full_path);
941
942         lte = inode_stream_lte(dentry->d_inode, 0, lookup_table);
943         if (lte) {
944                 print_lookup_table_entry(lte, stdout);
945         } else {
946                 hash = inode_stream_hash(inode, 0);
947                 if (hash) {
948                         tprintf(T("Hash              = 0x"));
949                         print_hash(hash, stdout);
950                         tputchar(T('\n'));
951                         tputchar(T('\n'));
952                 }
953         }
954         for (u16 i = 0; i < inode->i_num_ads; i++) {
955                 tprintf(T("[Alternate Stream Entry %u]\n"), i);
956                 wimlib_printf(T("Name = \"%"WS"\"\n"),
957                               inode->i_ads_entries[i].stream_name);
958                 tprintf(T("Name Length (UTF16 bytes) = %hu\n"),
959                        inode->i_ads_entries[i].stream_name_nbytes);
960                 hash = inode_stream_hash(inode, i + 1);
961                 if (hash) {
962                         tprintf(T("Hash              = 0x"));
963                         print_hash(hash, stdout);
964                         tputchar(T('\n'));
965                 }
966                 print_lookup_table_entry(inode_stream_lte(inode, i + 1, lookup_table),
967                                          stdout);
968         }
969         return 0;
970 }
971
972 /* Initializations done on every `struct wim_dentry'. */
973 static void
974 dentry_common_init(struct wim_dentry *dentry)
975 {
976         memset(dentry, 0, sizeof(struct wim_dentry));
977 }
978
979 struct wim_inode *
980 new_timeless_inode(void)
981 {
982         struct wim_inode *inode = CALLOC(1, sizeof(struct wim_inode));
983         if (inode) {
984                 inode->i_security_id = -1;
985                 inode->i_nlink = 1;
986                 inode->i_next_stream_id = 1;
987                 inode->i_not_rpfixed = 1;
988                 INIT_LIST_HEAD(&inode->i_list);
989         #ifdef WITH_FUSE
990                 if (pthread_mutex_init(&inode->i_mutex, NULL) != 0) {
991                         ERROR_WITH_ERRNO("Error initializing mutex");
992                         FREE(inode);
993                         return NULL;
994                 }
995         #endif
996                 INIT_LIST_HEAD(&inode->i_dentry);
997         }
998         return inode;
999 }
1000
1001 static struct wim_inode *
1002 new_inode(void)
1003 {
1004         struct wim_inode *inode = new_timeless_inode();
1005         if (inode) {
1006                 u64 now = get_wim_timestamp();
1007                 inode->i_creation_time = now;
1008                 inode->i_last_access_time = now;
1009                 inode->i_last_write_time = now;
1010         }
1011         return inode;
1012 }
1013
1014 /* Creates an unlinked directory entry. */
1015 int
1016 new_dentry(const tchar *name, struct wim_dentry **dentry_ret)
1017 {
1018         struct wim_dentry *dentry;
1019         int ret;
1020
1021         dentry = MALLOC(sizeof(struct wim_dentry));
1022         if (!dentry)
1023                 return WIMLIB_ERR_NOMEM;
1024
1025         dentry_common_init(dentry);
1026         ret = set_dentry_name(dentry, name);
1027         if (ret == 0) {
1028                 dentry->parent = dentry;
1029                 *dentry_ret = dentry;
1030         } else {
1031                 FREE(dentry);
1032                 ERROR("Failed to set name on new dentry with name \"%"TS"\"",
1033                       name);
1034         }
1035         return ret;
1036 }
1037
1038
1039 static int
1040 _new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret,
1041                         bool timeless)
1042 {
1043         struct wim_dentry *dentry;
1044         int ret;
1045
1046         ret = new_dentry(name, &dentry);
1047         if (ret)
1048                 return ret;
1049
1050         if (timeless)
1051                 dentry->d_inode = new_timeless_inode();
1052         else
1053                 dentry->d_inode = new_inode();
1054         if (!dentry->d_inode) {
1055                 free_dentry(dentry);
1056                 return WIMLIB_ERR_NOMEM;
1057         }
1058
1059         inode_add_dentry(dentry, dentry->d_inode);
1060         *dentry_ret = dentry;
1061         return 0;
1062 }
1063
1064 int
1065 new_dentry_with_timeless_inode(const tchar *name, struct wim_dentry **dentry_ret)
1066 {
1067         return _new_dentry_with_inode(name, dentry_ret, true);
1068 }
1069
1070 int
1071 new_dentry_with_inode(const tchar *name, struct wim_dentry **dentry_ret)
1072 {
1073         return _new_dentry_with_inode(name, dentry_ret, false);
1074 }
1075
1076 int
1077 new_filler_directory(const tchar *name, struct wim_dentry **dentry_ret)
1078 {
1079         int ret;
1080         struct wim_dentry *dentry;
1081
1082         DEBUG("Creating filler directory \"%"TS"\"", name);
1083         ret = new_dentry_with_inode(name, &dentry);
1084         if (ret)
1085                 return ret;
1086         /* Leave the inode number as 0; this is allowed for non
1087          * hard-linked files. */
1088         dentry->d_inode->i_resolved = 1;
1089         dentry->d_inode->i_attributes = FILE_ATTRIBUTE_DIRECTORY;
1090         *dentry_ret = dentry;
1091         return 0;
1092 }
1093
1094 static int
1095 init_ads_entry(struct wim_ads_entry *ads_entry, const void *name,
1096                size_t name_nbytes, bool is_utf16le)
1097 {
1098         int ret = 0;
1099         memset(ads_entry, 0, sizeof(*ads_entry));
1100
1101         if (is_utf16le) {
1102                 utf16lechar *p = MALLOC(name_nbytes + sizeof(utf16lechar));
1103                 if (!p)
1104                         return WIMLIB_ERR_NOMEM;
1105                 memcpy(p, name, name_nbytes);
1106                 p[name_nbytes / 2] = cpu_to_le16(0);
1107                 ads_entry->stream_name = p;
1108                 ads_entry->stream_name_nbytes = name_nbytes;
1109         } else {
1110                 if (name && *(const tchar*)name != T('\0')) {
1111                         ret = get_utf16le_name(name, &ads_entry->stream_name,
1112                                                &ads_entry->stream_name_nbytes);
1113                 }
1114         }
1115         return ret;
1116 }
1117
1118 static void
1119 destroy_ads_entry(struct wim_ads_entry *ads_entry)
1120 {
1121         FREE(ads_entry->stream_name);
1122 }
1123
1124 /* Frees an inode. */
1125 void
1126 free_inode(struct wim_inode *inode)
1127 {
1128         if (inode) {
1129                 if (inode->i_ads_entries) {
1130                         for (u16 i = 0; i < inode->i_num_ads; i++)
1131                                 destroy_ads_entry(&inode->i_ads_entries[i]);
1132                         FREE(inode->i_ads_entries);
1133                 }
1134         #ifdef WITH_FUSE
1135                 wimlib_assert(inode->i_num_opened_fds == 0);
1136                 FREE(inode->i_fds);
1137                 pthread_mutex_destroy(&inode->i_mutex);
1138         #endif
1139                 /* HACK: This may instead delete the inode from i_list, but the
1140                  * hlist_del() behaves the same as list_del(). */
1141                 if (!hlist_unhashed(&inode->i_hlist))
1142                         hlist_del(&inode->i_hlist);
1143                 FREE(inode->i_extracted_file);
1144                 FREE(inode);
1145         }
1146 }
1147
1148 /* Decrements link count on an inode and frees it if the link count reaches 0.
1149  * */
1150 static void
1151 put_inode(struct wim_inode *inode)
1152 {
1153         wimlib_assert(inode->i_nlink != 0);
1154         if (--inode->i_nlink == 0) {
1155         #ifdef WITH_FUSE
1156                 if (inode->i_num_opened_fds == 0)
1157         #endif
1158                 {
1159                         free_inode(inode);
1160                 }
1161         }
1162 }
1163
1164 /* Frees a WIM dentry.
1165  *
1166  * The corresponding inode (if any) is freed only if its link count is
1167  * decremented to 0.
1168  */
1169 void
1170 free_dentry(struct wim_dentry *dentry)
1171 {
1172         if (dentry) {
1173                 FREE(dentry->file_name);
1174                 FREE(dentry->short_name);
1175                 FREE(dentry->_full_path);
1176                 if (dentry->d_inode)
1177                         put_inode(dentry->d_inode);
1178                 FREE(dentry);
1179         }
1180 }
1181
1182 /* This function is passed as an argument to for_dentry_in_tree_depth() in order
1183  * to free a directory tree. */
1184 static int
1185 do_free_dentry(struct wim_dentry *dentry, void *_lookup_table)
1186 {
1187         struct wim_lookup_table *lookup_table = _lookup_table;
1188
1189         if (lookup_table) {
1190                 struct wim_inode *inode = dentry->d_inode;
1191                 for (unsigned i = 0; i <= inode->i_num_ads; i++) {
1192                         struct wim_lookup_table_entry *lte;
1193
1194                         lte = inode_stream_lte(inode, i, lookup_table);
1195                         if (lte)
1196                                 lte_decrement_refcnt(lte, lookup_table);
1197                 }
1198         }
1199         free_dentry(dentry);
1200         return 0;
1201 }
1202
1203 /*
1204  * Unlinks and frees a dentry tree.
1205  *
1206  * @root:               The root of the tree.
1207  * @lookup_table:       The lookup table for dentries.  If non-NULL, the
1208  *                      reference counts in the lookup table for the lookup
1209  *                      table entries corresponding to the dentries will be
1210  *                      decremented.
1211  */
1212 void
1213 free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table)
1214 {
1215         for_dentry_in_tree_depth(root, do_free_dentry, lookup_table);
1216 }
1217
#ifdef __WIN32__

/*
 * Inserts @child into the case-insensitive child index of @parent.
 *
 * The index is a red-black tree in which at most one dentry per
 * case-insensitive name is present; dentries whose names collide are chained
 * together in a list by the caller instead.
 *
 * Returns NULL if @child was inserted into the tree.  If a dentry with the
 * same case-insensitive name is already in the tree, that dentry is returned
 * and the tree is left unchanged.
 */
static struct wim_dentry *
dentry_add_child_case_insensitive(struct wim_dentry *parent,
                                  struct wim_dentry *child)
{
        struct rb_root *root = &parent->d_inode->i_children_case_insensitive;
        struct rb_node **link = &root->rb_node;
        struct rb_node *above = NULL;

        /* Descend to the empty link where @child belongs. */
        while (*link != NULL) {
                struct wim_dentry *occupant =
                        container_of(*link, struct wim_dentry,
                                     rb_node_case_insensitive);
                int cmp = dentry_compare_names_case_insensitive(child, occupant);

                if (cmp == 0)
                        return occupant;

                above = *link;
                link = (cmp < 0) ? &above->rb_left : &above->rb_right;
        }
        rb_link_node(&child->rb_node_case_insensitive, above, link);
        rb_insert_color(&child->rb_node_case_insensitive, root);
        return NULL;
}
#endif
1256
1257 /*
1258  * Links a dentry into the directory tree.
1259  *
1260  * @parent: The dentry that will be the parent of @child.
1261  * @child: The dentry to link.
1262  *
1263  * Returns NULL if successful.  If @parent already contains a dentry with the
1264  * same case-sensitive name as @child, the pointer to this duplicate dentry is
1265  * returned.
1266  */
1267 struct wim_dentry *
1268 dentry_add_child(struct wim_dentry * restrict parent,
1269                  struct wim_dentry * restrict child)
1270 {
1271         struct rb_root *root;
1272         struct rb_node **new;
1273         struct rb_node *rb_parent;
1274
1275         wimlib_assert(dentry_is_directory(parent));
1276         wimlib_assert(parent != child);
1277
1278         /* Case sensitive child dentry index */
1279         root = &parent->d_inode->i_children;
1280         new = &root->rb_node;
1281         rb_parent = NULL;
1282         while (*new) {
1283                 struct wim_dentry *this = rbnode_dentry(*new);
1284                 int result = dentry_compare_names_case_sensitive(child, this);
1285
1286                 rb_parent = *new;
1287
1288                 if (result < 0)
1289                         new = &((*new)->rb_left);
1290                 else if (result > 0)
1291                         new = &((*new)->rb_right);
1292                 else
1293                         return this;
1294         }
1295         child->parent = parent;
1296         rb_link_node(&child->rb_node, rb_parent, new);
1297         rb_insert_color(&child->rb_node, root);
1298
1299 #ifdef __WIN32__
1300         {
1301                 struct wim_dentry *existing;
1302                 existing = dentry_add_child_case_insensitive(parent, child);
1303                 if (existing) {
1304                         list_add(&child->case_insensitive_conflict_list,
1305                                  &existing->case_insensitive_conflict_list);
1306                         child->rb_node_case_insensitive.__rb_parent_color = 0;
1307                 } else {
1308                         INIT_LIST_HEAD(&child->case_insensitive_conflict_list);
1309                 }
1310         }
1311 #endif
1312         return NULL;
1313 }
1314
1315 /* Unlink a WIM dentry from the directory entry tree. */
1316 void
1317 unlink_dentry(struct wim_dentry *dentry)
1318 {
1319         struct wim_dentry *parent = dentry->parent;
1320
1321         if (parent == dentry)
1322                 return;
1323         rb_erase(&dentry->rb_node, &parent->d_inode->i_children);
1324 #ifdef __WIN32__
1325         if (dentry->rb_node_case_insensitive.__rb_parent_color) {
1326                 /* This dentry was in the case-insensitive red-black tree. */
1327                 rb_erase(&dentry->rb_node_case_insensitive,
1328                          &parent->d_inode->i_children_case_insensitive);
1329                 if (!list_empty(&dentry->case_insensitive_conflict_list)) {
1330                         /* Make a different case-insensitively-the-same dentry
1331                          * be the "representative" in the red-black tree. */
1332                         struct list_head *next;
1333                         struct wim_dentry *other;
1334                         struct wim_dentry *existing;
1335
1336                         next = dentry->case_insensitive_conflict_list.next;
1337                         other = list_entry(next, struct wim_dentry, case_insensitive_conflict_list);
1338                         existing = dentry_add_child_case_insensitive(parent, other);
1339                         wimlib_assert(existing == NULL);
1340                 }
1341         }
1342         list_del(&dentry->case_insensitive_conflict_list);
1343 #endif
1344 }
1345
1346 /*
1347  * Returns the alternate data stream entry belonging to @inode that has the
1348  * stream name @stream_name.
1349  */
1350 struct wim_ads_entry *
1351 inode_get_ads_entry(struct wim_inode *inode, const tchar *stream_name,
1352                     u16 *idx_ret)
1353 {
1354         if (inode->i_num_ads == 0) {
1355                 return NULL;
1356         } else {
1357                 size_t stream_name_utf16le_nbytes;
1358                 u16 i;
1359                 struct wim_ads_entry *result;
1360
1361         #if TCHAR_IS_UTF16LE
1362                 const utf16lechar *stream_name_utf16le;
1363
1364                 stream_name_utf16le = stream_name;
1365                 stream_name_utf16le_nbytes = tstrlen(stream_name) * sizeof(tchar);
1366         #else
1367                 utf16lechar *stream_name_utf16le;
1368
1369                 {
1370                         int ret = tstr_to_utf16le(stream_name,
1371                                                   tstrlen(stream_name) *
1372                                                       sizeof(tchar),
1373                                                   &stream_name_utf16le,
1374                                                   &stream_name_utf16le_nbytes);
1375                         if (ret)
1376                                 return NULL;
1377                 }
1378         #endif
1379                 i = 0;
1380                 result = NULL;
1381                 do {
1382                         if (ads_entry_has_name(&inode->i_ads_entries[i],
1383                                                stream_name_utf16le,
1384                                                stream_name_utf16le_nbytes))
1385                         {
1386                                 if (idx_ret)
1387                                         *idx_ret = i;
1388                                 result = &inode->i_ads_entries[i];
1389                                 break;
1390                         }
1391                 } while (++i != inode->i_num_ads);
1392         #if !TCHAR_IS_UTF16LE
1393                 FREE(stream_name_utf16le);
1394         #endif
1395                 return result;
1396         }
1397 }
1398
1399 static struct wim_ads_entry *
1400 do_inode_add_ads(struct wim_inode *inode, const void *stream_name,
1401                  size_t stream_name_nbytes, bool is_utf16le)
1402 {
1403         u16 num_ads;
1404         struct wim_ads_entry *ads_entries;
1405         struct wim_ads_entry *new_entry;
1406
1407         if (inode->i_num_ads >= 0xfffe) {
1408                 ERROR("Too many alternate data streams in one inode!");
1409                 return NULL;
1410         }
1411         num_ads = inode->i_num_ads + 1;
1412         ads_entries = REALLOC(inode->i_ads_entries,
1413                               num_ads * sizeof(inode->i_ads_entries[0]));
1414         if (!ads_entries) {
1415                 ERROR("Failed to allocate memory for new alternate data stream");
1416                 return NULL;
1417         }
1418         inode->i_ads_entries = ads_entries;
1419
1420         new_entry = &inode->i_ads_entries[num_ads - 1];
1421         if (init_ads_entry(new_entry, stream_name, stream_name_nbytes, is_utf16le))
1422                 return NULL;
1423         new_entry->stream_id = inode->i_next_stream_id++;
1424         inode->i_num_ads = num_ads;
1425         return new_entry;
1426 }
1427
1428 struct wim_ads_entry *
1429 inode_add_ads_utf16le(struct wim_inode *inode,
1430                       const utf16lechar *stream_name,
1431                       size_t stream_name_nbytes)
1432 {
1433         DEBUG("Add alternate data stream \"%"WS"\"", stream_name);
1434         return do_inode_add_ads(inode, stream_name, stream_name_nbytes, true);
1435 }
1436
1437 /*
1438  * Add an alternate stream entry to a WIM inode and return a pointer to it, or
1439  * NULL if memory could not be allocated.
1440  */
1441 struct wim_ads_entry *
1442 inode_add_ads(struct wim_inode *inode, const tchar *stream_name)
1443 {
1444         DEBUG("Add alternate data stream \"%"TS"\"", stream_name);
1445         return do_inode_add_ads(inode, stream_name,
1446                                 tstrlen(stream_name) * sizeof(tchar),
1447                                 TCHAR_IS_UTF16LE);
1448 }
1449
1450 static struct wim_lookup_table_entry *
1451 add_stream_from_data_buffer(const void *buffer, size_t size,
1452                             struct wim_lookup_table *lookup_table)
1453 {
1454         u8 hash[SHA1_HASH_SIZE];
1455         struct wim_lookup_table_entry *lte, *existing_lte;
1456
1457         sha1_buffer(buffer, size, hash);
1458         existing_lte = __lookup_resource(lookup_table, hash);
1459         if (existing_lte) {
1460                 wimlib_assert(wim_resource_size(existing_lte) == size);
1461                 lte = existing_lte;
1462                 lte->refcnt++;
1463         } else {
1464                 void *buffer_copy;
1465                 lte = new_lookup_table_entry();
1466                 if (!lte)
1467                         return NULL;
1468                 buffer_copy = memdup(buffer, size);
1469                 if (!buffer_copy) {
1470                         free_lookup_table_entry(lte);
1471                         return NULL;
1472                 }
1473                 lte->resource_location            = RESOURCE_IN_ATTACHED_BUFFER;
1474                 lte->attached_buffer              = buffer_copy;
1475                 lte->resource_entry.original_size = size;
1476                 copy_hash(lte->hash, hash);
1477                 lookup_table_insert(lookup_table, lte);
1478         }
1479         return lte;
1480 }
1481
1482 int
1483 inode_add_ads_with_data(struct wim_inode *inode, const tchar *name,
1484                         const void *value, size_t size,
1485                         struct wim_lookup_table *lookup_table)
1486 {
1487         struct wim_ads_entry *new_ads_entry;
1488
1489         wimlib_assert(inode->i_resolved);
1490
1491         new_ads_entry = inode_add_ads(inode, name);
1492         if (!new_ads_entry)
1493                 return WIMLIB_ERR_NOMEM;
1494
1495         new_ads_entry->lte = add_stream_from_data_buffer(value, size,
1496                                                          lookup_table);
1497         if (!new_ads_entry->lte) {
1498                 inode_remove_ads(inode, new_ads_entry - inode->i_ads_entries,
1499                                  lookup_table);
1500                 return WIMLIB_ERR_NOMEM;
1501         }
1502         return 0;
1503 }
1504
1505 /* Set the unnamed stream of a WIM inode, given a data buffer containing the
1506  * stream contents. */
1507 int
1508 inode_set_unnamed_stream(struct wim_inode *inode, const void *data, size_t len,
1509                          struct wim_lookup_table *lookup_table)
1510 {
1511         inode->i_lte = add_stream_from_data_buffer(data, len, lookup_table);
1512         if (!inode->i_lte)
1513                 return WIMLIB_ERR_NOMEM;
1514         inode->i_resolved = 1;
1515         return 0;
1516 }
1517
1518 /* Remove an alternate data stream from a WIM inode  */
1519 void
1520 inode_remove_ads(struct wim_inode *inode, u16 idx,
1521                  struct wim_lookup_table *lookup_table)
1522 {
1523         struct wim_ads_entry *ads_entry;
1524         struct wim_lookup_table_entry *lte;
1525
1526         wimlib_assert(idx < inode->i_num_ads);
1527         wimlib_assert(inode->i_resolved);
1528
1529         ads_entry = &inode->i_ads_entries[idx];
1530
1531         DEBUG("Remove alternate data stream \"%"WS"\"", ads_entry->stream_name);
1532
1533         lte = ads_entry->lte;
1534         if (lte)
1535                 lte_decrement_refcnt(lte, lookup_table);
1536
1537         destroy_ads_entry(ads_entry);
1538
1539         memmove(&inode->i_ads_entries[idx],
1540                 &inode->i_ads_entries[idx + 1],
1541                 (inode->i_num_ads - idx - 1) * sizeof(inode->i_ads_entries[0]));
1542         inode->i_num_ads--;
1543 }
1544
1545 #ifndef __WIN32__
1546 int
1547 inode_get_unix_data(const struct wim_inode *inode,
1548                     struct wimlib_unix_data *unix_data,
1549                     u16 *stream_idx_ret)
1550 {
1551         const struct wim_ads_entry *ads_entry;
1552         const struct wim_lookup_table_entry *lte;
1553         size_t size;
1554         int ret;
1555
1556         wimlib_assert(inode->i_resolved);
1557
1558         ads_entry = inode_get_ads_entry((struct wim_inode*)inode,
1559                                         WIMLIB_UNIX_DATA_TAG, NULL);
1560         if (!ads_entry)
1561                 return NO_UNIX_DATA;
1562
1563         if (stream_idx_ret)
1564                 *stream_idx_ret = ads_entry - inode->i_ads_entries;
1565
1566         lte = ads_entry->lte;
1567         if (!lte)
1568                 return NO_UNIX_DATA;
1569
1570         size = wim_resource_size(lte);
1571         if (size != sizeof(struct wimlib_unix_data))
1572                 return BAD_UNIX_DATA;
1573
1574         ret = read_full_resource_into_buf(lte, unix_data);
1575         if (ret)
1576                 return ret;
1577
1578         if (unix_data->version != 0)
1579                 return BAD_UNIX_DATA;
1580         return 0;
1581 }
1582
1583 int
1584 inode_set_unix_data(struct wim_inode *inode, uid_t uid, gid_t gid, mode_t mode,
1585                     struct wim_lookup_table *lookup_table, int which)
1586 {
1587         struct wimlib_unix_data unix_data;
1588         int ret;
1589         bool have_good_unix_data = false;
1590         bool have_unix_data = false;
1591         u16 stream_idx;
1592
1593         if (!(which & UNIX_DATA_CREATE)) {
1594                 ret = inode_get_unix_data(inode, &unix_data, &stream_idx);
1595                 if (ret == 0 || ret == BAD_UNIX_DATA || ret > 0)
1596                         have_unix_data = true;
1597                 if (ret == 0)
1598                         have_good_unix_data = true;
1599         }
1600         unix_data.version = 0;
1601         if (which & UNIX_DATA_UID || !have_good_unix_data)
1602                 unix_data.uid = uid;
1603         if (which & UNIX_DATA_GID || !have_good_unix_data)
1604                 unix_data.gid = gid;
1605         if (which & UNIX_DATA_MODE || !have_good_unix_data)
1606                 unix_data.mode = mode;
1607         ret = inode_add_ads_with_data(inode, WIMLIB_UNIX_DATA_TAG,
1608                                       &unix_data,
1609                                       sizeof(struct wimlib_unix_data),
1610                                       lookup_table);
1611         if (ret == 0 && have_unix_data)
1612                 inode_remove_ads(inode, stream_idx, lookup_table);
1613         return ret;
1614 }
1615 #endif /* !__WIN32__ */
1616
1617 /*
1618  * Reads the alternate data stream entries of a WIM dentry.
1619  *
1620  * @p:  Pointer to buffer that starts with the first alternate stream entry.
1621  *
1622  * @inode:      Inode to load the alternate data streams into.
1623  *              @inode->i_num_ads must have been set to the number of
1624  *              alternate data streams that are expected.
1625  *
1626  * @remaining_size:     Number of bytes of data remaining in the buffer pointed
1627  *                      to by @p.
1628  *
1629  *
1630  * Return 0 on success or nonzero on failure.  On success, inode->i_ads_entries
1631  * is set to an array of `struct wim_ads_entry's of length inode->i_num_ads.  On
1632  * failure, @inode is not modified.
1633  */
1634 static int
1635 read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode,
1636                  size_t nbytes_remaining)
1637 {
1638         u16 num_ads;
1639         struct wim_ads_entry *ads_entries;
1640         int ret;
1641
1642         BUILD_BUG_ON(sizeof(struct wim_ads_entry_on_disk) != WIM_ADS_ENTRY_DISK_SIZE);
1643
1644         /* Allocate an array for our in-memory representation of the alternate
1645          * data stream entries. */
1646         num_ads = inode->i_num_ads;
1647         ads_entries = CALLOC(num_ads, sizeof(inode->i_ads_entries[0]));
1648         if (!ads_entries)
1649                 goto out_of_memory;
1650
1651         /* Read the entries into our newly allocated buffer. */
1652         for (u16 i = 0; i < num_ads; i++) {
1653                 u64 length;
1654                 struct wim_ads_entry *cur_entry;
1655                 const struct wim_ads_entry_on_disk *disk_entry =
1656                         (const struct wim_ads_entry_on_disk*)p;
1657
1658                 cur_entry = &ads_entries[i];
1659                 ads_entries[i].stream_id = i + 1;
1660
1661                 /* Do we have at least the size of the fixed-length data we know
1662                  * need? */
1663                 if (nbytes_remaining < sizeof(struct wim_ads_entry_on_disk))
1664                         goto out_invalid;
1665
1666                 /* Read the length field */
1667                 length = le64_to_cpu(disk_entry->length);
1668
1669                 /* Make sure the length field is neither so small it doesn't
1670                  * include all the fixed-length data nor so large it overflows
1671                  * the metadata resource buffer. */
1672                 if (length < sizeof(struct wim_ads_entry_on_disk) ||
1673                     length > nbytes_remaining)
1674                         goto out_invalid;
1675
1676                 /* Read the rest of the fixed-length data. */
1677
1678                 cur_entry->reserved = le64_to_cpu(disk_entry->reserved);
1679                 copy_hash(cur_entry->hash, disk_entry->hash);
1680                 cur_entry->stream_name_nbytes = le16_to_cpu(disk_entry->stream_name_nbytes);
1681
1682                 /* If stream_name_nbytes != 0, this is a named stream.
1683                  * Otherwise this is an unnamed stream, or in some cases (bugs
1684                  * in Microsoft's software I guess) a meaningless entry
1685                  * distinguished from the real unnamed stream entry, if any, by
1686                  * the fact that the real unnamed stream entry has a nonzero
1687                  * hash field. */
1688                 if (cur_entry->stream_name_nbytes) {
1689                         /* The name is encoded in UTF16-LE, which uses 2-byte
1690                          * coding units, so the length of the name had better be
1691                          * an even number of bytes... */
1692                         if (cur_entry->stream_name_nbytes & 1)
1693                                 goto out_invalid;
1694
1695                         /* Add the length of the stream name to get the length
1696                          * we actually need to read.  Make sure this isn't more
1697                          * than the specified length of the entry. */
1698                         if (sizeof(struct wim_ads_entry_on_disk) +
1699                             cur_entry->stream_name_nbytes > length)
1700                                 goto out_invalid;
1701
1702                         cur_entry->stream_name = MALLOC(cur_entry->stream_name_nbytes + 2);
1703                         if (!cur_entry->stream_name)
1704                                 goto out_of_memory;
1705
1706                         memcpy(cur_entry->stream_name,
1707                                disk_entry->stream_name,
1708                                cur_entry->stream_name_nbytes);
1709                         cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = cpu_to_le16(0);
1710                 }
1711
1712                 /* It's expected that the size of every ADS entry is a multiple
1713                  * of 8.  However, to be safe, I'm allowing the possibility of
1714                  * an ADS entry at the very end of the metadata resource ending
1715                  * un-aligned.  So although we still need to increment the input
1716                  * pointer by @length to reach the next ADS entry, it's possible
1717                  * that less than @length is actually remaining in the metadata
1718                  * resource. We should set the remaining bytes to 0 if this
1719                  * happens. */
1720                 length = (length + 7) & ~(u64)7;
1721                 p += length;
1722                 if (nbytes_remaining < length)
1723                         nbytes_remaining = 0;
1724                 else
1725                         nbytes_remaining -= length;
1726         }
1727         inode->i_ads_entries = ads_entries;
1728         inode->i_next_stream_id = inode->i_num_ads + 1;
1729         ret = 0;
1730         goto out;
1731 out_of_memory:
1732         ret = WIMLIB_ERR_NOMEM;
1733         goto out_free_ads_entries;
1734 out_invalid:
1735         ERROR("An alternate data stream entry is invalid");
1736         ret = WIMLIB_ERR_INVALID_DENTRY;
1737 out_free_ads_entries:
1738         if (ads_entries) {
1739                 for (u16 i = 0; i < num_ads; i++)
1740                         destroy_ads_entry(&ads_entries[i]);
1741                 FREE(ads_entries);
1742         }
1743 out:
1744         return ret;
1745 }
1746
1747 /*
1748  * Reads a WIM directory entry, including all alternate data stream entries that
1749  * follow it, from the WIM image's metadata resource.
1750  *
1751  * @metadata_resource:
1752  *              Pointer to the metadata resource buffer.
1753  *
1754  * @metadata_resource_len:
1755  *              Length of the metadata resource buffer, in bytes.
1756  *
1757  * @offset:     Offset of the dentry within the metadata resource.
1758  *
1759  * @dentry:     A `struct wim_dentry' that will be filled in by this function.
1760  *
1761  * Return 0 on success or nonzero on failure.  On failure, @dentry will have
1762  * been modified, but it will not be left with pointers to any allocated
1763  * buffers.  On success, the dentry->length field must be examined.  If zero,
1764  * this was a special "end of directory" dentry and not a real dentry.  If
1765  * nonzero, this was a real dentry.
1766  *
1767  * Possible errors include:
1768  *      WIMLIB_ERR_NOMEM
1769  *      WIMLIB_ERR_INVALID_DENTRY
1770  */
1771 int
1772 read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len,
1773             u64 offset, struct wim_dentry * restrict dentry)
1774 {
1775
1776         u64 calculated_size;
1777         utf16lechar *file_name;
1778         utf16lechar *short_name;
1779         u16 short_name_nbytes;
1780         u16 file_name_nbytes;
1781         int ret;
1782         struct wim_inode *inode;
1783         const u8 *p = &metadata_resource[offset];
1784         const struct wim_dentry_on_disk *disk_dentry =
1785                         (const struct wim_dentry_on_disk*)p;
1786
1787         BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE);
1788
1789         if ((uintptr_t)p & 7)
1790                 WARNING("WIM dentry is not 8-byte aligned");
1791
1792         dentry_common_init(dentry);
1793
1794         /* Before reading the whole dentry, we need to read just the length.
1795          * This is because a dentry of length 8 (that is, just the length field)
1796          * terminates the list of sibling directory entries. */
1797         if (offset + sizeof(u64) > metadata_resource_len ||
1798             offset + sizeof(u64) < offset)
1799         {
1800                 ERROR("Directory entry starting at %"PRIu64" ends past the "
1801                       "end of the metadata resource (size %"PRIu64")",
1802                       offset, metadata_resource_len);
1803                 return WIMLIB_ERR_INVALID_DENTRY;
1804         }
1805         dentry->length = le64_to_cpu(disk_dentry->length);
1806
1807         /* A zero length field (really a length of 8, since that's how big the
1808          * directory entry is...) indicates that this is the end of directory
1809          * dentry.  We do not read it into memory as an actual dentry, so just
1810          * return successfully in this case. */
1811         if (dentry->length == 8)
1812                 dentry->length = 0;
1813         if (dentry->length == 0)
1814                 return 0;
1815
1816         /* Now that we have the actual length provided in the on-disk structure,
1817          * again make sure it doesn't overflow the metadata resource buffer. */
1818         if (offset + dentry->length > metadata_resource_len ||
1819             offset + dentry->length < offset)
1820         {
1821                 ERROR("Directory entry at offset %"PRIu64" and with size "
1822                       "%"PRIu64" ends past the end of the metadata resource "
1823                       "(size %"PRIu64")",
1824                       offset, dentry->length, metadata_resource_len);
1825                 return WIMLIB_ERR_INVALID_DENTRY;
1826         }
1827
1828         /* Make sure the dentry length is at least as large as the number of
1829          * fixed-length fields */
1830         if (dentry->length < sizeof(struct wim_dentry_on_disk)) {
1831                 ERROR("Directory entry has invalid length of %"PRIu64" bytes",
1832                       dentry->length);
1833                 return WIMLIB_ERR_INVALID_DENTRY;
1834         }
1835
1836         /* Allocate a `struct wim_inode' for this `struct wim_dentry'. */
1837         inode = new_timeless_inode();
1838         if (!inode)
1839                 return WIMLIB_ERR_NOMEM;
1840
1841         /* Read more fields; some into the dentry, and some into the inode. */
1842
1843         inode->i_attributes = le32_to_cpu(disk_dentry->attributes);
1844         inode->i_security_id = le32_to_cpu(disk_dentry->security_id);
1845         dentry->subdir_offset = le64_to_cpu(disk_dentry->subdir_offset);
1846         dentry->d_unused_1 = le64_to_cpu(disk_dentry->unused_1);
1847         dentry->d_unused_2 = le64_to_cpu(disk_dentry->unused_2);
1848         inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time);
1849         inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time);
1850         inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time);
1851         copy_hash(inode->i_hash, disk_dentry->unnamed_stream_hash);
1852
1853         /* I don't know what's going on here.  It seems like M$ screwed up the
1854          * reparse points, then put the fields in the same place and didn't
1855          * document it.  So we have some fields we read for reparse points, and
1856          * some fields in the same place for non-reparse-point.s */
1857         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
1858                 inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->reparse.rp_unknown_1);
1859                 inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag);
1860                 inode->i_rp_unknown_2 = le16_to_cpu(disk_dentry->reparse.rp_unknown_2);
1861                 inode->i_not_rpfixed = le16_to_cpu(disk_dentry->reparse.not_rpfixed);
1862                 /* Leave inode->i_ino at 0.  Note that this means the WIM file
1863                  * cannot archive hard-linked reparse points.  Such a thing
1864                  * doesn't really make sense anyway, although I believe it's
1865                  * theoretically possible to have them on NTFS. */
1866         } else {
1867                 inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->nonreparse.rp_unknown_1);
1868                 inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id);
1869         }
1870
1871         inode->i_num_ads = le16_to_cpu(disk_dentry->num_alternate_data_streams);
1872
1873         short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes);
1874         file_name_nbytes = le16_to_cpu(disk_dentry->file_name_nbytes);
1875
1876         if ((short_name_nbytes & 1) | (file_name_nbytes & 1))
1877         {
1878                 ERROR("Dentry name is not valid UTF-16LE (odd number of bytes)!");
1879                 ret = WIMLIB_ERR_INVALID_DENTRY;
1880                 goto out_free_inode;
1881         }
1882
1883         /* We now know the length of the file name and short name.  Make sure
1884          * the length of the dentry is large enough to actually hold them.
1885          *
1886          * The calculated length here is unaligned to allow for the possibility
1887          * that the dentry->length names an unaligned length, although this
1888          * would be unexpected. */
1889         calculated_size = _dentry_correct_length_unaligned(file_name_nbytes,
1890                                                            short_name_nbytes);
1891
1892         if (dentry->length < calculated_size) {
1893                 ERROR("Unexpected end of directory entry! (Expected "
1894                       "at least %"PRIu64" bytes, got %"PRIu64" bytes.)",
1895                       calculated_size, dentry->length);
1896                 ret = WIMLIB_ERR_INVALID_DENTRY;
1897                 goto out_free_inode;
1898         }
1899
1900         p += sizeof(struct wim_dentry_on_disk);
1901
1902         /* Read the filename if present.  Note: if the filename is empty, there
1903          * is no null terminator following it. */
1904         if (file_name_nbytes) {
1905                 file_name = MALLOC(file_name_nbytes + 2);
1906                 if (!file_name) {
1907                         ERROR("Failed to allocate %d bytes for dentry file name",
1908                               file_name_nbytes + 2);
1909                         ret = WIMLIB_ERR_NOMEM;
1910                         goto out_free_inode;
1911                 }
1912                 memcpy(file_name, p, file_name_nbytes);
1913                 p += file_name_nbytes + 2;
1914                 file_name[file_name_nbytes / 2] = cpu_to_le16(0);
1915         } else {
1916                 file_name = NULL;
1917         }
1918
1919
1920         /* Read the short filename if present.  Note: if there is no short
1921          * filename, there is no null terminator following it. */
1922         if (short_name_nbytes) {
1923                 short_name = MALLOC(short_name_nbytes + 2);
1924                 if (!short_name) {
1925                         ERROR("Failed to allocate %d bytes for dentry short name",
1926                               short_name_nbytes + 2);
1927                         ret = WIMLIB_ERR_NOMEM;
1928                         goto out_free_file_name;
1929                 }
1930                 memcpy(short_name, p, short_name_nbytes);
1931                 p += short_name_nbytes + 2;
1932                 short_name[short_name_nbytes / 2] = cpu_to_le16(0);
1933         } else {
1934                 short_name = NULL;
1935         }
1936
1937         /* Align the dentry length */
1938         dentry->length = (dentry->length + 7) & ~7;
1939
1940         /*
1941          * Read the alternate data streams, if present.  dentry->num_ads tells
1942          * us how many they are, and they will directly follow the dentry
1943          * on-disk.
1944          *
1945          * Note that each alternate data stream entry begins on an 8-byte
1946          * aligned boundary, and the alternate data stream entries seem to NOT
1947          * be included in the dentry->length field for some reason.
1948          */
1949         if (inode->i_num_ads != 0) {
1950                 ret = WIMLIB_ERR_INVALID_DENTRY;
1951                 if (offset + dentry->length > metadata_resource_len ||
1952                     (ret = read_ads_entries(&metadata_resource[offset + dentry->length],
1953                                             inode,
1954                                             metadata_resource_len - offset - dentry->length)))
1955                 {
1956                         ERROR("Failed to read alternate data stream "
1957                               "entries of WIM dentry \"%"WS"\"", file_name);
1958                         goto out_free_short_name;
1959                 }
1960         }
1961         /* We've read all the data for this dentry.  Set the names and their
1962          * lengths, and we've done. */
1963         dentry->d_inode           = inode;
1964         dentry->file_name         = file_name;
1965         dentry->short_name        = short_name;
1966         dentry->file_name_nbytes  = file_name_nbytes;
1967         dentry->short_name_nbytes = short_name_nbytes;
1968         ret = 0;
1969         goto out;
1970 out_free_short_name:
1971         FREE(short_name);
1972 out_free_file_name:
1973         FREE(file_name);
1974 out_free_inode:
1975         free_inode(inode);
1976 out:
1977         return ret;
1978 }
1979
1980 static const tchar *
1981 dentry_get_file_type_string(const struct wim_dentry *dentry)
1982 {
1983         const struct wim_inode *inode = dentry->d_inode;
1984         if (inode_is_directory(inode))
1985                 return T("directory");
1986         else if (inode_is_symlink(inode))
1987                 return T("symbolic link");
1988         else
1989                 return T("file");
1990 }
1991
1992 /* Reads the children of a dentry, and all their children, ..., etc. from the
1993  * metadata resource and into the dentry tree.
1994  *
1995  * @metadata_resource:  An array that contains the uncompressed metadata
1996  *                      resource for the WIM file.
1997  *
1998  * @metadata_resource_len:  The length of the uncompressed metadata resource, in
1999  *                          bytes.
2000  *
2001  * @dentry:     A pointer to a `struct wim_dentry' that is the root of the directory
2002  *              tree and has already been read from the metadata resource.  It
2003  *              does not need to be the real root because this procedure is
2004  *              called recursively.
2005  *
2006  * Returns zero on success; nonzero on failure.
2007  */
2008 int
2009 read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len,
2010                  struct wim_dentry *dentry)
2011 {
2012         u64 cur_offset = dentry->subdir_offset;
2013         struct wim_dentry *child;
2014         struct wim_dentry *duplicate;
2015         struct wim_dentry *parent;
2016         struct wim_dentry cur_child;
2017         int ret;
2018
2019         /*
2020          * If @dentry has no child dentries, nothing more needs to be done for
2021          * this branch.  This is the case for regular files, symbolic links, and
2022          * *possibly* empty directories (although an empty directory may also
2023          * have one child dentry that is the special end-of-directory dentry)
2024          */
2025         if (cur_offset == 0)
2026                 return 0;
2027
2028         /* Check for cyclic directory structure */
2029         for (parent = dentry->parent; !dentry_is_root(parent); parent = parent->parent)
2030         {
2031                 if (unlikely(parent->subdir_offset == cur_offset)) {
2032                         ERROR("Cyclic directory structure directed: children "
2033                               "of \"%"TS"\" coincide with children of \"%"TS"\"",
2034                               dentry_full_path(dentry),
2035                               dentry_full_path(parent));
2036                         return WIMLIB_ERR_INVALID_DENTRY;
2037                 }
2038         }
2039
2040         /* Find and read all the children of @dentry. */
2041         for (;;) {
2042
2043                 /* Read next child of @dentry into @cur_child. */
2044                 ret = read_dentry(metadata_resource, metadata_resource_len,
2045                                   cur_offset, &cur_child);
2046                 if (ret)
2047                         break;
2048
2049                 /* Check for end of directory. */
2050                 if (cur_child.length == 0)
2051                         break;
2052
2053                 /* Not end of directory.  Allocate this child permanently and
2054                  * link it to the parent and previous child. */
2055                 child = memdup(&cur_child, sizeof(struct wim_dentry));
2056                 if (!child) {
2057                         ERROR("Failed to allocate new dentry!");
2058                         ret = WIMLIB_ERR_NOMEM;
2059                         break;
2060                 }
2061
2062                 /* Advance to the offset of the next child.  Note: We need to
2063                  * advance by the TOTAL length of the dentry, not by the length
2064                  * cur_child.length, which although it does take into account
2065                  * the padding, it DOES NOT take into account alternate stream
2066                  * entries. */
2067                 cur_offset += dentry_total_length(child);
2068
2069                 if (unlikely(!dentry_has_long_name(child))) {
2070                         WARNING("Ignoring unnamed dentry in "
2071                                 "directory \"%"TS"\"",
2072                                 dentry_full_path(dentry));
2073                         free_dentry(child);
2074                         continue;
2075                 }
2076
2077                 duplicate = dentry_add_child(dentry, child);
2078                 if (unlikely(duplicate)) {
2079                         const tchar *child_type, *duplicate_type;
2080                         child_type = dentry_get_file_type_string(child);
2081                         duplicate_type = dentry_get_file_type_string(duplicate);
2082                         WARNING("Ignoring duplicate %"TS" \"%"TS"\" "
2083                                 "(the WIM image already contains a %"TS" "
2084                                 "at that path with the exact same name)",
2085                                 child_type, dentry_full_path(duplicate),
2086                                 duplicate_type);
2087                         free_dentry(child);
2088                         continue;
2089                 }
2090
2091                 inode_add_dentry(child, child->d_inode);
2092                 /* If there are children of this child, call this
2093                  * procedure recursively. */
2094                 if (child->subdir_offset != 0) {
2095                         if (likely(dentry_is_directory(child))) {
2096                                 ret = read_dentry_tree(metadata_resource,
2097                                                        metadata_resource_len,
2098                                                        child);
2099                                 if (ret)
2100                                         break;
2101                         } else {
2102                                 WARNING("Ignoring children of non-directory \"%"TS"\"",
2103                                         dentry_full_path(child));
2104                         }
2105                 }
2106         }
2107         return ret;
2108 }
2109
2110 /*
2111  * Writes a WIM dentry to an output buffer.
2112  *
2113  * @dentry:  The dentry structure.
2114  * @p:       The memory location to write the data to.
2115  *
2116  * Returns the pointer to the byte after the last byte we wrote as part of the
2117  * dentry, including any alternate data stream entries.
2118  */
2119 static u8 *
2120 write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
2121 {
2122         const struct wim_inode *inode;
2123         struct wim_dentry_on_disk *disk_dentry;
2124         const u8 *orig_p;
2125         const u8 *hash;
2126
2127         wimlib_assert(((uintptr_t)p & 7) == 0); /* 8 byte aligned */
2128         orig_p = p;
2129
2130         inode = dentry->d_inode;
2131         disk_dentry = (struct wim_dentry_on_disk*)p;
2132
2133         disk_dentry->attributes = cpu_to_le32(inode->i_attributes);
2134         disk_dentry->security_id = cpu_to_le32(inode->i_security_id);
2135         disk_dentry->subdir_offset = cpu_to_le64(dentry->subdir_offset);
2136         disk_dentry->unused_1 = cpu_to_le64(dentry->d_unused_1);
2137         disk_dentry->unused_2 = cpu_to_le64(dentry->d_unused_2);
2138         disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time);
2139         disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time);
2140         disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time);
2141         hash = inode_stream_hash(inode, 0);
2142         copy_hash(disk_dentry->unnamed_stream_hash, hash);
2143         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
2144                 disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
2145                 disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag);
2146                 disk_dentry->reparse.rp_unknown_2 = cpu_to_le16(inode->i_rp_unknown_2);
2147                 disk_dentry->reparse.not_rpfixed = cpu_to_le16(inode->i_not_rpfixed);
2148         } else {
2149                 disk_dentry->nonreparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
2150                 disk_dentry->nonreparse.hard_link_group_id =
2151                         cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino);
2152         }
2153         disk_dentry->num_alternate_data_streams = cpu_to_le16(inode->i_num_ads);
2154         disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes);
2155         disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes);
2156         p += sizeof(struct wim_dentry_on_disk);
2157
2158         wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry));
2159
2160         if (dentry_has_long_name(dentry))
2161                 p = mempcpy(p, dentry->file_name, dentry->file_name_nbytes + 2);
2162
2163         if (dentry_has_short_name(dentry))
2164                 p = mempcpy(p, dentry->short_name, dentry->short_name_nbytes + 2);
2165
2166         /* Align to 8-byte boundary */
2167         while ((uintptr_t)p & 7)
2168                 *p++ = 0;
2169
2170         /* We calculate the correct length of the dentry ourselves because the
2171          * dentry->length field may been set to an unexpected value from when we
2172          * read the dentry in (for example, there may have been unknown data
2173          * appended to the end of the dentry...).  Furthermore, the dentry may
2174          * have been renamed, thus changing its needed length. */
2175         disk_dentry->length = cpu_to_le64(p - orig_p);
2176
2177         /* Write the alternate data streams entries, if any. */
2178         for (u16 i = 0; i < inode->i_num_ads; i++) {
2179                 const struct wim_ads_entry *ads_entry =
2180                                 &inode->i_ads_entries[i];
2181                 struct wim_ads_entry_on_disk *disk_ads_entry =
2182                                 (struct wim_ads_entry_on_disk*)p;
2183                 orig_p = p;
2184
2185                 disk_ads_entry->reserved = cpu_to_le64(ads_entry->reserved);
2186
2187                 hash = inode_stream_hash(inode, i + 1);
2188                 copy_hash(disk_ads_entry->hash, hash);
2189                 disk_ads_entry->stream_name_nbytes = cpu_to_le16(ads_entry->stream_name_nbytes);
2190                 p += sizeof(struct wim_ads_entry_on_disk);
2191                 if (ads_entry->stream_name_nbytes) {
2192                         p = mempcpy(p, ads_entry->stream_name,
2193                                     ads_entry->stream_name_nbytes + 2);
2194                 }
2195                 /* Align to 8-byte boundary */
2196                 while ((uintptr_t)p & 7)
2197                         *p++ = 0;
2198                 disk_ads_entry->length = cpu_to_le64(p - orig_p);
2199         }
2200         return p;
2201 }
2202
2203 static int
2204 write_dentry_cb(struct wim_dentry *dentry, void *_p)
2205 {
2206         u8 **p = _p;
2207         *p = write_dentry(dentry, *p);
2208         return 0;
2209 }
2210
2211 static u8 *
2212 write_dentry_tree_recursive(const struct wim_dentry *parent, u8 *p);
2213
2214 static int
2215 write_dentry_tree_recursive_cb(struct wim_dentry *dentry, void *_p)
2216 {
2217         u8 **p = _p;
2218         *p = write_dentry_tree_recursive(dentry, *p);
2219         return 0;
2220 }
2221
2222 /* Recursive function that writes a dentry tree rooted at @parent, not including
2223  * @parent itself, which has already been written. */
2224 static u8 *
2225 write_dentry_tree_recursive(const struct wim_dentry *parent, u8 *p)
2226 {
2227         /* Nothing to do if this dentry has no children. */
2228         if (parent->subdir_offset == 0)
2229                 return p;
2230
2231         /* Write child dentries and end-of-directory entry.
2232          *
2233          * Note: we need to write all of this dentry's children before
2234          * recursively writing the directory trees rooted at each of the child
2235          * dentries, since the on-disk dentries for a dentry's children are
2236          * always located at consecutive positions in the metadata resource! */
2237         for_dentry_child(parent, write_dentry_cb, &p);
2238
2239         /* write end of directory entry */
2240         *(le64*)p = cpu_to_le64(0);
2241         p += 8;
2242
2243         /* Recurse on children. */
2244         for_dentry_child(parent, write_dentry_tree_recursive_cb, &p);
2245         return p;
2246 }
2247
2248 /* Writes a directory tree to the metadata resource.
2249  *
2250  * @root:       Root of the dentry tree.
2251  * @p:          Pointer to a buffer with enough space for the dentry tree.
2252  *
2253  * Returns pointer to the byte after the last byte we wrote.
2254  */
2255 u8 *
2256 write_dentry_tree(const struct wim_dentry *root, u8 *p)
2257 {
2258         DEBUG("Writing dentry tree.");
2259         wimlib_assert(dentry_is_root(root));
2260
2261         /* If we're the root dentry, we have no parent that already
2262          * wrote us, so we need to write ourselves. */
2263         p = write_dentry(root, p);
2264
2265         /* Write end of directory entry after the root dentry just to be safe;
2266          * however the root dentry obviously cannot have any siblings. */
2267         *(le64*)p = cpu_to_le64(0);
2268         p += 8;
2269
2270         /* Recursively write the rest of the dentry tree. */
2271         return write_dentry_tree_recursive(root, p);
2272 }