v1.14.2-BETA2

[wimlib] / src / dentry.c
diff --git a/src/dentry.c b/src/dentry.c

index 302409f9fe56034a1c29c191ebc1995af7fbab6b..f20d7dffdebcc76dc7a9a489c34c94d5b9c4429b 100644
--- a/src/dentry.c
+++ b/src/dentry.c
@@ -3,7 +3,7 @@
   */
  
  /*
- * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
+ * Copyright 2012-2023 Eric Biggers
   *
   * This file is free software; you can redistribute it and/or modify it under
   * the terms of the GNU Lesser General Public License as published by the Free
@@ -16,7 +16,7 @@
   * details.
   *
   * You should have received a copy of the GNU Lesser General Public License
- * along with this file; if not, see http://www.gnu.org/licenses/.
+ * along with this file; if not, see https://www.gnu.org/licenses/.
   */
  
  /*
@@ -46,8 +46,9 @@
   *  - wimlib does not allow *directory* hard links, so a WIM image really does
   *    have a *tree* of dentries (and not an arbitrary graph of dentries).
   *
- *  - wimlib indexes dentries both case-insensitively and case-sensitively,
- *    allowing either behavior to be used for path lookup.
+ *  - wimlib supports both case-sensitive and case-insensitive path lookups.
+ *    The implementation uses a single in-memory index per directory, using a
+ *    collation order like that used by NTFS; see collate_dentry_names().
   *
   *  - Multiple dentries in a directory might have the same case-insensitive
   *    name.  But wimlib enforces that at most one dentry in a directory can have
@@ -92,10 +93,10 @@ struct wim_dentry_on_disk {
         le32 attributes;
  
         /* A value that specifies the security descriptor for this file or
-        * directory.  If -1, the file or directory has no security descriptor.
-        * Otherwise, it is a 0-based index into the WIM image's table of
-        * security descriptors (see: `struct wim_security_data') */
-       sle32 security_id;
+        * directory.  If 0xFFFFFFFF, the file or directory has no security
+        * descriptor.  Otherwise, it is a 0-based index into the WIM image's
+        * table of security descriptors (see: `struct wim_security_data') */
+       le32 security_id;
  
         /* Offset, in bytes, from the start of the uncompressed metadata
          * resource of this directory's child directory entries, or 0 if this
@@ -116,56 +117,42 @@ struct wim_dentry_on_disk {
         le64 last_write_time;
  
         /*
-        * Usually this is the SHA-1 message digest of the file's "contents"
-        * (the unnamed data stream).
-        *
-        * If the file has FILE_ATTRIBUTE_REPARSE_POINT set, then this is
-        * instead usually the SHA-1 message digest of the uncompressed reparse
-        * point data.
-        *
-        * However, there are some special rules that need to be applied to
-        * interpret this field correctly when extra stream entries are present.
-        * See the code for details.
+        * Usually this is the SHA-1 message digest of the file's contents, or
+        * all zeroes if the file is a directory or is empty.  However, special
+        * rules apply if the file has FILE_ATTRIBUTE_REPARSE_POINT set or has
+        * named data streams.  See assign_stream_types_unencrypted().
          */
-       u8 default_hash[SHA1_HASH_SIZE];
+       u8 main_hash[SHA1_HASH_SIZE];
+
+       /* Unknown field (maybe accidental padding)  */
+       le32 unknown_0x54;
  
-       /* The format of the following data is not yet completely known and they
-        * do not correspond to Microsoft's documentation.
+       /*
+        * The following 8-byte union contains either information about the
+        * reparse point (for files with FILE_ATTRIBUTE_REPARSE_POINT set), or
+        * the "hard link group ID" (for other files).
+        *
+        * The reparse point information contains ReparseTag and ReparseReserved
+        * from the header of the reparse point buffer.  It also contains a flag
+        * that indicates whether a reparse point fixup (for the target of an
+        * absolute symbolic link or junction) was done or not.
          *
-        * If this directory entry is for a reparse point (has
-        * FILE_ATTRIBUTE_REPARSE_POINT set in the 'attributes' field), then the
-        * version of the following fields containing the reparse tag is valid.
-        * Furthermore, the field notated as not_rpfixed, as far as I can tell,
-        * is supposed to be set to 1 if reparse point fixups (a.k.a. fixing the
-        * targets of absolute symbolic links) were *not* done, and otherwise 0.
+        * The "hard link group ID" is like an inode number; all dentries for
+        * the same inode share the same value.  See inode_fixup.c for more
+        * information.
          *
-        * If this directory entry is not for a reparse point, then the version
-        * of the following fields containing the hard_link_group_id is valid.
-        * All MS says about this field is that "If this file is part of a hard
-        * link set, all the directory entries in the set will share the same
-        * value in this field.".  However, more specifically I have observed
-        * the following:
-        *    - If the file is part of a hard link set of size 1, then the
-        *    hard_link_group_id should be set to either 0, which is treated
-        *    specially as indicating "not hardlinked", or any unique value.
-        *    - The specific nonzero values used to identity hard link sets do
-        *    not matter, as long as they are unique.
-        *    - However, due to bugs in Microsoft's software, it is actually NOT
-        *    guaranteed that directory entries that share the same hard link
-        *    group ID are actually hard linked to each either.  See
-        *    inode_fixup.c for the code that handles this.
+        * Note that this union creates the limitation that reparse point files
+        * cannot have multiple names (hard links).
          */
         union {
                 struct {
-                       le32 rp_unknown_1;
                         le32 reparse_tag;
-                       le16 rp_unknown_2;
-                       le16 not_rpfixed;
-               } _packed_attribute reparse;
+                       le16 rp_reserved;
+                       le16 rp_flags;
+               } __attribute__((packed)) reparse;
                 struct {
-                       le32 rp_unknown_1;
                         le64 hard_link_group_id;
-               } _packed_attribute nonreparse;
+               } __attribute__((packed)) nonreparse;
         };
  
         /* Number of extra stream entries that directly follow this dentry
@@ -183,14 +170,14 @@ struct wim_dentry_on_disk {
          * encoded "long" name, excluding the null terminator.  If zero, then
          * this file has no long name.  The root dentry should not have a long
          * name, but all other dentries in the image should have long names.  */
-       le16 file_name_nbytes;
+       le16 name_nbytes;
  
         /* Beginning of optional, variable-length fields  */
  
-       /* If file_name_nbytes != 0, the next field will be the UTF-16LE encoded
-        * long file name.  This will be null-terminated, so the size of this
-        * field will really be file_name_nbytes + 2.  */
-       /*utf16lechar file_name[];*/
+       /* If name_nbytes != 0, the next field will be the UTF-16LE encoded long
+        * name.  This will be null-terminated, so the size of this field will
+        * really be name_nbytes + 2.  */
+       /*utf16lechar name[];*/
  
         /* If short_name_nbytes != 0, the next field will be the UTF-16LE
          * encoded short name.  This will be null-terminated, so the size of
@@ -201,9 +188,9 @@ struct wim_dentry_on_disk {
          * field) after 8-byte alignment, then the remaining space will be a
          * variable-length list of tagged metadata items.  See tagged_items.c
          * for more information.  */
-       /* u8 tagged_items[] _aligned_attribute(8); */
+       /* u8 tagged_items[] __attribute__((aligned(8))); */
  
-} _packed_attribute;
+} __attribute__((packed));
         /* If num_extra_streams != 0, then there are that many extra stream
          * entries following the dentry, starting on the next 8-byte aligned
          * boundary.  They are not counted in the 'length' field of the dentry.
@@ -233,20 +220,20 @@ struct wim_extra_stream_entry_on_disk {
          * the null terminator.  There is a null terminator character if
          * @name_nbytes != 0; i.e., if this stream is named.  */
         utf16lechar name[];
-} _packed_attribute;
+} __attribute__((packed));
  
  static void
-do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *file_name,
-                  size_t file_name_nbytes)
+do_dentry_set_name(struct wim_dentry *dentry, utf16lechar *name,
+                  size_t name_nbytes)
  {
-       FREE(dentry->file_name);
-       dentry->file_name = file_name;
-       dentry->file_name_nbytes = file_name_nbytes;
+       FREE(dentry->d_name);
+       dentry->d_name = name;
+       dentry->d_name_nbytes = name_nbytes;
  
         if (dentry_has_short_name(dentry)) {
-               FREE(dentry->short_name);
-               dentry->short_name = NULL;
-               dentry->short_name_nbytes = 0;
+               FREE(dentry->d_short_name);
+               dentry->d_short_name = NULL;
+               dentry->d_short_name_nbytes = 0;
         }
  }
  
@@ -323,11 +310,11 @@ dentry_set_name(struct wim_dentry *dentry, const tchar *name)
   * tagged metadata items as well as any extra stream entries that may need to
   * follow the dentry.  */
  static size_t
-dentry_min_len_with_names(u16 file_name_nbytes, u16 short_name_nbytes)
+dentry_min_len_with_names(u16 name_nbytes, u16 short_name_nbytes)
  {
         size_t length = sizeof(struct wim_dentry_on_disk);
-       if (file_name_nbytes)
-               length += (u32)file_name_nbytes + 2;
+       if (name_nbytes)
+               length += (u32)name_nbytes + 2;
         if (short_name_nbytes)
                 length += (u32)short_name_nbytes + 2;
         return length;
@@ -347,7 +334,7 @@ stream_out_total_length(const struct wim_inode_stream *strm)
                 len += utf16le_len_bytes(strm->stream_name) + 2;
  
         /* Account for any necessary padding to the next 8-byte boundary.  */
-       return (len + 7) & ~7;
+       return ALIGN(len, 8);
  }
  
  /*
@@ -361,46 +348,41 @@ dentry_out_total_length(const struct wim_dentry *dentry)
  {
         const struct wim_inode *inode = dentry->d_inode;
         size_t len;
+       unsigned num_unnamed_streams = 0;
+       bool have_named_data_stream = false;
  
-       len = dentry_min_len_with_names(dentry->file_name_nbytes,
-                                       dentry->short_name_nbytes);
-       len = (len + 7) & ~7;
+       len = dentry_min_len_with_names(dentry->d_name_nbytes,
+                                       dentry->d_short_name_nbytes);
+       len = ALIGN(len, 8);
  
-       if (inode->i_extra_size) {
-               len += inode->i_extra_size;
-               len = (len + 7) & ~7;
-       }
+       if (inode->i_extra)
+               len += ALIGN(inode->i_extra->size, 8);
  
-       if (!(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
-               /*
-                * Extra stream entries:
-                *
-                * - Use one extra stream entry for each named data stream
-                * - Use one extra stream entry for the unnamed data stream when there is either:
-                *      - a reparse point stream
-                *      - at least one named data stream (for Windows PE bug workaround)
-                * - Use one extra stream entry for the reparse point stream if there is one
-                */
-               bool have_named_data_stream = false;
-               bool have_reparse_point_stream = false;
+       /*
+        * Calculate the total length of the extra stream entries that will be
+        * written.  To match DISM, some odd rules need to be followed here.
+        * See write_dentry_streams() for explanation.  Keep this in sync with
+        * write_dentry_streams()!
+        */
+       if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
+               num_unnamed_streams++;
+       } else {
                 for (unsigned i = 0; i < inode->i_num_streams; i++) {
                         const struct wim_inode_stream *strm = &inode->i_streams[i];
+
                         if (stream_is_named_data_stream(strm)) {
                                 len += stream_out_total_length(strm);
                                 have_named_data_stream = true;
-                       } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) {
-                               wimlib_assert(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT);
-                               have_reparse_point_stream = true;
                         }
                 }
-
-               if (have_named_data_stream || have_reparse_point_stream) {
-                       if (have_reparse_point_stream)
-                               len += (sizeof(struct wim_extra_stream_entry_on_disk) + 7) & ~7;
-                       len += (sizeof(struct wim_extra_stream_entry_on_disk) + 7) & ~7;
-               }
+               if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT)
+                       num_unnamed_streams++;
+               if (!(inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY))
+                       num_unnamed_streams++;
         }
-
+       if (num_unnamed_streams > 1 || have_named_data_stream)
+               len += num_unnamed_streams *
+                      ALIGN(sizeof(struct wim_extra_stream_entry_on_disk), 8);
         return len;
  }
  
@@ -446,9 +428,8 @@ do_for_dentry_in_tree_depth(struct wim_dentry *dentry,
   * @arg will be passed as the second argument to each invocation of @visitor.
   *
   * This function does a pre-order traversal --- that is, a parent will be
- * visited before its children.  It also will visit siblings in order of
- * case-sensitive filename.  Equivalently, this function visits the entire tree
- * in the case-sensitive lexicographic order of the full paths.
+ * visited before its children.  Furthermore, siblings will be visited in their
+ * collation order.
   *
   * It is safe to pass NULL for @root, which means that the dentry tree is empty.
   * In this case, this function does nothing.
@@ -483,10 +464,10 @@ for_dentry_in_tree_depth(struct wim_dentry *root,
  /*
   * Calculate the full path to @dentry within the WIM image, if not already done.
   *
- * The full name will be saved in the cached value 'dentry->_full_path'.
+ * The full name will be saved in the cached value 'dentry->d_full_path'.
   *
   * Whenever possible, use dentry_full_path() instead of calling this and
- * accessing _full_path directly.
+ * accessing d_full_path directly.
   *
   * Returns 0 or an error code resulting from a failed string conversion.
   */
@@ -494,16 +475,15 @@ int
  calculate_dentry_full_path(struct wim_dentry *dentry)
  {
         size_t ulen;
-       size_t dummy;
         const struct wim_dentry *d;
  
-       if (dentry->_full_path)
+       if (dentry->d_full_path)
                 return 0;
  
         ulen = 0;
         d = dentry;
         do {
-               ulen += d->file_name_nbytes / sizeof(utf16lechar);
+               ulen += d->d_name_nbytes / sizeof(utf16lechar);
                 ulen++;
                 d = d->d_parent;  /* assumes d == d->d_parent for root  */
         } while (!dentry_is_root(d));
@@ -513,8 +493,9 @@ calculate_dentry_full_path(struct wim_dentry *dentry)
  
         d = dentry;
         do {
-               p -= d->file_name_nbytes / sizeof(utf16lechar);
-               memcpy(p, d->file_name, d->file_name_nbytes);
+               p -= d->d_name_nbytes / sizeof(utf16lechar);
+               if (d->d_name_nbytes)
+                       memcpy(p, d->d_name, d->d_name_nbytes);
                 *--p = cpu_to_le16(WIM_PATH_SEPARATOR);
                 d = d->d_parent;  /* assumes d == d->d_parent for root  */
         } while (!dentry_is_root(d));
@@ -522,7 +503,7 @@ calculate_dentry_full_path(struct wim_dentry *dentry)
         wimlib_assert(p == ubuf);
  
         return utf16le_to_tstr(ubuf, ulen * sizeof(utf16lechar),
-                              &dentry->_full_path, &dummy);
+                              &dentry->d_full_path, NULL);
  }
  
  /*
@@ -536,7 +517,7 @@ tchar *
  dentry_full_path(struct wim_dentry *dentry)
  {
         calculate_dentry_full_path(dentry);
-       return dentry->_full_path;
+       return dentry->d_full_path;
  }
  
  static int
@@ -547,7 +528,7 @@ dentry_calculate_subdir_offset(struct wim_dentry *dentry, void *_subdir_offset_p
                 struct wim_dentry *child;
  
                 /* Set offset of directory's child dentries  */
-               dentry->subdir_offset = *subdir_offset_p;
+               dentry->d_subdir_offset = *subdir_offset_p;
  
                 /* Account for child dentries  */
                 for_dentry_child(child, dentry)
@@ -556,8 +537,8 @@ dentry_calculate_subdir_offset(struct wim_dentry *dentry, void *_subdir_offset_p
                 /* Account for end-of-directory entry  */
                 *subdir_offset_p += 8;
         } else {
-               /* Not a directory; set subdir_offset to 0  */
-               dentry->subdir_offset = 0;
+               /* Not a directory; set the subdir offset to 0  */
+               dentry->d_subdir_offset = 0;
         }
         return 0;
  }
@@ -582,50 +563,34 @@ calculate_subdir_offsets(struct wim_dentry *root, u64 *subdir_offset_p)
         for_dentry_in_tree(root, dentry_calculate_subdir_offset, subdir_offset_p);
  }
  
-/* Compare the UTF-16LE long filenames of two dentries case insensitively.  */
-static int
-dentry_compare_names_case_insensitive(const struct wim_dentry *d1,
-                                     const struct wim_dentry *d2)
-{
-       return cmp_utf16le_strings(d1->file_name,
-                                  d1->file_name_nbytes / 2,
-                                  d2->file_name,
-                                  d2->file_name_nbytes / 2,
-                                  true);
-}
-
-/* Compare the UTF-16LE long filenames of two dentries case sensitively.  */
  static int
-dentry_compare_names_case_sensitive(const struct wim_dentry *d1,
-                                   const struct wim_dentry *d2)
+dentry_compare_names(const struct wim_dentry *d1, const struct wim_dentry *d2,
+                    bool ignore_case)
  {
-       return cmp_utf16le_strings(d1->file_name,
-                                  d1->file_name_nbytes / 2,
-                                  d2->file_name,
-                                  d2->file_name_nbytes / 2,
-                                  false);
-}
-
-static int
-_avl_dentry_compare_names_ci(const struct avl_tree_node *n1,
-                            const struct avl_tree_node *n2)
-{
-       const struct wim_dentry *d1, *d2;
-
-       d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node_ci);
-       d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node_ci);
-       return dentry_compare_names_case_insensitive(d1, d2);
+       return cmp_utf16le_strings(d1->d_name, d1->d_name_nbytes / 2,
+                                  d2->d_name, d2->d_name_nbytes / 2,
+                                  ignore_case);
  }
  
+/*
+ * Collate (compare) the long filenames of two dentries.  This first compares
+ * the names ignoring case, then falls back to a case-sensitive comparison if
+ * the names are the same ignoring case.
+ */
  static int
-_avl_dentry_compare_names(const struct avl_tree_node *n1,
-                         const struct avl_tree_node *n2)
+collate_dentry_names(const struct avl_tree_node *n1,
+                    const struct avl_tree_node *n2)
  {
         const struct wim_dentry *d1, *d2;
+       int res;
  
         d1 = avl_tree_entry(n1, struct wim_dentry, d_index_node);
         d2 = avl_tree_entry(n2, struct wim_dentry, d_index_node);
-       return dentry_compare_names_case_sensitive(d1, d2);
+
+       res = dentry_compare_names(d1, d2, true);
+       if (res)
+               return res;
+       return dentry_compare_names(d1, d2, false);
  }
  
  /* Default case sensitivity behavior for searches with
@@ -633,106 +598,92 @@ _avl_dentry_compare_names(const struct avl_tree_node *n1,
   * WIMLIB_INIT_FLAG_DEFAULT_CASE_SENSITIVE or
   * WIMLIB_INIT_FLAG_DEFAULT_CASE_INSENSITIVE to wimlib_global_init().  */
  bool default_ignore_case =
-#ifdef __WIN32__
+#ifdef _WIN32
         true
  #else
         false
  #endif
  ;
  
-/* Case-sensitive dentry lookup.  Only @file_name and @file_name_nbytes of
- * @dummy must be valid.  */
-static struct wim_dentry *
-dir_lookup(const struct wim_inode *dir, const struct wim_dentry *dummy)
+/*
+ * Find the dentry within the given directory that has the given UTF-16LE
+ * filename.  Return it if found, otherwise return NULL.  This has configurable
+ * case sensitivity, and @name need not be null-terminated.
+ */
+struct wim_dentry *
+get_dentry_child_with_utf16le_name(const struct wim_dentry *dir,
+                                  const utf16lechar *name,
+                                  size_t name_nbytes,
+                                  CASE_SENSITIVITY_TYPE case_type)
  {
-       struct avl_tree_node *node;
+       struct wim_dentry wanted;
+       struct avl_tree_node *cur = dir->d_inode->i_children;
+       struct wim_dentry *ci_match = NULL;
  
-       node = avl_tree_lookup_node(dir->i_children,
-                                   &dummy->d_index_node,
-                                   _avl_dentry_compare_names);
-       if (!node)
-               return NULL;
-       return avl_tree_entry(node, struct wim_dentry, d_index_node);
-}
+       wanted.d_name = (utf16lechar *)name;
+       wanted.d_name_nbytes = name_nbytes;
  
-/* Case-insensitive dentry lookup.  Only @file_name and @file_name_nbytes of
- * @dummy must be valid.  */
-static struct wim_dentry *
-dir_lookup_ci(const struct wim_inode *dir, const struct wim_dentry *dummy)
-{
-       struct avl_tree_node *node;
+       if (unlikely(wanted.d_name_nbytes != name_nbytes))
+               return NULL; /* overflow */
  
-       node = avl_tree_lookup_node(dir->i_children_ci,
-                                   &dummy->d_index_node_ci,
-                                   _avl_dentry_compare_names_ci);
-       if (!node)
-               return NULL;
-       return avl_tree_entry(node, struct wim_dentry, d_index_node_ci);
-}
+       /* Note: we can't use avl_tree_lookup_node() here because we need to
+        * save case-insensitive matches. */
+       while (cur) {
+               struct wim_dentry *child;
+               int res;
  
-/* Given a UTF-16LE filename and a directory, look up the dentry for the file.
- * Return it if found, otherwise NULL.  This has configurable case sensitivity,
- * and @name need not be null-terminated.  */
-struct wim_dentry *
-get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
-                                  const utf16lechar *name,
-                                  size_t name_nbytes,
-                                  CASE_SENSITIVITY_TYPE case_ctype)
-{
-       const struct wim_inode *dir = dentry->d_inode;
-       bool ignore_case = will_ignore_case(case_ctype);
-       struct wim_dentry dummy;
-       struct wim_dentry *child;
+               child = avl_tree_entry(cur, struct wim_dentry, d_index_node);
  
-       dummy.file_name = (utf16lechar*)name;
-       dummy.file_name_nbytes = name_nbytes;
+               res = dentry_compare_names(&wanted, child, true);
+               if (!res) {
+                       /* case-insensitive match found */
+                       ci_match = child;
+
+                       res = dentry_compare_names(&wanted, child, false);
+                       if (!res)
+                               return child; /* case-sensitive match found */
+               }
  
-       if (!ignore_case)
-               /* Case-sensitive lookup.  */
-               return dir_lookup(dir, &dummy);
+               if (res < 0)
+                       cur = cur->left;
+               else
+                       cur = cur->right;
+       }
  
-       /* Case-insensitive lookup.  */
+       /* No case-sensitive match; use a case-insensitive match if possible. */
  
-       child = dir_lookup_ci(dir, &dummy);
-       if (!child)
+       if (!will_ignore_case(case_type))
                 return NULL;
  
-       if (likely(list_empty(&child->d_ci_conflict_list)))
-               /* Only one dentry has this case-insensitive name; return it */
-               return child;
+       if (ci_match) {
+               size_t num_other_ci_matches = 0;
+               struct wim_dentry *other_ci_match, *d;
  
-       /* Multiple dentries have the same case-insensitive name.  Choose the
-        * dentry with the same case-sensitive name, if one exists; otherwise
-        * print a warning and choose one of the possible dentries arbitrarily.
-        */
-       struct wim_dentry *alt = child;
-       size_t num_alts = 0;
+               dentry_for_each_ci_match(d, ci_match) {
+                       num_other_ci_matches++;
+                       other_ci_match = d;
+               }
  
-       do {
-               num_alts++;
-               if (!dentry_compare_names_case_sensitive(&dummy, alt))
-                       return alt;
-               alt = list_entry(alt->d_ci_conflict_list.next,
-                                struct wim_dentry, d_ci_conflict_list);
-       } while (alt != child);
-
-       WARNING("Result of case-insensitive lookup is ambiguous\n"
-               "          (returning \"%"TS"\" of %zu "
-               "possible files, including \"%"TS"\")",
-               dentry_full_path(child),
-               num_alts,
-               dentry_full_path(list_entry(child->d_ci_conflict_list.next,
-                                           struct wim_dentry,
-                                           d_ci_conflict_list)));
-       return child;
+               if (num_other_ci_matches != 0) {
+                       WARNING("Result of case-insensitive lookup is ambiguous\n"
+                               "          (returning \"%"TS"\" of %zu "
+                               "possible files, including \"%"TS"\")",
+                               dentry_full_path(ci_match), num_other_ci_matches,
+                               dentry_full_path(other_ci_match));
+               }
+       }
+
+       return ci_match;
  }
  
-/* Given a 'tchar' filename and a directory, look up the dentry for the file.
- * If the filename was successfully converted to UTF-16LE and the dentry was
- * found, return it; otherwise return NULL.  This has configurable case
- * sensitivity.  */
+/*
+ * Find the dentry within the given directory that has the given 'tstr'
+ * filename.  If the filename was successfully converted to UTF-16LE and the
+ * dentry was found, return it; otherwise return NULL.  This has configurable
+ * case sensitivity.
+ */
  struct wim_dentry *
-get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name,
+get_dentry_child_with_name(const struct wim_dentry *dir, const tchar *name,
                            CASE_SENSITIVITY_TYPE case_type)
  {
         int ret;
@@ -745,7 +696,7 @@ get_dentry_child_with_name(const struct wim_dentry *dentry, const tchar *name,
         if (ret)
                 return NULL;
  
-       child = get_dentry_child_with_utf16le_name(dentry,
+       child = get_dentry_child_with_utf16le_name(dir,
                                                    name_utf16le,
                                                    name_utf16le_nbytes,
                                                    case_type);
@@ -993,19 +944,6 @@ new_filler_directory(struct wim_dentry **dentry_ret)
         return 0;
  }
  
-static int
-dentry_clear_inode_visited(struct wim_dentry *dentry, void *_ignore)
-{
-       dentry->d_inode->i_visited = 0;
-       return 0;
-}
-
-void
-dentry_tree_clear_inode_visited(struct wim_dentry *root)
-{
-       for_dentry_in_tree(root, dentry_clear_inode_visited, NULL);
-}
-
  /*
   * Free a WIM dentry.
   *
@@ -1017,9 +955,9 @@ free_dentry(struct wim_dentry *dentry)
  {
         if (dentry) {
                 d_disassociate(dentry);
-               FREE(dentry->file_name);
-               FREE(dentry->short_name);
-               FREE(dentry->_full_path);
+               FREE(dentry->d_name);
+               FREE(dentry->d_short_name);
+               FREE(dentry->d_full_path);
                 FREE(dentry);
         }
  }
@@ -1070,134 +1008,99 @@ free_dentry_tree(struct wim_dentry *root, struct blob_table *blob_table)
         for_dentry_in_tree_depth(root, f, blob_table);
  }
  
-/* Insert the @child dentry into the case sensitive index of the @dir directory.
- * Return NULL if successfully inserted, otherwise a pointer to the
- * already-inserted duplicate.  */
-static struct wim_dentry *
-dir_index_child(struct wim_inode *dir, struct wim_dentry *child)
+/*
+ * Return the first dentry in the list of dentries which have the same
+ * case-insensitive name as the one given.
+ */
+struct wim_dentry *
+dentry_get_first_ci_match(struct wim_dentry *dentry)
  {
-       struct avl_tree_node *duplicate;
+       struct wim_dentry *ci_match = dentry;
  
-       duplicate = avl_tree_insert(&dir->i_children,
-                                   &child->d_index_node,
-                                   _avl_dentry_compare_names);
-       if (!duplicate)
-               return NULL;
-       return avl_tree_entry(duplicate, struct wim_dentry, d_index_node);
-}
+       for (;;) {
+               struct avl_tree_node *node;
+               struct wim_dentry *prev;
+
+               node = avl_tree_prev_in_order(&ci_match->d_index_node);
+               if (!node)
+                       break;
+               prev = avl_tree_entry(node, struct wim_dentry, d_index_node);
+               if (dentry_compare_names(prev, dentry, true))
+                       break;
+               ci_match = prev;
+       }
  
-/* Insert the @child dentry into the case insensitive index of the @dir
- * directory.  Return NULL if successfully inserted, otherwise a pointer to the
- * already-inserted duplicate.  */
-static struct wim_dentry *
-dir_index_child_ci(struct wim_inode *dir, struct wim_dentry *child)
-{
-       struct avl_tree_node *duplicate;
+       if (ci_match == dentry)
+               return dentry_get_next_ci_match(dentry, dentry);
  
-       duplicate = avl_tree_insert(&dir->i_children_ci,
-                                   &child->d_index_node_ci,
-                                   _avl_dentry_compare_names_ci);
-       if (!duplicate)
-               return NULL;
-       return avl_tree_entry(duplicate, struct wim_dentry, d_index_node_ci);
+       return ci_match;
  }
  
-/* Remove the specified dentry from its directory's case-sensitive index.  */
-static void
-dir_unindex_child(struct wim_inode *dir, struct wim_dentry *child)
+/*
+ * Return the next dentry in the list of dentries which have the same
+ * case-insensitive name as the one given.
+ */
+struct wim_dentry *
+dentry_get_next_ci_match(struct wim_dentry *dentry, struct wim_dentry *ci_match)
  {
-       avl_tree_remove(&dir->i_children, &child->d_index_node);
-}
+       do {
+               struct avl_tree_node *node;
  
-/* Remove the specified dentry from its directory's case-insensitive index.  */
-static void
-dir_unindex_child_ci(struct wim_inode *dir, struct wim_dentry *child)
-{
-       avl_tree_remove(&dir->i_children_ci, &child->d_index_node_ci);
-}
+               node = avl_tree_next_in_order(&ci_match->d_index_node);
+               if (!node)
+                       return NULL;
+               ci_match = avl_tree_entry(node, struct wim_dentry, d_index_node);
+       } while (ci_match == dentry);
  
-/* Return true iff the specified dentry is in its parent directory's
- * case-insensitive index.  */
-static bool
-dentry_in_ci_index(const struct wim_dentry *dentry)
-{
-       return !avl_tree_node_is_unlinked(&dentry->d_index_node_ci);
+       if (dentry_compare_names(ci_match, dentry, true))
+               return NULL;
+
+       return ci_match;
  }
  
  /*
- * Link a dentry into the tree.
+ * Link a dentry into a directory.
   *
   * @parent:
- *     The dentry that will be the parent of @child.  It must name a directory.
+ *     The directory into which to link the dentry.
   *
   * @child:
- *     The dentry to link.  It must be currently unlinked.
+ *     The dentry to link into the directory.  It must be currently unlinked.
   *
- * Returns NULL if successful.  If @parent already contains a dentry with the
- * same case-sensitive name as @child, returns a pointer to this duplicate
- * dentry.
+ * Returns NULL if successful; or, if @parent already contains a dentry with the
+ * same case-sensitive name as @child, then a pointer to this duplicate dentry
+ * is returned.
   */
  struct wim_dentry *
  dentry_add_child(struct wim_dentry *parent, struct wim_dentry *child)
  {
-       struct wim_dentry *duplicate;
-       struct wim_inode *dir;
+       struct wim_inode *dir = parent->d_inode;
+       struct avl_tree_node *duplicate;
  
         wimlib_assert(parent != child);
-
-       dir = parent->d_inode;
-
         wimlib_assert(inode_is_directory(dir));
  
-       duplicate = dir_index_child(dir, child);
+       duplicate = avl_tree_insert(&dir->i_children, &child->d_index_node,
+                                   collate_dentry_names);
         if (duplicate)
-               return duplicate;
+               return avl_tree_entry(duplicate, struct wim_dentry, d_index_node);
  
-       duplicate = dir_index_child_ci(dir, child);
-       if (duplicate) {
-               list_add(&child->d_ci_conflict_list, &duplicate->d_ci_conflict_list);
-               avl_tree_node_set_unlinked(&child->d_index_node_ci);
-       } else {
-               INIT_LIST_HEAD(&child->d_ci_conflict_list);
-       }
         child->d_parent = parent;
         return NULL;
  }
  
-/* Unlink a dentry from the tree.  */
+/* Unlink a dentry from its parent directory. */
  void
  unlink_dentry(struct wim_dentry *dentry)
  {
-       struct wim_inode *dir;
-
         /* Do nothing if the dentry is root or it's already unlinked.  Not
          * actually necessary based on the current callers, but we do the check
          * here to be safe.  */
         if (unlikely(dentry->d_parent == dentry))
                 return;
  
-       dir = dentry->d_parent->d_inode;
-
-       dir_unindex_child(dir, dentry);
-
-       if (dentry_in_ci_index(dentry)) {
-
-               dir_unindex_child_ci(dir, dentry);
-
-               if (!list_empty(&dentry->d_ci_conflict_list)) {
-                       /* Make a different case-insensitively-the-same dentry
-                        * be the "representative" in the search index.  */
-                       struct list_head *next;
-                       struct wim_dentry *other;
-                       struct wim_dentry *existing;
-
-                       next = dentry->d_ci_conflict_list.next;
-                       other = list_entry(next, struct wim_dentry, d_ci_conflict_list);
-                       existing = dir_index_child_ci(dir, other);
-                       wimlib_assert(existing == NULL);
-               }
-       }
-       list_del(&dentry->d_ci_conflict_list);
+       avl_tree_remove(&dentry->d_parent->d_inode->i_children,
+                       &dentry->d_index_node);
  
         /* Not actually necessary, but to be safe don't retain the now-obsolete
          * parent pointer.  */
@@ -1211,10 +1114,12 @@ read_extra_data(const u8 *p, const u8 *end, struct wim_inode *inode)
                 p++;
  
         if (unlikely(p < end)) {
-               inode->i_extra = memdup(p, end - p);
+               inode->i_extra = MALLOC(sizeof(struct wim_inode_extra) +
+                                       end - p);
                 if (!inode->i_extra)
                         return WIMLIB_ERR_NOMEM;
-               inode->i_extra_size = end - p;
+               inode->i_extra->size = end - p;
+               memcpy(inode->i_extra->data, p, end - p);
         }
         return 0;
  }
@@ -1248,43 +1153,66 @@ assign_stream_types_encrypted(struct wim_inode *inode)
  /*
   * Set the type of each stream for an unencrypted file.
   *
- * There will be an unnamed data stream, a reparse point stream, or both an
- * unnamed data stream and a reparse point stream.  In addition, there may be
- * named data streams.
+ * To specify the streams of each file, the WIM provides a main_hash and an
+ * optional list of "extra stream entries".  Each extra stream entry is a
+ * (name, hash) pair where the name is optional.  Hashes can be the special
+ * value of zero_hash, which means the stream is empty (zero-length).
+ *
+ * While extra stream entries with names always refer to "named data streams",
+ * the main hash and any extra unnamed hashes can be hard to interpret.  This is
+ * because the WIM file format unfortunately doesn't make it very clear which is
+ * the unnamed data stream (i.e. standard file contents) and which is the
+ * reparse stream.  This ambiguity is resolved (based on what MS software
+ * seems to do) by these rules: (1) a file can have at most one unnamed data
+ * stream and at most one reparse stream, (2) a reparse stream is present if and
+ * only if the file has FILE_ATTRIBUTE_REPARSE_POINT, and (3) the reparse
+ * stream, if present, is stored before the unnamed data stream if present
+ * (considering main_hash to come before any extra hashes).  Note: directories
+ * need not have an unnamed data stream stored, even with a zero hash, as
+ * "unnamed data stream" isn't meaningful for a directory in the first place.
+ *
+ * With those rules in mind, one would expect that the first unnamed stream
+ * would use main_hash, and the second (if present) would use an extra stream
+ * entry.  However, there is another quirk that we must be compatible with:
+ * sometimes main_hash isn't used and only extra stream entries are used.  To
+ * handle this, we ignore main_hash if it is zero and there is at least one
+ * unnamed extra stream entry.  This works correctly as long as a zero main_hash
+ * and an unnamed extra stream entry is never used to represent an empty reparse
+ * stream and an unnamed data stream.  (It's not, as the reparse stream always
+ * goes in the extra stream entries in this case.  See write_dentry_streams().)
   */
  static void
  assign_stream_types_unencrypted(struct wim_inode *inode)
  {
-       bool found_reparse_point_stream = false;
+       bool found_reparse_stream = false;
         bool found_unnamed_data_stream = false;
-       struct wim_inode_stream *unnamed_stream_with_zero_hash = NULL;
  
         for (unsigned i = 0; i < inode->i_num_streams; i++) {
                 struct wim_inode_stream *strm = &inode->i_streams[i];
  
                 if (stream_is_named(strm)) {
-                       /* Named data stream  */
+                       /* Named extra stream entry */
                         strm->stream_type = STREAM_TYPE_DATA;
-               } else if (!is_zero_hash(strm->_stream_hash)) {
+               } else if (i != 0 || !is_zero_hash(strm->_stream_hash)) {
+                       /* Unnamed extra stream entry or a nonzero main_hash */
                         if ((inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) &&
-                           !found_reparse_point_stream) {
-                               found_reparse_point_stream = true;
+                           !found_reparse_stream) {
+                               found_reparse_stream = true;
                                 strm->stream_type = STREAM_TYPE_REPARSE_POINT;
                         } else if (!found_unnamed_data_stream) {
                                 found_unnamed_data_stream = true;
                                 strm->stream_type = STREAM_TYPE_DATA;
-                       }
-               } else {
-                       /* If no stream name is specified and the hash is zero,
-                        * then remember this stream for later so that we can
-                        * assign it to the unnamed data stream if we don't find
-                        * a better candidate.  */
-                       unnamed_stream_with_zero_hash = strm;
-               }
+                       } /* Else, too many unnamed streams were found. */
+
+               } /* Else, it's a zero main_hash. */
         }
  
-       if (!found_unnamed_data_stream && unnamed_stream_with_zero_hash != NULL)
-               unnamed_stream_with_zero_hash->stream_type = STREAM_TYPE_DATA;
+       /* If needed, use the zero main_hash. */
+       if (!found_reparse_stream && !found_unnamed_data_stream) {
+               inode->i_streams[0].stream_type =
+                       (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) ?
+                       STREAM_TYPE_REPARSE_POINT : STREAM_TYPE_DATA;
+       }
  }
  
  /*
@@ -1292,29 +1220,27 @@ assign_stream_types_unencrypted(struct wim_inode *inode)
   */
  static int
  setup_inode_streams(const u8 *p, const u8 *end, struct wim_inode *inode,
-                   unsigned num_extra_streams, const u8 *default_hash,
+                   unsigned num_extra_streams, const u8 *main_hash,
                     u64 *offset_p)
  {
         const u8 *orig_p = p;
  
         inode->i_num_streams = 1 + num_extra_streams;
  
-       if (likely(inode->i_num_streams <= ARRAY_LEN(inode->i_embedded_streams))) {
-               inode->i_streams = inode->i_embedded_streams;
-       } else {
+       if (unlikely(inode->i_num_streams > ARRAY_LEN(inode->i_embedded_streams))) {
                 inode->i_streams = CALLOC(inode->i_num_streams,
                                           sizeof(inode->i_streams[0]));
                 if (!inode->i_streams)
                         return WIMLIB_ERR_NOMEM;
         }
  
-       /* Use the default hash field for the first stream  */
+       /* Use main_hash for the first stream. */
         inode->i_streams[0].stream_name = (utf16lechar *)NO_STREAM_NAME;
-       copy_hash(inode->i_streams[0]._stream_hash, default_hash);
+       copy_hash(inode->i_streams[0]._stream_hash, main_hash);
         inode->i_streams[0].stream_type = STREAM_TYPE_UNKNOWN;
         inode->i_streams[0].stream_id = 0;
  
-       /* Read the extra stream entries  */
+       /* Read the extra stream entries. */
         for (unsigned i = 1; i < inode->i_num_streams; i++) {
                 struct wim_inode_stream *strm;
                 const struct wim_extra_stream_entry_on_disk *disk_strm;
@@ -1333,10 +1259,7 @@ setup_inode_streams(const u8 *p, const u8 *end, struct wim_inode *inode,
                 disk_strm = (const struct wim_extra_stream_entry_on_disk *)p;
  
                 /* Read the length field  */
-               length = le64_to_cpu(disk_strm->length);
-
-               /* 8-byte align the length  */
-               length = (length + 7) & ~7;
+               length = ALIGN(le64_to_cpu(disk_strm->length), 8);
  
                 /* Make sure the length field is neither so small it doesn't
                  * include all the fixed-length data nor so large it overflows
@@ -1406,11 +1329,11 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
         struct wim_dentry *dentry;
         struct wim_inode *inode;
         u16 short_name_nbytes;
-       u16 file_name_nbytes;
+       u16 name_nbytes;
         u64 calculated_size;
         int ret;
  
-       BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE);
+       STATIC_ASSERT(sizeof(struct wim_dentry_on_disk) == WIM_DENTRY_DISK_SIZE);
  
         /* Before reading the whole dentry, we need to read just the length.
          * This is because a dentry of length 8 (that is, just the length field)
@@ -1426,7 +1349,7 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
         disk_dentry = (const struct wim_dentry_on_disk*)p;
  
         /* Get dentry length.  */
-       length = (le64_to_cpu(disk_dentry->length) + 7) & ~7;
+       length = ALIGN(le64_to_cpu(disk_dentry->length), 8);
  
         /* Check for end-of-directory.  */
         if (length <= 8) {
@@ -1453,24 +1376,19 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
         /* Read more fields: some into the dentry, and some into the inode.  */
         inode->i_attributes = le32_to_cpu(disk_dentry->attributes);
         inode->i_security_id = le32_to_cpu(disk_dentry->security_id);
-       dentry->subdir_offset = le64_to_cpu(disk_dentry->subdir_offset);
+       dentry->d_subdir_offset = le64_to_cpu(disk_dentry->subdir_offset);
         inode->i_creation_time = le64_to_cpu(disk_dentry->creation_time);
         inode->i_last_access_time = le64_to_cpu(disk_dentry->last_access_time);
         inode->i_last_write_time = le64_to_cpu(disk_dentry->last_write_time);
+       inode->i_unknown_0x54 = le32_to_cpu(disk_dentry->unknown_0x54);
  
-       /* I don't know what's going on here.  It seems like M$ screwed up the
-        * reparse points, then put the fields in the same place and didn't
-        * document it.  So we have some fields we read for reparse points, and
-        * some fields in the same place for non-reparse-points.  */
         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
-               inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->reparse.rp_unknown_1);
                 inode->i_reparse_tag = le32_to_cpu(disk_dentry->reparse.reparse_tag);
-               inode->i_rp_unknown_2 = le16_to_cpu(disk_dentry->reparse.rp_unknown_2);
-               inode->i_not_rpfixed = le16_to_cpu(disk_dentry->reparse.not_rpfixed);
+               inode->i_rp_reserved = le16_to_cpu(disk_dentry->reparse.rp_reserved);
+               inode->i_rp_flags = le16_to_cpu(disk_dentry->reparse.rp_flags);
                 /* Leave inode->i_ino at 0.  Note: this means that WIM cannot
                  * represent multiple hard links to a reparse point file.  */
         } else {
-               inode->i_rp_unknown_1 = le32_to_cpu(disk_dentry->nonreparse.rp_unknown_1);
                 inode->i_ino = le64_to_cpu(disk_dentry->nonreparse.hard_link_group_id);
         }
  
@@ -1478,16 +1396,16 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
          * name, and the short name.  */
  
         short_name_nbytes = le16_to_cpu(disk_dentry->short_name_nbytes);
-       file_name_nbytes = le16_to_cpu(disk_dentry->file_name_nbytes);
+       name_nbytes = le16_to_cpu(disk_dentry->name_nbytes);
  
-       if (unlikely((short_name_nbytes & 1) | (file_name_nbytes & 1))) {
+       if (unlikely((short_name_nbytes & 1) | (name_nbytes & 1))) {
                 ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE;
                 goto err_free_dentry;
         }
  
         /* We now know the length of the file name and short name.  Make sure
          * the length of the dentry is large enough to actually hold them.  */
-       calculated_size = dentry_min_len_with_names(file_name_nbytes,
+       calculated_size = dentry_min_len_with_names(name_nbytes,
                                                     short_name_nbytes);
  
         if (unlikely(length < calculated_size)) {
@@ -1500,25 +1418,25 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
  
         /* Read the filename if present.  Note: if the filename is empty, there
          * is no null terminator following it.  */
-       if (file_name_nbytes) {
-               dentry->file_name = utf16le_dupz(p, file_name_nbytes);
-               if (dentry->file_name == NULL) {
+       if (name_nbytes) {
+               dentry->d_name = utf16le_dupz(p, name_nbytes);
+               if (unlikely(!dentry->d_name)) {
                         ret = WIMLIB_ERR_NOMEM;
                         goto err_free_dentry;
                 }
-               dentry->file_name_nbytes = file_name_nbytes;
-               p += (u32)file_name_nbytes + 2;
+               dentry->d_name_nbytes = name_nbytes;
+               p += (u32)name_nbytes + 2;
         }
  
         /* Read the short filename if present.  Note: if there is no short
          * filename, there is no null terminator following it. */
         if (short_name_nbytes) {
-               dentry->short_name = utf16le_dupz(p, short_name_nbytes);
-               if (dentry->short_name == NULL) {
+               dentry->d_short_name = utf16le_dupz(p, short_name_nbytes);
+               if (unlikely(!dentry->d_short_name)) {
                         ret = WIMLIB_ERR_NOMEM;
                         goto err_free_dentry;
                 }
-               dentry->short_name_nbytes = short_name_nbytes;
+               dentry->d_short_name_nbytes = short_name_nbytes;
                 p += (u32)short_name_nbytes + 2;
         }
  
@@ -1535,7 +1453,7 @@ read_dentry(const u8 * restrict buf, size_t buf_len,
                                   &buf[buf_len],
                                   inode,
                                   le16_to_cpu(disk_dentry->num_extra_streams),
-                                 disk_dentry->default_hash,
+                                 disk_dentry->main_hash,
                                   &offset);
         if (ret)
                 goto err_free_dentry;
@@ -1549,40 +1467,72 @@ err_free_dentry:
         return ret;
  }
  
-/* Is the dentry named "." or ".." ?  */
  static bool
  dentry_is_dot_or_dotdot(const struct wim_dentry *dentry)
  {
-       if (dentry->file_name_nbytes <= 4) {
-               if (dentry->file_name_nbytes == 4) {
-                       if (dentry->file_name[0] == cpu_to_le16('.') &&
-                           dentry->file_name[1] == cpu_to_le16('.'))
+       if (dentry->d_name_nbytes <= 4) {
+               if (dentry->d_name_nbytes == 4) {
+                       if (dentry->d_name[0] == cpu_to_le16('.') &&
+                           dentry->d_name[1] == cpu_to_le16('.'))
                                 return true;
-               } else if (dentry->file_name_nbytes == 2) {
-                       if (dentry->file_name[0] == cpu_to_le16('.'))
+               } else if (dentry->d_name_nbytes == 2) {
+                       if (dentry->d_name[0] == cpu_to_le16('.'))
                                 return true;
                 }
         }
         return false;
  }
  
+static bool
+dentry_contains_embedded_null(const struct wim_dentry *dentry)
+{
+       for (unsigned i = 0; i < dentry->d_name_nbytes / 2; i++)
+               if (dentry->d_name[i] == cpu_to_le16('\0'))
+                       return true;
+       return false;
+}
+
+static bool
+should_ignore_dentry(struct wim_dentry *dir, const struct wim_dentry *dentry)
+{
+       /* All dentries except the root must be named. */
+       if (!dentry_has_long_name(dentry)) {
+               WARNING("Ignoring unnamed file in directory \"%"TS"\"",
+                       dentry_full_path(dir));
+               return true;
+       }
+
+       /* Don't allow files named "." or "..".  Such filenames could be used in
+        * path traversal attacks. */
+       if (dentry_is_dot_or_dotdot(dentry)) {
+               WARNING("Ignoring file named \".\" or \"..\" in directory "
+                       "\"%"TS"\"", dentry_full_path(dir));
+               return true;
+       }
+
+       /* Don't allow filenames containing embedded null characters.  Although
+        * the null character is already considered an unsupported character for
+        * extraction by all targets, it is probably a good idea to just forbid
+        * such names entirely. */
+       if (dentry_contains_embedded_null(dentry)) {
+               WARNING("Ignoring filename with embedded null character in "
+                       "directory \"%"TS"\"", dentry_full_path(dir));
+               return true;
+       }
+
+       return false;
+}
+
  static int
  read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len,
-                          struct wim_dentry * restrict dir)
+                          struct wim_dentry * restrict dir, unsigned depth)
  {
-       u64 cur_offset = dir->subdir_offset;
-
-       /* Check for cyclic directory structure, which would cause infinite
-        * recursion if not handled.  */
-       for (struct wim_dentry *d = dir->d_parent;
-            !dentry_is_root(d); d = d->d_parent)
-       {
-               if (unlikely(d->subdir_offset == cur_offset)) {
-                       ERROR("Cyclic directory structure detected: children "
-                             "of \"%"TS"\" coincide with children of \"%"TS"\"",
-                             dentry_full_path(dir), dentry_full_path(d));
-                       return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
-               }
+       u64 cur_offset = dir->d_subdir_offset;
+
+       /* Limit recursion depth; this also rejects cyclic structures.  */
+       if (unlikely(depth >= 16384)) {
+               ERROR("Directory structure too deep!");
+               return WIMLIB_ERR_INVALID_METADATA_RESOURCE;
         }
  
         for (;;) {
@@ -1599,18 +1549,8 @@ read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len,
                 if (child == NULL)
                         return 0;
  
-               /* All dentries except the root should be named.  */
-               if (unlikely(!dentry_has_long_name(child))) {
-                       WARNING("Ignoring unnamed dentry in "
-                               "directory \"%"TS"\"", dentry_full_path(dir));
-                       free_dentry(child);
-                       continue;
-               }
-
-               /* Don't allow files named "." or "..".  */
-               if (unlikely(dentry_is_dot_or_dotdot(child))) {
-                       WARNING("Ignoring file named \".\" or \"..\"; "
-                               "potentially malicious archive!!!");
+               /* Ignore dentries with bad names.  */
+               if (unlikely(should_ignore_dentry(dir, child))) {
                         free_dentry(child);
                         continue;
                 }
@@ -1631,11 +1571,12 @@ read_dentry_tree_recursive(const u8 * restrict buf, size_t buf_len,
  
                 /* If this child is a directory that itself has children, call
                  * this procedure recursively.  */
-               if (child->subdir_offset != 0) {
+               if (child->d_subdir_offset != 0) {
                         if (likely(dentry_is_directory(child))) {
                                 ret = read_dentry_tree_recursive(buf,
                                                                  buf_len,
-                                                                child);
+                                                                child,
+                                                                depth + 1);
                                 if (ret)
                                         return ret;
                         } else {
@@ -1676,8 +1617,6 @@ read_dentry_tree(const u8 *buf, size_t buf_len,
         int ret;
         struct wim_dentry *root;
  
-       DEBUG("Reading dentry tree (root_offset=%"PRIu64")", root_offset);
-
         ret = read_dentry(buf, buf_len, &root_offset, &root);
         if (ret)
                 return ret;
@@ -1697,8 +1636,8 @@ read_dentry_tree(const u8 *buf, size_t buf_len,
                         goto err_free_dentry_tree;
                 }
  
-               if (likely(root->subdir_offset != 0)) {
-                       ret = read_dentry_tree_recursive(buf, buf_len, root);
+               if (likely(root->d_subdir_offset != 0)) {
+                       ret = read_dentry_tree_recursive(buf, buf_len, root, 0);
                         if (ret)
                                 goto err_free_dentry_tree;
                 }
@@ -1741,6 +1680,105 @@ write_extra_stream_entry(u8 * restrict p, const utf16lechar * restrict name,
         return p;
  }
  
+/*
+ * Write the stream references for a WIM dentry.  To be compatible with DISM, we
+ * follow the below rules:
+ *
+ * 1. If the file has FILE_ATTRIBUTE_ENCRYPTED, then only the EFSRPC_RAW_DATA
+ *    stream is stored.  Otherwise, the streams that are stored are:
+ *    - Reparse stream if the file has FILE_ATTRIBUTE_REPARSE_POINT
+ *    - Unnamed data stream if the file doesn't have FILE_ATTRIBUTE_DIRECTORY
+ *    - Named data streams
+ *
+ * 2. If only one stream is being stored and it is the EFSRPC_RAW_DATA, unnamed
+ *    data, or reparse stream, then its hash goes in main_hash, and no extra
+ *    stream entries are stored.  Otherwise, *all* streams go in the extra
+ *    stream entries, and main_hash is left zeroed!
+ *
+ * 3. If both the reparse stream and unnamed data stream are being stored, then
+ *    the reparse stream comes first.
+ *
+ * 4. The unnamed stream(s) come before the named stream(s).  (Actually, DISM
+ *    puts the named streams between the first and second unnamed streams, but
+ *    this is incompatible with itself...  Tested with DISM 10.0.20348.681.)
+ *
+ * wimlib v1.14.1 and earlier behaved slightly differently for directories.
+ * First, wimlib always put the hash of the reparse stream in an extra stream
+ * entry, never in main_hash.  This difference vs. DISM went unnoticed for a
+ * long time, but eventually it was found that it broke the Windows 8 setup
+ * wizard.  Second, when a directory had any extra streams, wimlib created an
+ * extra stream entry to represent the (empty) unnamed data stream.  However,
+ * DISM now rejects that (though I think it used to accept it).  There isn't
+ * really any such thing as "unnamed data stream" for a directory.
+ *
+ * Keep this in sync with dentry_out_total_length()!
+ */
+static u8 *
+write_dentry_streams(const struct wim_inode *inode,
+                    struct wim_dentry_on_disk *disk_dentry, u8 *p)
+{
+       const u8 *unnamed_data_stream_hash = zero_hash;
+       const u8 *reparse_stream_hash = zero_hash;
+       const u8 *efsrpc_stream_hash = zero_hash;
+       const u8 *unnamed_stream_hashes[2] = { zero_hash };
+       unsigned num_unnamed_streams = 0;
+       unsigned num_named_streams = 0;
+
+       for (unsigned i = 0; i < inode->i_num_streams; i++) {
+               const struct wim_inode_stream *strm = &inode->i_streams[i];
+
+               switch (strm->stream_type) {
+               case STREAM_TYPE_DATA:
+                       if (stream_is_named(strm))
+                               num_named_streams++;
+                       else
+                               unnamed_data_stream_hash = stream_hash(strm);
+                       break;
+               case STREAM_TYPE_REPARSE_POINT:
+                       reparse_stream_hash = stream_hash(strm);
+                       break;
+               case STREAM_TYPE_EFSRPC_RAW_DATA:
+                       efsrpc_stream_hash = stream_hash(strm);
+                       break;
+               }
+       }
+
+       if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED) {
+               unnamed_stream_hashes[num_unnamed_streams++] = efsrpc_stream_hash;
+               num_named_streams = 0;
+       } else {
+               if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT)
+                       unnamed_stream_hashes[num_unnamed_streams++] = reparse_stream_hash;
+               if (!(inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY))
+                       unnamed_stream_hashes[num_unnamed_streams++] = unnamed_data_stream_hash;
+       }
+
+       if (num_unnamed_streams <= 1 && num_named_streams == 0) {
+               /* No extra stream entries are needed. */
+               copy_hash(disk_dentry->main_hash, unnamed_stream_hashes[0]);
+               disk_dentry->num_extra_streams = 0;
+               return p;
+       }
+
+       /* Else, all streams go in extra stream entries. */
+       copy_hash(disk_dentry->main_hash, zero_hash);
+       wimlib_assert(num_unnamed_streams + num_named_streams <= 0xFFFF);
+       disk_dentry->num_extra_streams = cpu_to_le16(num_unnamed_streams +
+                                                    num_named_streams);
+       for (unsigned i = 0; i < num_unnamed_streams; i++)
+               p = write_extra_stream_entry(p, NO_STREAM_NAME,
+                                            unnamed_stream_hashes[i]);
+       for (unsigned i = 0; i < inode->i_num_streams; i++) {
+               const struct wim_inode_stream *strm = &inode->i_streams[i];
+
+               if (stream_is_named_data_stream(strm)) {
+                       p = write_extra_stream_entry(p, strm->stream_name,
+                                                    stream_hash(strm));
+               }
+       }
+       return p;
+}
+
  /*
   * Write a WIM dentry to an output buffer.
   *
@@ -1769,7 +1807,7 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
  
         disk_dentry->attributes = cpu_to_le32(inode->i_attributes);
         disk_dentry->security_id = cpu_to_le32(inode->i_security_id);
-       disk_dentry->subdir_offset = cpu_to_le64(dentry->subdir_offset);
+       disk_dentry->subdir_offset = cpu_to_le64(dentry->d_subdir_offset);
  
         disk_dentry->unused_1 = cpu_to_le64(0);
         disk_dentry->unused_2 = cpu_to_le64(0);
@@ -1777,36 +1815,35 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
         disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time);
         disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time);
         disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time);
+       disk_dentry->unknown_0x54 = cpu_to_le32(inode->i_unknown_0x54);
         if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
-               disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
                 disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag);
-               disk_dentry->reparse.rp_unknown_2 = cpu_to_le16(inode->i_rp_unknown_2);
-               disk_dentry->reparse.not_rpfixed = cpu_to_le16(inode->i_not_rpfixed);
+               disk_dentry->reparse.rp_reserved = cpu_to_le16(inode->i_rp_reserved);
+               disk_dentry->reparse.rp_flags = cpu_to_le16(inode->i_rp_flags);
         } else {
-               disk_dentry->nonreparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1);
                 disk_dentry->nonreparse.hard_link_group_id =
                         cpu_to_le64((inode->i_nlink == 1) ? 0 : inode->i_ino);
         }
  
-       disk_dentry->short_name_nbytes = cpu_to_le16(dentry->short_name_nbytes);
-       disk_dentry->file_name_nbytes = cpu_to_le16(dentry->file_name_nbytes);
+       disk_dentry->short_name_nbytes = cpu_to_le16(dentry->d_short_name_nbytes);
+       disk_dentry->name_nbytes = cpu_to_le16(dentry->d_name_nbytes);
         p += sizeof(struct wim_dentry_on_disk);
  
         wimlib_assert(dentry_is_root(dentry) != dentry_has_long_name(dentry));
  
         if (dentry_has_long_name(dentry))
-               p = mempcpy(p, dentry->file_name, (u32)dentry->file_name_nbytes + 2);
+               p = mempcpy(p, dentry->d_name, (u32)dentry->d_name_nbytes + 2);
  
         if (dentry_has_short_name(dentry))
-               p = mempcpy(p, dentry->short_name, (u32)dentry->short_name_nbytes + 2);
+               p = mempcpy(p, dentry->d_short_name, (u32)dentry->d_short_name_nbytes + 2);
  
         /* Align to 8-byte boundary */
         while ((uintptr_t)p & 7)
                 *p++ = 0;
  
-       if (inode->i_extra_size) {
+       if (inode->i_extra) {
                 /* Extra tagged items --- not usually present.  */
-               p = mempcpy(p, inode->i_extra, inode->i_extra_size);
+               p = mempcpy(p, inode->i_extra->data, inode->i_extra->size);
  
                 /* Align to 8-byte boundary */
                 while ((uintptr_t)p & 7)
@@ -1815,80 +1852,17 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p)
  
         disk_dentry->length = cpu_to_le64(p - orig_p);
  
-       /* Streams  */
-
-       if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
-               const struct wim_inode_stream *efs_strm;
-               const u8 *efs_hash;
-
-               efs_strm = inode_get_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA,
-                                           NO_STREAM_NAME);
-               efs_hash = efs_strm ? stream_hash(efs_strm) : zero_hash;
-               copy_hash(disk_dentry->default_hash, efs_hash);
-               disk_dentry->num_extra_streams = cpu_to_le16(0);
-       } else {
-               /*
-                * Extra stream entries:
-                *
-                * - Use one extra stream entry for each named data stream
-                * - Use one extra stream entry for the unnamed data stream when there is either:
-                *      - a reparse point stream
-                *      - at least one named data stream (for Windows PE bug workaround)
-                * - Use one extra stream entry for the reparse point stream if there is one
-                */
-               bool have_named_data_stream = false;
-               bool have_reparse_point_stream = false;
-               u16 num_extra_streams = 0;
-               const u8 *unnamed_data_stream_hash = zero_hash;
-               const u8 *reparse_point_hash;
-               for (unsigned i = 0; i < inode->i_num_streams; i++) {
-                       const struct wim_inode_stream *strm = &inode->i_streams[i];
-                       if (strm->stream_type == STREAM_TYPE_DATA) {
-                               if (stream_is_named(strm))
-                                       have_named_data_stream = true;
-                               else
-                                       unnamed_data_stream_hash = stream_hash(strm);
-                       } else if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) {
-                               have_reparse_point_stream = true;
-                               reparse_point_hash = stream_hash(strm);
-                       }
-               }
-
-               if (have_reparse_point_stream || have_named_data_stream) {
-
-                       copy_hash(disk_dentry->default_hash, zero_hash);
-
-                       if (have_reparse_point_stream) {
-                               p = write_extra_stream_entry(p, NO_STREAM_NAME,
-                                                            reparse_point_hash);
-                               num_extra_streams++;
-                       }
-
-                       p = write_extra_stream_entry(p, NO_STREAM_NAME,
-                                                    unnamed_data_stream_hash);
-                       num_extra_streams++;
-               } else {
-                       copy_hash(disk_dentry->default_hash, unnamed_data_stream_hash);
-               }
-
-               for (unsigned i = 0; i < inode->i_num_streams; i++) {
-                       const struct wim_inode_stream *strm = &inode->i_streams[i];
-                       if (stream_is_named_data_stream(strm)) {
-                               p = write_extra_stream_entry(p, strm->stream_name,
-                                                            stream_hash(strm));
-                               num_extra_streams++;
-                       }
-               }
-               disk_dentry->num_extra_streams = cpu_to_le16(num_extra_streams);
-       }
-
-       return p;
+       /*
+        * Set disk_dentry->main_hash and disk_dentry->num_extra_streams,
+        * and write any extra stream entries that are needed.
+        */
+       return write_dentry_streams(inode, disk_dentry, p);
  }
  
  static int
  write_dir_dentries(struct wim_dentry *dir, void *_pp)
  {
-       if (dir->subdir_offset != 0) {
+       if (dir->d_subdir_offset != 0) {
                 u8 **pp = _pp;
                 u8 *p = *pp;
                 struct wim_dentry *child;
@@ -1922,10 +1896,6 @@ write_dir_dentries(struct wim_dentry *dir, void *_pp)
  u8 *
  write_dentry_tree(struct wim_dentry *root, u8 *p)
  {
-       DEBUG("Writing dentry tree.");
-
-       wimlib_assert(root != NULL);
-
         /* write root dentry and end-of-directory entry following it */
         p = write_dentry(root, p);
         *(u64*)p = 0;