]> wimlib.net Git - wimlib/blobdiff - src/dentry.c
hardlink.c: Fix some error paths
[wimlib] / src / dentry.c
index 04ed1092900889fc2917d0ffea28c226c55221c2..c62a694f77ae3284e6309ee11d0007df79cb0c20 100644 (file)
@@ -338,7 +338,7 @@ for_dentry_in_tree_depth(struct wim_dentry *root,
 
 /* Calculate the full path of @dentry.  The full path of its parent must have
  * already been calculated, or it must be the root dentry. */
-static int
+int
 calculate_dentry_full_path(struct wim_dentry *dentry)
 {
        tchar *full_path;
@@ -349,7 +349,8 @@ calculate_dentry_full_path(struct wim_dentry *dentry)
                return 0;
 
        if (dentry_is_root(dentry)) {
-               full_path = TSTRDUP(T("/"));
+               static const tchar _root_path[] = {WIM_PATH_SEPARATOR, T('\0')};
+               full_path = TSTRDUP(_root_path);
                if (!full_path)
                        return WIMLIB_ERR_NOMEM;
                full_path_nbytes = 1 * sizeof(tchar);
@@ -393,7 +394,7 @@ calculate_dentry_full_path(struct wim_dentry *dentry)
                if (!full_path)
                        return WIMLIB_ERR_NOMEM;
                memcpy(full_path, parent_full_path, parent_full_path_nbytes);
-               full_path[parent_full_path_nbytes / sizeof(tchar)] = T('/');
+               full_path[parent_full_path_nbytes / sizeof(tchar)] = WIM_PATH_SEPARATOR;
        #if TCHAR_IS_UTF16LE
                memcpy(&full_path[parent_full_path_nbytes / sizeof(tchar) + 1],
                       dentry->file_name,
@@ -479,19 +480,19 @@ calculate_subdir_offsets(struct wim_dentry *dentry, u64 *subdir_offset_p)
        }
 }
 
-/* UNIX: Case-sensitive UTF-16LE dentry or stream name comparison.  We call this
- * on Windows as well to distinguish true duplicates from names differing by
- * case only. */
+/* Case-sensitive UTF-16LE dentry or stream name comparison.  Used on both UNIX
+ * (always) and Windows (sometimes) */
 static int
 compare_utf16le_names_case_sensitive(const utf16lechar *name1, size_t nbytes1,
                                     const utf16lechar *name2, size_t nbytes2)
 {
        /* Return the result if the strings differ up to their minimum length.
-        * Note that we cannot strcmp() or strncmp() here, as the strings are in
-        * UTF-16LE format. */
+        * Note that we cannot use strcmp() or strncmp() here, as the strings
+        * are in UTF-16LE format. */
        int result = memcmp(name1, name2, min(nbytes1, nbytes2));
        if (result)
                return result;
+
        /* The strings are the same up to their minimum length, so return a
         * result based on their lengths. */
        if (nbytes1 < nbytes2)
@@ -508,10 +509,13 @@ static int
 compare_utf16le_names_case_insensitive(const utf16lechar *name1, size_t nbytes1,
                                       const utf16lechar *name2, size_t nbytes2)
 {
-       /* Only call _wcsicmp() if both strings are of nonzero length; otherwise
-        * one could be NULL. */
-       if (nbytes1 && nbytes2)
-               return _wcsicmp((const wchar_t*)name1, (const wchar_t*)name2);
+       /* If the names differ, ignoring case, within their common length
+        * (the shorter of the two), return that comparison result. */
+       int result = _wcsnicmp((const wchar_t*)name1, (const wchar_t*)name2,
+                              min(nbytes1 / 2, nbytes2 / 2));
+       if (result)
+               return result;
+
        /* The strings are the same up to their minimum length, so return a
         * result based on their lengths. */
        if (nbytes1 < nbytes2)
@@ -569,15 +573,29 @@ ads_entry_has_name(const struct wim_ads_entry *entry,
                                      entry->stream_name_nbytes);
 }
 
+/* Given a UTF-16LE filename and a directory, look up the dentry for the file.
+ * Return it if found, otherwise NULL.  This is case-sensitive on UNIX and
+ * case-insensitive on Windows. */
 struct wim_dentry *
 get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
                                   const utf16lechar *name,
                                   size_t name_nbytes)
 {
-       struct rb_node *node = dentry->d_inode->i_children.rb_node;
+       struct rb_node *node;
+
+#ifdef __WIN32__
+       node = dentry->d_inode->i_children_case_insensitive.rb_node;
+#else
+       node = dentry->d_inode->i_children.rb_node;
+#endif
+
        struct wim_dentry *child;
        while (node) {
+       #ifdef __WIN32__
+               child = rb_entry(node, struct wim_dentry, rb_node_case_insensitive);
+       #else
                child = rbnode_dentry(node);
+       #endif
                int result = compare_utf16le_names(name, name_nbytes,
                                                   child->file_name,
                                                   child->file_name_nbytes);
@@ -585,8 +603,20 @@ get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
                        node = node->rb_left;
                else if (result > 0)
                        node = node->rb_right;
-               else
+               else {
+               #ifdef __WIN32__
+                       if (!list_empty(&child->case_insensitive_conflict_list))
+                       {
+                               WARNING("Result of case-insensitive lookup is ambiguous "
+                                       "(returning \"%ls\" instead of \"%ls\")",
+                                       child->file_name,
+                                       container_of(child->case_insensitive_conflict_list.next,
+                                                    struct wim_dentry,
+                                                    case_insensitive_conflict_list)->file_name);
+                       }
+               #endif
                        return child;
+               }
        }
        return NULL;
 }
@@ -632,12 +662,13 @@ get_dentry_utf16le(WIMStruct *wim, const utf16lechar *path)
        }
        p = path;
        while (1) {
-               while (*p == cpu_to_le16('/'))
+               while (*p == cpu_to_le16(WIM_PATH_SEPARATOR))
                        p++;
                if (*p == cpu_to_le16('\0'))
                        break;
                pp = p;
-               while (*pp != cpu_to_le16('/') && *pp != cpu_to_le16('\0'))
+               while (*pp != cpu_to_le16(WIM_PATH_SEPARATOR) &&
+                      *pp != cpu_to_le16('\0'))
                        pp++;
 
                cur_dentry = get_dentry_child_with_utf16le_name(parent_dentry, p,
@@ -696,11 +727,11 @@ static void
 to_parent_name(tchar *buf, size_t len) /* Truncates the path in @buf, in place, to its parent
                                         * path.  @len is presumably the number of tchars in
                                         * @buf before the terminator -- confirm at callers. */
 {
        ssize_t i = (ssize_t)len - 1; /* signed so the scans below can stop at -1 */
-       while (i >= 0 && buf[i] == T('/'))
+       while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR) /* 1: skip trailing separators */
                i--;
-       while (i >= 0 && buf[i] != T('/'))
+       while (i >= 0 && buf[i] != WIM_PATH_SEPARATOR) /* 2: skip the last path component */
                i--;
-       while (i >= 0 && buf[i] == T('/'))
+       while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR) /* 3: skip the separators before it */
                i--;
        buf[i + 1] = T('\0'); /* i is -1 when nothing precedes the last component (including a
                               * leading-separator-only prefix), leaving an empty string */
 }
@@ -1001,7 +1032,8 @@ free_inode(struct wim_inode *inode)
        #endif
                /* HACK: This may instead delete the inode from i_list, but the
                 * hlist_del() behaves the same as list_del(). */
-               hlist_del(&inode->i_hlist);
+               if (!hlist_unhashed(&inode->i_hlist))
+                       hlist_del(&inode->i_hlist);
                FREE(inode->i_extracted_file);
                FREE(inode);
        }
@@ -1077,6 +1109,45 @@ free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table)
        for_dentry_in_tree_depth(root, do_free_dentry, lookup_table);
 }
 
+#ifdef __WIN32__
+
+/* Insert a dentry into the case insensitive index for a directory.
+ *
+ * This is a red-black tree, but when multiple dentries share the same
+ * case-insensitive name, only one is inserted into the tree itself; the rest
+ * are connected in a list.
+ *
+ * @parent: Directory whose d_inode->i_children_case_insensitive tree is
+ *          searched and, when no equal name is found, extended.
+ * @child:  Dentry to index through its rb_node_case_insensitive member.
+ *
+ * Returns NULL if @child was linked into the tree.  If a dentry that compares
+ * equal under dentry_compare_names_case_insensitive() is already present, the
+ * tree is left unmodified and that dentry is returned.  This function never
+ * touches case_insensitive_conflict_list; chaining @child onto the returned
+ * dentry is left to the caller.
+ */
+static struct wim_dentry *
+dentry_add_child_case_insensitive(struct wim_dentry *parent,
+                                 struct wim_dentry *child)
+{
+       struct rb_root *root;
+       struct rb_node **new;
+       struct rb_node *rb_parent;
+
+       root = &parent->d_inode->i_children_case_insensitive;
+       new = &root->rb_node;
+       rb_parent = NULL;
+       while (*new) { /* walk down until an empty child slot is reached */
+               struct wim_dentry *this = container_of(*new, struct wim_dentry,
+                                                      rb_node_case_insensitive);
+               int result = dentry_compare_names_case_insensitive(child, this);
+
+               rb_parent = *new; /* node that the next slot hangs off */
+
+               if (result < 0)
+                       new = &((*new)->rb_left);
+               else if (result > 0)
+                       new = &((*new)->rb_right);
+               else
+                       return this; /* equal name already indexed: do not insert */
+       }
+       rb_link_node(&child->rb_node_case_insensitive, rb_parent, new); /* attach at the empty slot found above */
+       rb_insert_color(&child->rb_node_case_insensitive, root); /* rbtree recolor/rebalance step after linking */
+       return NULL;
+}
+#endif
+
 /*
  * Links a dentry into the directory tree.
  *
@@ -1084,23 +1155,27 @@ free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table)
  * @child: The dentry to link.
  *
  * Returns NULL if successful.  If @parent already contains a dentry with the
- * same name as @child (see compare_utf16le_names() for what names are
- * considered the "same"), the pointer to this duplicate dentry is returned.
+ * same case-sensitive name as @child, the pointer to this duplicate dentry is
+ * returned.
  */
 struct wim_dentry *
 dentry_add_child(struct wim_dentry * restrict parent,
                 struct wim_dentry * restrict child)
 {
+       struct rb_root *root;
+       struct rb_node **new;
+       struct rb_node *rb_parent;
+
        wimlib_assert(dentry_is_directory(parent));
        wimlib_assert(parent != child);
 
-       struct rb_root *root = &parent->d_inode->i_children;
-       struct rb_node **new = &(root->rb_node);
-       struct rb_node *rb_parent = NULL;
-
+       /* Case sensitive child dentry index */
+       root = &parent->d_inode->i_children;
+       new = &root->rb_node;
+       rb_parent = NULL;
        while (*new) {
                struct wim_dentry *this = rbnode_dentry(*new);
-               int result = dentry_compare_names(child, this);
+               int result = dentry_compare_names_case_sensitive(child, this);
 
                rb_parent = *new;
 
@@ -1114,6 +1189,20 @@ dentry_add_child(struct wim_dentry * restrict parent,
        child->parent = parent;
        rb_link_node(&child->rb_node, rb_parent, new);
        rb_insert_color(&child->rb_node, root);
+
+#ifdef __WIN32__
+       {
+               struct wim_dentry *existing;
+               existing = dentry_add_child_case_insensitive(parent, child);
+               if (existing) {
+                       list_add(&child->case_insensitive_conflict_list,
+                                &existing->case_insensitive_conflict_list);
+                       child->rb_node_case_insensitive.__rb_parent_color = 0;
+               } else {
+                       INIT_LIST_HEAD(&child->case_insensitive_conflict_list);
+               }
+       }
+#endif
        return NULL;
 }
 
@@ -1121,8 +1210,31 @@ dentry_add_child(struct wim_dentry * restrict parent,
 void
 unlink_dentry(struct wim_dentry *dentry) /* Removes @dentry from its parent directory's child
                                           * index (and, under __WIN32__, from the
                                           * case-insensitive index as well). */
 {
-       if (!dentry_is_root(dentry))
-               rb_erase(&dentry->rb_node, &dentry->parent->d_inode->i_children);
+       struct wim_dentry *parent = dentry->parent;
+
+       if (parent == dentry) /* Dentry is its own parent: presumably the root, since this
+                              * replaces the old dentry_is_root() test.  Nothing to unlink. */
+               return;
+       rb_erase(&dentry->rb_node, &parent->d_inode->i_children); /* case-sensitive index */
+#ifdef __WIN32__
+       if (dentry->rb_node_case_insensitive.__rb_parent_color) {
+               /* This dentry was in the case-insensitive red-black tree.
+                * (dentry_add_child() sets __rb_parent_color to 0 for a dentry
+                * that was only chained onto a conflict list, so a nonzero
+                * value is taken to mean tree membership.) */
+               rb_erase(&dentry->rb_node_case_insensitive,
+                        &parent->d_inode->i_children_case_insensitive);
+               if (!list_empty(&dentry->case_insensitive_conflict_list)) {
+                       /* Make a different case-insensitively-the-same dentry
+                        * be the "representative" in the red-black tree. */
+                       struct list_head *next;
+                       struct wim_dentry *other;
+                       struct wim_dentry *existing;
+
+                       next = dentry->case_insensitive_conflict_list.next;
+                       other = list_entry(next, struct wim_dentry, case_insensitive_conflict_list);
+                       existing = dentry_add_child_case_insensitive(parent, other);
+                       wimlib_assert(existing == NULL); /* @dentry was just erased, so no equal name
+                                                         * should remain in the tree */
+               }
+       }
+       list_del(&dentry->case_insensitive_conflict_list); /* done whether or not @dentry was the
+                                                           * tree representative */
+#endif
 }
 
 /*
@@ -1396,39 +1508,6 @@ inode_set_unix_data(struct wim_inode *inode, uid_t uid, gid_t gid, mode_t mode,
 }
 #endif /* !__WIN32__ */
 
-/* Replace weird characters in filenames and alternate data stream names.
- *
- * In particular we do not want the path separator to appear in any names, as
- * that would make it possible for a "malicious" WIM to extract itself to any
- * location it wanted to.
- *
- * @name: NUL-terminated UTF-16LE string, modified in place.  Under __WIN32__
- *        every character found in the wcschr() set below is replaced with
- *        0xfffd; otherwise only '/' is replaced, with '?'.  One warning
- *        showing @name is issued just before the first replacement. */
-static void
-replace_forbidden_characters(utf16lechar *name)
-{
-       utf16lechar *p;
-
-       for (p = name; *p; p++) {
-       #ifdef __WIN32__
-               if (wcschr(L"<>:\"/\\|?*", (wchar_t)*p))
-       #else
-               if (*p == cpu_to_le16('/'))
-       #endif
-               {
-                       if (name) { /* name doubles as a "not yet warned" flag */
-                               WARNING("File, directory, or stream name \"%"WS"\"\n"
-                                       "          contains forbidden characters; "
-                                       "substituting replacement characters.",
-                                       name);
-                               name = NULL; /* warn only once; the scan itself continues through p */
-                       }
-               #ifdef __WIN32__
-                       *p = cpu_to_le16(0xfffd);
-               #else
-                       *p = cpu_to_le16('?');
-               #endif
-               }
-       }
-}
-
 /*
  * Reads the alternate data stream entries of a WIM dentry.
  *
@@ -1522,7 +1601,6 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode,
                               disk_entry->stream_name,
                               cur_entry->stream_name_nbytes);
                        cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = cpu_to_le16(0);
-                       replace_forbidden_characters(cur_entry->stream_name);
                }
 
                /* It's expected that the size of every ADS entry is a multiple
@@ -1728,7 +1806,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len,
                memcpy(file_name, p, file_name_nbytes);
                p += file_name_nbytes + 2;
                file_name[file_name_nbytes / 2] = cpu_to_le16(0);
-               replace_forbidden_characters(file_name);
        } else {
                file_name = NULL;
        }
@@ -1747,7 +1824,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len,
                memcpy(short_name, p, short_name_nbytes);
                p += short_name_nbytes + 2;
                short_name[short_name_nbytes / 2] = cpu_to_le16(0);
-               replace_forbidden_characters(short_name);
        } else {
                short_name = NULL;
        }
@@ -1876,30 +1952,11 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len,
                        const tchar *child_type, *duplicate_type;
                        child_type = dentry_get_file_type_string(child);
                        duplicate_type = dentry_get_file_type_string(duplicate);
-                       /* On UNIX, duplicates are exact.  On Windows,
-                        * duplicates may differ by case and we wish to provide
-                        * a different warning message in this case. */
-               #ifdef __WIN32__
-                       if (dentry_compare_names_case_sensitive(child, duplicate))
-                       {
-                               child->parent = dentry;
-                               WARNING("Ignoring %ls \"%ls\", which differs "
-                                       "only in case from %ls \"%ls\"",
-                                       child_type,
-                                       dentry_full_path(child),
-                                       duplicate_type,
-                                       dentry_full_path(duplicate));
-                       }
-                       else
-               #endif
-                       {
-                               WARNING("Ignoring duplicate %"TS" \"%"TS"\" "
-                                       "(the WIM image already contains a %"TS" "
-                                       "at that path with the exact same name)",
-                                       child_type, dentry_full_path(duplicate),
-                                       duplicate_type);
-                       }
-                       free_dentry(child);
+                       WARNING("Ignoring duplicate %"TS" \"%"TS"\" "
+                               "(the WIM image already contains a %"TS" "
+                               "at that path with the exact same name)",
+                               child_type, dentry_full_path(duplicate),
+                               duplicate_type);
                } else {
                        inode_add_dentry(child, child->d_inode);
                        /* If there are children of this child, call this