wimlib.net Git - wimlib/blobdiff - src/dentry.c
Improve handling of invalid filenames
[wimlib] / src / dentry.c
index b7a0032aa7be7e9bdcf7827a5e17ba06b22ca028..abe5095c2f7f0ce0a1597bdf54803060e1f3afa2 100644 (file)
@@ -70,6 +70,8 @@ struct wim_ads_entry_on_disk {
        utf16lechar stream_name[];
 } _packed_attribute;
 
+#define WIM_ADS_ENTRY_DISK_SIZE 38
+
 /* WIM directory entry (on-disk format) */
 struct wim_dentry_on_disk {
        le64 length;
@@ -105,6 +107,8 @@ struct wim_dentry_on_disk {
        /*utf16lechar short_name[];*/
 } _packed_attribute;
 
+#define WIM_DENTRY_DISK_SIZE 102
+
 /* Calculates the unaligned length, in bytes, of an on-disk WIM dentry that has
  * a file name and short name that take the specified numbers of bytes.  This
  * excludes any alternate data stream entries that may follow the dentry. */
@@ -130,16 +134,6 @@ dentry_correct_length_unaligned(const struct wim_dentry *dentry)
                                                dentry->short_name_nbytes);
 }
 
-/* Return %true iff the alternate data stream entry @entry has the UTF-16LE
- * stream name @name that has length @name_nbytes bytes. */
-static inline bool
-ads_entry_has_name(const struct wim_ads_entry *entry,
-                  const utf16lechar *name, size_t name_nbytes)
-{
-       return entry->stream_name_nbytes == name_nbytes &&
-              memcmp(entry->stream_name, name, name_nbytes) == 0;
-}
-
 /* Duplicates a string of system-dependent encoding into a UTF-16LE string and
  * returns the string and its length, in bytes, in the pointer arguments.  Frees
  * any existing string at the return location before overwriting it. */
@@ -344,7 +338,7 @@ for_dentry_in_tree_depth(struct wim_dentry *root,
 
 /* Calculate the full path of @dentry.  The full path of its parent must have
  * already been calculated, or it must be the root dentry. */
-static int
+int
 calculate_dentry_full_path(struct wim_dentry *dentry)
 {
        tchar *full_path;
@@ -485,25 +479,100 @@ calculate_subdir_offsets(struct wim_dentry *dentry, u64 *subdir_offset_p)
        }
 }
 
+/* Case-sensitive UTF-16LE dentry or stream name comparison; the default on
+ * UNIX, and also called on Windows to tell true duplicates apart from names
+ * that differ by case only.  Returns <0, 0, or >0 in the manner of memcmp(). */
 static int
-compare_utf16le_names(const utf16lechar *name1, size_t nbytes1,
-                     const utf16lechar *name2, size_t nbytes2)
+compare_utf16le_names_case_sensitive(const utf16lechar *name1, size_t nbytes1,
+                                    const utf16lechar *name2, size_t nbytes2)
 {
+       /* Return the result if the strings differ up to their minimum length.
+        * Note that we cannot use strcmp() or strncmp() here, as the strings
+        * are in UTF-16LE format. */
        int result = memcmp(name1, name2, min(nbytes1, nbytes2));
        if (result)
                return result;
+
+       /* The strings are the same up to their minimum length, so return a
+        * result based on their lengths. */
+       if (nbytes1 < nbytes2)
+               return -1;
+       else if (nbytes1 > nbytes2)
+               return 1;
        else
-               return (int)nbytes1 - (int)nbytes2;
+               return 0;
 }
 
+#ifdef __WIN32__
+/* Windows: Case-insensitive UTF-16LE dentry or stream name comparison */
 static int
-dentry_compare_names(const struct wim_dentry *d1, const struct wim_dentry *d2)
+compare_utf16le_names_case_insensitive(const utf16lechar *name1, size_t nbytes1,
+                                      const utf16lechar *name2, size_t nbytes2)
 {
-       return compare_utf16le_names(d1->file_name, d1->file_name_nbytes,
-                                    d2->file_name, d2->file_name_nbytes);
+       /* Compare only as many 16-bit characters as the shorter name holds;
+        * any difference found there already decides the result. */
+       int result = _wcsnicmp((const wchar_t*)name1, (const wchar_t*)name2,
+                              min(nbytes1 / 2, nbytes2 / 2));
+       if (result)
+               return result;
+
+       /* The strings are the same up to their minimum length, so return a
+        * result based on their lengths. */
+       if (nbytes1 < nbytes2)
+               return -1;
+       else if (nbytes1 > nbytes2)
+               return 1;
+       else
+               return 0;
 }
+#endif /* __WIN32__ */
+
+#ifdef __WIN32__
+#  define compare_utf16le_names compare_utf16le_names_case_insensitive
+#else
+#  define compare_utf16le_names compare_utf16le_names_case_sensitive
+#endif
 
 
+#ifdef __WIN32__
+static int
+dentry_compare_names_case_insensitive(const struct wim_dentry *d1,
+                                     const struct wim_dentry *d2)
+{
+       return compare_utf16le_names_case_insensitive(d1->file_name,
+                                                     d1->file_name_nbytes,
+                                                     d2->file_name,
+                                                     d2->file_name_nbytes);
+}
+#endif /* __WIN32__ */
+
+static int
+dentry_compare_names_case_sensitive(const struct wim_dentry *d1,
+                                   const struct wim_dentry *d2)
+{
+       return compare_utf16le_names_case_sensitive(d1->file_name,
+                                                   d1->file_name_nbytes,
+                                                   d2->file_name,
+                                                   d2->file_name_nbytes);
+}
+
+#ifdef __WIN32__
+#  define dentry_compare_names dentry_compare_names_case_insensitive
+#else
+#  define dentry_compare_names dentry_compare_names_case_sensitive
+#endif
+
+/* Return %true iff ADS entry @entry has the UTF-16LE stream name @name of
+ * @name_nbytes bytes; the match ignores case on Windows (__WIN32__) only. */
+static inline bool
+ads_entry_has_name(const struct wim_ads_entry *entry,
+                  const utf16lechar *name, size_t name_nbytes)
+{
+       return !compare_utf16le_names(name, name_nbytes,
+                                     entry->stream_name,
+                                     entry->stream_name_nbytes);
+}
+
 struct wim_dentry *
 get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
                                   const utf16lechar *name,
@@ -520,8 +589,20 @@ get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
                        node = node->rb_left;
                else if (result > 0)
                        node = node->rb_right;
-               else
+               else {
+               #ifdef __WIN32__
+                       if (!list_empty(&child->case_insensitive_conflict_list))
+                       {
+                               WARNING("Result of case-insensitive lookup is ambiguous "
+                                       "(returning \"%ls\" instead of \"%ls\")",
+                                       child->file_name,
+                                       container_of(child->case_insensitive_conflict_list.next,
+                                                    struct wim_dentry,
+                                                    case_insensitive_conflict_list)->file_name);
+                       }
+               #endif
                        return child;
+               }
        }
        return NULL;
 }
@@ -1018,22 +1099,28 @@ free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table)
  * @parent: The dentry that will be the parent of @child.
  * @child: The dentry to link.
  *
- * Returns non-NULL if a duplicate dentry was detected.
+ * Returns NULL if successful.  If @parent already contains a dentry with the
+ * same case-sensitive name as @child, the pointer to this duplicate dentry is
+ * returned.
  */
 struct wim_dentry *
 dentry_add_child(struct wim_dentry * restrict parent,
                 struct wim_dentry * restrict child)
 {
+       struct rb_root *root;
+       struct rb_node **new;
+       struct rb_node *rb_parent;
+
        wimlib_assert(dentry_is_directory(parent));
        wimlib_assert(parent != child);
 
-       struct rb_root *root = &parent->d_inode->i_children;
-       struct rb_node **new = &(root->rb_node);
-       struct rb_node *rb_parent = NULL;
-
+       /* Case sensitive child dentry index */
+       root = &parent->d_inode->i_children;
+       new = &root->rb_node;
+       rb_parent = NULL;
        while (*new) {
                struct wim_dentry *this = rbnode_dentry(*new);
-               int result = dentry_compare_names(child, this);
+               int result = dentry_compare_names_case_sensitive(child, this);
 
                rb_parent = *new;
 
@@ -1047,6 +1134,34 @@ dentry_add_child(struct wim_dentry * restrict parent,
        child->parent = parent;
        rb_link_node(&child->rb_node, rb_parent, new);
        rb_insert_color(&child->rb_node, root);
+
+#ifdef __WIN32__
+       /* Case insensitive child dentry index */
+       root = &parent->d_inode->i_children_case_insensitive;
+       new = &root->rb_node;
+       rb_parent = NULL;
+       while (*new) {
+               struct wim_dentry *this = container_of(*new, struct wim_dentry,
+                                                      rb_node_case_insensitive);
+               int result = dentry_compare_names_case_insensitive(child, this);
+
+               rb_parent = *new;
+
+               if (result < 0)
+                       new = &((*new)->rb_left);
+               else if (result > 0)
+                       new = &((*new)->rb_right);
+               else {
+                       list_add(&child->case_insensitive_conflict_list,
+                                &this->case_insensitive_conflict_list);
+                       return NULL;
+
+               }
+       }
+       rb_link_node(&child->rb_node_case_insensitive, rb_parent, new);
+       rb_insert_color(&child->rb_node_case_insensitive, root);
+       INIT_LIST_HEAD(&child->case_insensitive_conflict_list);
+#endif
        return NULL;
 }
 
@@ -1054,8 +1169,14 @@ dentry_add_child(struct wim_dentry * restrict parent,
 void
 unlink_dentry(struct wim_dentry *dentry)
 {
-       if (!dentry_is_root(dentry))
+       if (!dentry_is_root(dentry)) {
                rb_erase(&dentry->rb_node, &dentry->parent->d_inode->i_children);
+       #ifdef __WIN32__ /* NOTE(review): confirm rb_erase() is valid for a conflict-list-only dentry */
+               rb_erase(&dentry->rb_node_case_insensitive,
+                        &dentry->parent->d_inode->i_children_case_insensitive);
+               list_del(&dentry->case_insensitive_conflict_list);
+       #endif
+       }
 }
 
 /*
@@ -1180,12 +1301,11 @@ add_stream_from_data_buffer(const void *buffer, size_t size,
                lte = new_lookup_table_entry();
                if (!lte)
                        return NULL;
-               buffer_copy = MALLOC(size);
+               buffer_copy = memdup(buffer, size);
                if (!buffer_copy) {
                        free_lookup_table_entry(lte);
                        return NULL;
                }
-               memcpy(buffer_copy, buffer, size);
                lte->resource_location            = RESOURCE_IN_ATTACHED_BUFFER;
                lte->attached_buffer              = buffer_copy;
                lte->resource_entry.original_size = size;
@@ -1330,39 +1450,6 @@ inode_set_unix_data(struct wim_inode *inode, uid_t uid, gid_t gid, mode_t mode,
 }
 #endif /* !__WIN32__ */
 
-/* Replace weird characters in filenames and alternate data stream names.
- *
- * In particular we do not want the path separator to appear in any names, as
- * that would make it possible for a "malicious" WIM to extract itself to any
- * location it wanted to. */
-static void
-replace_forbidden_characters(utf16lechar *name)
-{
-       utf16lechar *p;
-
-       for (p = name; *p; p++) {
-       #ifdef __WIN32__
-               if (wcschr(L"<>:\"/\\|?*", (wchar_t)*p))
-       #else
-               if (*p == cpu_to_le16('/'))
-       #endif
-               {
-                       if (name) {
-                               WARNING("File, directory, or stream name \"%"WS"\"\n"
-                                       "          contains forbidden characters; "
-                                       "substituting replacement characters.",
-                                       name);
-                               name = NULL;
-                       }
-               #ifdef __WIN32__
-                       *p = cpu_to_le16(0xfffd);
-               #else
-                       *p = cpu_to_le16('?');
-               #endif
-               }
-       }
-}
-
 /*
  * Reads the alternate data stream entries of a WIM dentry.
  *
@@ -1388,6 +1475,8 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode,
        struct wim_ads_entry *ads_entries;
        int ret;
 
+       BUILD_BUG_ON(sizeof(struct wim_ads_entry_on_disk) != WIM_ADS_ENTRY_DISK_SIZE);
+
        /* Allocate an array for our in-memory representation of the alternate
         * data stream entries. */
        num_ads = inode->i_num_ads;
@@ -1454,7 +1543,6 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode,
                               disk_entry->stream_name,
                               cur_entry->stream_name_nbytes);
                        cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = cpu_to_le16(0);
-                       replace_forbidden_characters(cur_entry->stream_name);
                }
 
                /* It's expected that the size of every ADS entry is a multiple
@@ -1532,6 +1620,8 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len,
        const struct wim_dentry_on_disk *disk_dentry =
                        (const struct wim_dentry_on_disk*)p;
 
+       BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE);
+
        if ((uintptr_t)p & 7)
                WARNING("WIM dentry is not 8-byte aligned");
 
@@ -1658,7 +1748,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len,
                memcpy(file_name, p, file_name_nbytes);
                p += file_name_nbytes + 2;
                file_name[file_name_nbytes / 2] = cpu_to_le16(0);
-               replace_forbidden_characters(file_name);
        } else {
                file_name = NULL;
        }
@@ -1677,7 +1766,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len,
                memcpy(short_name, p, short_name_nbytes);
                p += short_name_nbytes + 2;
                short_name[short_name_nbytes / 2] = cpu_to_le16(0);
-               replace_forbidden_characters(short_name);
        } else {
                short_name = NULL;
        }
@@ -1725,6 +1813,18 @@ out:
        return ret;
 }
 
+static const tchar *
+dentry_get_file_type_string(const struct wim_dentry *dentry)
+{
+       const struct wim_inode *inode = dentry->d_inode;
+       if (inode_is_directory(inode))
+               return T("directory");
+       else if (inode_is_symlink(inode))
+               return T("symbolic link");
+       else
+               return T("file");
+}
+
 /* Reads the children of a dentry, and all their children, ..., etc. from the
  * metadata resource and into the dentry tree.
  *
@@ -1747,6 +1847,7 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len,
 {
        u64 cur_offset = dentry->subdir_offset;
        struct wim_dentry *child;
+       struct wim_dentry *duplicate;
        struct wim_dentry cur_child;
        int ret;
 
@@ -1774,14 +1875,12 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len,
 
                /* Not end of directory.  Allocate this child permanently and
                 * link it to the parent and previous child. */
-               child = MALLOC(sizeof(struct wim_dentry));
+               child = memdup(&cur_child, sizeof(struct wim_dentry));
                if (!child) {
-                       ERROR("Failed to allocate %zu bytes for new dentry",
-                             sizeof(struct wim_dentry));
+                       ERROR("Failed to allocate new dentry!");
                        ret = WIMLIB_ERR_NOMEM;
                        break;
                }
-               memcpy(child, &cur_child, sizeof(struct wim_dentry));
 
                /* Advance to the offset of the next child.  Note: We need to
                 * advance by the TOTAL length of the dentry, not by the length
@@ -1790,17 +1889,22 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len,
                 * entries. */
                cur_offset += dentry_total_length(child);
 
-               if (dentry_add_child(dentry, child)) {
-                       WARNING("Ignoring duplicate dentry \"%"WS"\"",
-                               child->file_name);
-                       WARNING("(In directory \"%"TS"\")", dentry_full_path(dentry));
-                       free_dentry(child);
+               duplicate = dentry_add_child(dentry, child);
+               if (duplicate) {
+                       const tchar *child_type, *duplicate_type;
+                       child_type = dentry_get_file_type_string(child);
+                       duplicate_type = dentry_get_file_type_string(duplicate);
+                       WARNING("Ignoring duplicate %"TS" \"%"TS"\" "
+                               "(the WIM image already contains a %"TS" "
+                               "at that path with the exact same name)",
+                               child_type, dentry_full_path(duplicate),
+                               duplicate_type);
                } else {
                        inode_add_dentry(child, child->d_inode);
                        /* If there are children of this child, call this
                         * procedure recursively. */
                        if (child->subdir_offset != 0) {
-                               if (!dentry_is_directory(child)) {
+                               if (dentry_is_directory(child)) {
                                        ret = read_dentry_tree(metadata_resource,
                                                               metadata_resource_len,
                                                               child);