utf16lechar stream_name[];
} _packed_attribute;
+#define WIM_ADS_ENTRY_DISK_SIZE 38
+
/* WIM directory entry (on-disk format) */
struct wim_dentry_on_disk {
le64 length;
/*utf16lechar short_name[];*/
} _packed_attribute;
+#define WIM_DENTRY_DISK_SIZE 102
+
/* Calculates the unaligned length, in bytes, of an on-disk WIM dentry that has
* a file name and short name that take the specified numbers of bytes. This
* excludes any alternate data stream entries that may follow the dentry. */
dentry->short_name_nbytes);
}
-/* Return %true iff the alternate data stream entry @entry has the UTF-16LE
- * stream name @name that has length @name_nbytes bytes. */
-static inline bool
-ads_entry_has_name(const struct wim_ads_entry *entry,
- const utf16lechar *name, size_t name_nbytes)
-{
- return entry->stream_name_nbytes == name_nbytes &&
- memcmp(entry->stream_name, name, name_nbytes) == 0;
-}
-
/* Duplicates a string of system-dependent encoding into a UTF-16LE string and
* returns the string and its length, in bytes, in the pointer arguments. Frees
* any existing string at the return location before overwriting it. */
/* Calculate the full path of @dentry. The full path of its parent must have
* already been calculated, or it must be the root dentry. */
-static int
+int
calculate_dentry_full_path(struct wim_dentry *dentry)
{
tchar *full_path;
}
}
+/* Case-sensitive UTF-16LE dentry or stream name comparison. Used on both UNIX
+ * (always) and Windows (sometimes).
+ *
+ * @nbytes1 and @nbytes2 are the lengths of @name1 and @name2 in bytes. Returns
+ * a negative value, 0, or a positive value if @name1 sorts before, the same
+ * as, or after @name2. The order is bytewise over the common prefix; when the
+ * common prefix is identical, the shorter name sorts first. */
static int
-compare_utf16le_names(const utf16lechar *name1, size_t nbytes1,
- const utf16lechar *name2, size_t nbytes2)
+compare_utf16le_names_case_sensitive(const utf16lechar *name1, size_t nbytes1,
+ const utf16lechar *name2, size_t nbytes2)
{
+ /* If the strings differ within their minimum length, that difference
+ * decides the result. Note that we cannot use strcmp() or strncmp()
+ * here, as the strings are in UTF-16LE format. */
int result = memcmp(name1, name2, min(nbytes1, nbytes2));
if (result)
return result;
+
+ /* The strings are the same up to their minimum length, so return a
+ * result based on their lengths. */
+ if (nbytes1 < nbytes2)
+ return -1;
+ else if (nbytes1 > nbytes2)
+ return 1;
else
- return (int)nbytes1 - (int)nbytes2;
+ return 0;
}
+#ifdef __WIN32__
+/* Windows only: case-insensitive UTF-16LE dentry or stream name comparison.
+ *
+ * @nbytes1 and @nbytes2 are the lengths of @name1 and @name2 in bytes. Returns
+ * a negative value, 0, or a positive value if @name1 sorts before, the same
+ * as, or after @name2. Names whose common prefix is equal when case is
+ * ignored are ordered by length, shorter first. */
static int
-dentry_compare_names(const struct wim_dentry *d1, const struct wim_dentry *d2)
+compare_utf16le_names_case_insensitive(const utf16lechar *name1, size_t nbytes1,
+ const utf16lechar *name2, size_t nbytes2)
{
- return compare_utf16le_names(d1->file_name, d1->file_name_nbytes,
- d2->file_name, d2->file_name_nbytes);
+ /* If the strings differ within their minimum length when case is
+ * ignored, that difference decides the result. _wcsnicmp() takes its
+ * count in wide characters rather than bytes, hence the division of
+ * the byte lengths by 2. */
+ int result = _wcsnicmp((const wchar_t*)name1, (const wchar_t*)name2,
+ min(nbytes1 / 2, nbytes2 / 2));
+ if (result)
+ return result;
+
+ /* The strings are the same up to their minimum length, so return a
+ * result based on their lengths. */
+ if (nbytes1 < nbytes2)
+ return -1;
+ else if (nbytes1 > nbytes2)
+ return 1;
+ else
+ return 0;
}
+#endif /* __WIN32__ */
+
+/* Default name comparison for the platform being built: case-insensitive on
+ * Windows, case-sensitive everywhere else. */
+#ifdef __WIN32__
+# define compare_utf16le_names compare_utf16le_names_case_insensitive
+#else
+# define compare_utf16le_names compare_utf16le_names_case_sensitive
+#endif
+#ifdef __WIN32__
+/* Windows only: compare the file names of the dentries @d1 and @d2, ignoring
+ * case. Returns the result of compare_utf16le_names_case_insensitive() on the
+ * two names: negative, 0, or positive. */
+static int
+dentry_compare_names_case_insensitive(const struct wim_dentry *d1,
+ const struct wim_dentry *d2)
+{
+ return compare_utf16le_names_case_insensitive(d1->file_name,
+ d1->file_name_nbytes,
+ d2->file_name,
+ d2->file_name_nbytes);
+}
+#endif /* __WIN32__ */
+
+/* Compare the file names of the dentries @d1 and @d2 case-sensitively. Returns
+ * the result of compare_utf16le_names_case_sensitive() on the two names:
+ * negative, 0, or positive. */
+static int
+dentry_compare_names_case_sensitive(const struct wim_dentry *d1,
+ const struct wim_dentry *d2)
+{
+ return compare_utf16le_names_case_sensitive(d1->file_name,
+ d1->file_name_nbytes,
+ d2->file_name,
+ d2->file_name_nbytes);
+}
+
+/* Default dentry name comparison for the platform being built:
+ * case-insensitive on Windows, case-sensitive everywhere else. */
+#ifdef __WIN32__
+# define dentry_compare_names dentry_compare_names_case_insensitive
+#else
+# define dentry_compare_names dentry_compare_names_case_sensitive
+#endif
+
+/* Test whether the alternate data stream entry @entry is named @name, a
+ * UTF-16LE string that is @name_nbytes bytes long. The two names are matched
+ * with compare_utf16le_names(), so the platform's default case sensitivity
+ * applies. Returns %true on a match and %false otherwise. */
+static inline bool
+ads_entry_has_name(const struct wim_ads_entry *entry,
+		   const utf16lechar *name, size_t name_nbytes)
+{
+	int cmp = compare_utf16le_names(name, name_nbytes,
+					entry->stream_name,
+					entry->stream_name_nbytes);
+
+	return cmp == 0;
+}
+
struct wim_dentry *
get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry,
const utf16lechar *name,
node = node->rb_left;
else if (result > 0)
node = node->rb_right;
- else
+ else {
+ #ifdef __WIN32__
+ if (!list_empty(&child->case_insensitive_conflict_list))
+ {
+ WARNING("Result of case-insensitive lookup is ambiguous "
+ "(returning \"%ls\" instead of \"%ls\")",
+ child->file_name,
+ container_of(child->case_insensitive_conflict_list.next,
+ struct wim_dentry,
+ case_insensitive_conflict_list)->file_name);
+ }
+ #endif
return child;
+ }
}
return NULL;
}
* @parent: The dentry that will be the parent of @child.
* @child: The dentry to link.
*
- * Returns non-NULL if a duplicate dentry was detected.
+ * Returns NULL if successful. If @parent already contains a dentry with the
+ * same case-sensitive name as @child, the pointer to this duplicate dentry is
+ * returned.
*/
struct wim_dentry *
dentry_add_child(struct wim_dentry * restrict parent,
struct wim_dentry * restrict child)
{
+ struct rb_root *root;
+ struct rb_node **new;
+ struct rb_node *rb_parent;
+
wimlib_assert(dentry_is_directory(parent));
wimlib_assert(parent != child);
- struct rb_root *root = &parent->d_inode->i_children;
- struct rb_node **new = &(root->rb_node);
- struct rb_node *rb_parent = NULL;
-
+ /* Case sensitive child dentry index */
+ root = &parent->d_inode->i_children;
+ new = &root->rb_node;
+ rb_parent = NULL;
while (*new) {
struct wim_dentry *this = rbnode_dentry(*new);
- int result = dentry_compare_names(child, this);
+ int result = dentry_compare_names_case_sensitive(child, this);
rb_parent = *new;
child->parent = parent;
rb_link_node(&child->rb_node, rb_parent, new);
rb_insert_color(&child->rb_node, root);
+
+#ifdef __WIN32__
+ /* Case insensitive child dentry index */
+ root = &parent->d_inode->i_children_case_insensitive;
+ new = &root->rb_node;
+ rb_parent = NULL;
+ while (*new) {
+ struct wim_dentry *this = container_of(*new, struct wim_dentry,
+ rb_node_case_insensitive);
+ int result = dentry_compare_names_case_insensitive(child, this);
+
+ rb_parent = *new;
+
+ if (result < 0)
+ new = &((*new)->rb_left);
+ else if (result > 0)
+ new = &((*new)->rb_right);
+ else {
+ list_add(&child->case_insensitive_conflict_list,
+ &this->case_insensitive_conflict_list);
+ return NULL;
+
+ }
+ }
+ rb_link_node(&child->rb_node_case_insensitive, rb_parent, new);
+ rb_insert_color(&child->rb_node_case_insensitive, root);
+ INIT_LIST_HEAD(&child->case_insensitive_conflict_list);
+#endif
return NULL;
}
+/* Unlink @dentry from the child indices of its parent directory. The root
+ * dentry is left untouched.
+ *
+ * On Windows the dentry must also leave the case-insensitive index. In that
+ * index only one dentry per case-insensitive name (the "representative") is
+ * linked into the red-black tree; dentry_add_child() merely chains any further
+ * dentry with the same case-insensitive name onto the representative's
+ * case_insensitive_conflict_list. Calling rb_erase() on a dentry that was only
+ * chained would operate on a node that was never linked, so the tree node is
+ * erased only when @dentry is the representative. In that case one of the
+ * chained dentries, if there is one, is linked into the tree in its place so
+ * that the remaining dentries stay reachable by case-insensitive lookup. */
void
unlink_dentry(struct wim_dentry *dentry)
{
- if (!dentry_is_root(dentry))
+ if (!dentry_is_root(dentry)) {
rb_erase(&dentry->rb_node, &dentry->parent->d_inode->i_children);
+	#ifdef __WIN32__
+		struct rb_root *ci_root =
+			&dentry->parent->d_inode->i_children_case_insensitive;
+		struct rb_node *node = ci_root->rb_node;
+		struct wim_dentry *representative = NULL;
+
+		/* Find the dentry that represents this case-insensitive name in
+		 * the tree. This relies on @dentry still having the name it
+		 * had when it was linked. */
+		while (node) {
+			struct wim_dentry *this = container_of(node, struct wim_dentry,
+							       rb_node_case_insensitive);
+			int result = dentry_compare_names_case_insensitive(dentry, this);
+
+			if (result < 0) {
+				node = node->rb_left;
+			} else if (result > 0) {
+				node = node->rb_right;
+			} else {
+				representative = this;
+				break;
+			}
+		}
+
+		if (representative == dentry) {
+			rb_erase(&dentry->rb_node_case_insensitive, ci_root);
+
+			if (!list_empty(&dentry->case_insensitive_conflict_list)) {
+				/* Promote the next chained dentry to be the new
+				 * representative. No dentry with an equal
+				 * case-insensitive name is left in the tree, so
+				 * the search always ends at an empty link. */
+				struct wim_dentry *heir =
+					container_of(dentry->case_insensitive_conflict_list.next,
+						     struct wim_dentry,
+						     case_insensitive_conflict_list);
+				struct rb_node **link = &ci_root->rb_node;
+				struct rb_node *rb_parent = NULL;
+
+				while (*link) {
+					struct wim_dentry *this =
+						container_of(*link, struct wim_dentry,
+							     rb_node_case_insensitive);
+
+					rb_parent = *link;
+					if (dentry_compare_names_case_insensitive(heir, this) < 0)
+						link = &((*link)->rb_left);
+					else
+						link = &((*link)->rb_right);
+				}
+				rb_link_node(&heir->rb_node_case_insensitive,
+					     rb_parent, link);
+				rb_insert_color(&heir->rb_node_case_insensitive,
+						ci_root);
+			}
+		}
+
+		/* Whether it was the representative or merely chained, @dentry
+		 * leaves the list shared by all dentries with this
+		 * case-insensitive name. */
+		list_del(&dentry->case_insensitive_conflict_list);
+	#endif
+ }
}
/*
}
#endif /* !__WIN32__ */
-/* Replace weird characters in filenames and alternate data stream names.
- *
- * In particular we do not want the path separator to appear in any names, as
- * that would make it possible for a "malicious" WIM to extract itself to any
- * location it wanted to. */
-static void
-replace_forbidden_characters(utf16lechar *name)
-{
- utf16lechar *p;
-
- for (p = name; *p; p++) {
- #ifdef __WIN32__
- if (wcschr(L"<>:\"/\\|?*", (wchar_t)*p))
- #else
- if (*p == cpu_to_le16('/'))
- #endif
- {
- if (name) {
- WARNING("File, directory, or stream name \"%"WS"\"\n"
- " contains forbidden characters; "
- "substituting replacement characters.",
- name);
- name = NULL;
- }
- #ifdef __WIN32__
- *p = cpu_to_le16(0xfffd);
- #else
- *p = cpu_to_le16('?');
- #endif
- }
- }
-}
-
/*
* Reads the alternate data stream entries of a WIM dentry.
*
struct wim_ads_entry *ads_entries;
int ret;
+ BUILD_BUG_ON(sizeof(struct wim_ads_entry_on_disk) != WIM_ADS_ENTRY_DISK_SIZE);
+
/* Allocate an array for our in-memory representation of the alternate
* data stream entries. */
num_ads = inode->i_num_ads;
disk_entry->stream_name,
cur_entry->stream_name_nbytes);
cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = cpu_to_le16(0);
- replace_forbidden_characters(cur_entry->stream_name);
}
/* It's expected that the size of every ADS entry is a multiple
const struct wim_dentry_on_disk *disk_dentry =
(const struct wim_dentry_on_disk*)p;
+ BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE);
+
if ((uintptr_t)p & 7)
WARNING("WIM dentry is not 8-byte aligned");
memcpy(file_name, p, file_name_nbytes);
p += file_name_nbytes + 2;
file_name[file_name_nbytes / 2] = cpu_to_le16(0);
- replace_forbidden_characters(file_name);
} else {
file_name = NULL;
}
memcpy(short_name, p, short_name_nbytes);
p += short_name_nbytes + 2;
short_name[short_name_nbytes / 2] = cpu_to_le16(0);
- replace_forbidden_characters(short_name);
} else {
short_name = NULL;
}
return ret;
}
+/* Return a constant string naming the kind of file that @dentry refers to,
+ * judged by its inode: "directory", "symbolic link", or "file" for anything
+ * else. */
+static const tchar *
+dentry_get_file_type_string(const struct wim_dentry *dentry)
+{
+	if (inode_is_directory(dentry->d_inode))
+		return T("directory");
+
+	if (inode_is_symlink(dentry->d_inode))
+		return T("symbolic link");
+
+	return T("file");
+}
+
/* Reads the children of a dentry, and all their children, ..., etc. from the
* metadata resource and into the dentry tree.
*
{
u64 cur_offset = dentry->subdir_offset;
struct wim_dentry *child;
+ struct wim_dentry *duplicate;
struct wim_dentry cur_child;
int ret;
* entries. */
cur_offset += dentry_total_length(child);
- if (dentry_add_child(dentry, child)) {
- WARNING("Ignoring duplicate dentry \"%"WS"\"",
- child->file_name);
- WARNING("(In directory \"%"TS"\")", dentry_full_path(dentry));
- free_dentry(child);
+ duplicate = dentry_add_child(dentry, child);
+ if (duplicate) {
+ const tchar *child_type, *duplicate_type;
+ child_type = dentry_get_file_type_string(child);
+ duplicate_type = dentry_get_file_type_string(duplicate);
+ WARNING("Ignoring duplicate %"TS" \"%"TS"\" "
+ "(the WIM image already contains a %"TS" "
+ "at that path with the exact same name)",
+ child_type, dentry_full_path(duplicate),
+ duplicate_type);
} else {
inode_add_dentry(child, child->d_inode);
/* If there are children of this child, call this