X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fdentry.c;h=c62a694f77ae3284e6309ee11d0007df79cb0c20;hp=5082eac64d48af415a88daef375efed499f31c3b;hb=ac0f66feae348981def9e4fcf0af84868ac0a731;hpb=55491147fce2bc03ffb602a3985e7fd4e32169a3 diff --git a/src/dentry.c b/src/dentry.c index 5082eac6..c62a694f 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -70,6 +70,8 @@ struct wim_ads_entry_on_disk { utf16lechar stream_name[]; } _packed_attribute; +#define WIM_ADS_ENTRY_DISK_SIZE 38 + /* WIM directory entry (on-disk format) */ struct wim_dentry_on_disk { le64 length; @@ -105,6 +107,8 @@ struct wim_dentry_on_disk { /*utf16lechar short_name[];*/ } _packed_attribute; +#define WIM_DENTRY_DISK_SIZE 102 + /* Calculates the unaligned length, in bytes, of an on-disk WIM dentry that has * a file name and short name that take the specified numbers of bytes. This * excludes any alternate data stream entries that may follow the dentry. */ @@ -130,16 +134,6 @@ dentry_correct_length_unaligned(const struct wim_dentry *dentry) dentry->short_name_nbytes); } -/* Return %true iff the alternate data stream entry @entry has the UTF-16LE - * stream name @name that has length @name_nbytes bytes. */ -static inline bool -ads_entry_has_name(const struct wim_ads_entry *entry, - const utf16lechar *name, size_t name_nbytes) -{ - return entry->stream_name_nbytes == name_nbytes && - memcmp(entry->stream_name, name, name_nbytes) == 0; -} - /* Duplicates a string of system-dependent encoding into a UTF-16LE string and * returns the string and its length, in bytes, in the pointer arguments. Frees * any existing string at the return location before overwriting it. */ @@ -344,7 +338,7 @@ for_dentry_in_tree_depth(struct wim_dentry *root, /* Calculate the full path of @dentry. The full path of its parent must have * already been calculated, or it must be the root dentry. */ -static int +int calculate_dentry_full_path(struct wim_dentry *dentry) { tchar *full_path; @@ -355,7 +349,8 @@ calculate_dentry_full_path(struct wim_dentry *dentry) return 0; if (dentry_is_root(dentry)) { - full_path = TSTRDUP(T("/")); + static const tchar _root_path[] = {WIM_PATH_SEPARATOR, T('\0')}; + full_path = TSTRDUP(_root_path); if (!full_path) return WIMLIB_ERR_NOMEM; full_path_nbytes = 1 * sizeof(tchar); @@ -399,7 +394,7 @@ calculate_dentry_full_path(struct wim_dentry *dentry) if (!full_path) return WIMLIB_ERR_NOMEM; memcpy(full_path, parent_full_path, parent_full_path_nbytes); - full_path[parent_full_path_nbytes / sizeof(tchar)] = T('/'); + full_path[parent_full_path_nbytes / sizeof(tchar)] = WIM_PATH_SEPARATOR; #if TCHAR_IS_UTF16LE memcpy(&full_path[parent_full_path_nbytes / sizeof(tchar) + 1], dentry->file_name, @@ -485,34 +480,122 @@ calculate_subdir_offsets(struct wim_dentry *dentry, u64 *subdir_offset_p) } } +/* Case-sensitive UTF-16LE dentry or stream name comparison. Used on both UNIX + * (always) and Windows (sometimes) */ static int -compare_utf16le_names(const utf16lechar *name1, size_t nbytes1, - const utf16lechar *name2, size_t nbytes2) +compare_utf16le_names_case_sensitive(const utf16lechar *name1, size_t nbytes1, + const utf16lechar *name2, size_t nbytes2) { + /* Return the result if the strings differ up to their minimum length. + * Note that we cannot use strcmp() or strncmp() here, as the strings + * are in UTF-16LE format. */ int result = memcmp(name1, name2, min(nbytes1, nbytes2)); if (result) return result; + + /* The strings are the same up to their minimum length, so return a + * result based on their lengths. */ + if (nbytes1 < nbytes2) + return -1; + else if (nbytes1 > nbytes2) + return 1; else - return (int)nbytes1 - (int)nbytes2; + return 0; } +#ifdef __WIN32__ +/* Windoze: Case-insensitive UTF-16LE dentry or stream name comparison */ static int -dentry_compare_names(const struct wim_dentry *d1, const struct wim_dentry *d2) +compare_utf16le_names_case_insensitive(const utf16lechar *name1, size_t nbytes1, + const utf16lechar *name2, size_t nbytes2) { - return compare_utf16le_names(d1->file_name, d1->file_name_nbytes, - d2->file_name, d2->file_name_nbytes); + /* Return the result if the strings differ up to their minimum length. + * */ + int result = _wcsnicmp((const wchar_t*)name1, (const wchar_t*)name2, + min(nbytes1 / 2, nbytes2 / 2)); + if (result) + return result; + + /* The strings are the same up to their minimum length, so return a + * result based on their lengths. */ + if (nbytes1 < nbytes2) + return -1; + else if (nbytes1 > nbytes2) + return 1; + else + return 0; } +#endif /* __WIN32__ */ + +#ifdef __WIN32__ +# define compare_utf16le_names compare_utf16le_names_case_insensitive +#else +# define compare_utf16le_names compare_utf16le_names_case_sensitive +#endif + +#ifdef __WIN32__ +static int +dentry_compare_names_case_insensitive(const struct wim_dentry *d1, + const struct wim_dentry *d2) +{ + return compare_utf16le_names_case_insensitive(d1->file_name, + d1->file_name_nbytes, + d2->file_name, + d2->file_name_nbytes); +} +#endif /* __WIN32__ */ + +static int +dentry_compare_names_case_sensitive(const struct wim_dentry *d1, + const struct wim_dentry *d2) +{ + return compare_utf16le_names_case_sensitive(d1->file_name, + d1->file_name_nbytes, + d2->file_name, + d2->file_name_nbytes); +} +#ifdef __WIN32__ +# define dentry_compare_names dentry_compare_names_case_insensitive +#else +# define dentry_compare_names dentry_compare_names_case_sensitive +#endif + +/* Return %true iff the alternate data stream entry @entry has the UTF-16LE + * stream name @name that has length @name_nbytes bytes. */ +static inline bool +ads_entry_has_name(const struct wim_ads_entry *entry, + const utf16lechar *name, size_t name_nbytes) +{ + return !compare_utf16le_names(name, name_nbytes, + entry->stream_name, + entry->stream_name_nbytes); +} + +/* Given a UTF-16LE filename and a directory, look up the dentry for the file. + * Return it if found, otherwise NULL. This is case-sensitive on UNIX and + * case-insensitive on Windows. */ struct wim_dentry * get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry, const utf16lechar *name, size_t name_nbytes) { - struct rb_node *node = dentry->d_inode->i_children.rb_node; + struct rb_node *node; + +#ifdef __WIN32__ + node = dentry->d_inode->i_children_case_insensitive.rb_node; +#else + node = dentry->d_inode->i_children.rb_node; +#endif + struct wim_dentry *child; while (node) { + #ifdef __WIN32__ + child = rb_entry(node, struct wim_dentry, rb_node_case_insensitive); + #else child = rbnode_dentry(node); + #endif int result = compare_utf16le_names(name, name_nbytes, child->file_name, child->file_name_nbytes); @@ -520,8 +603,20 @@ get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry, node = node->rb_left; else if (result > 0) node = node->rb_right; - else + else { + #ifdef __WIN32__ + if (!list_empty(&child->case_insensitive_conflict_list)) + { + WARNING("Result of case-insensitive lookup is ambiguous " + "(returning \"%ls\" instead of \"%ls\")", + child->file_name, + container_of(child->case_insensitive_conflict_list.next, + struct wim_dentry, + case_insensitive_conflict_list)->file_name); + } + #endif return child; + } } return NULL; } @@ -567,12 +662,13 @@ get_dentry_utf16le(WIMStruct *wim, const utf16lechar *path) } p = path; while (1) { - while (*p == cpu_to_le16('/')) + while (*p == cpu_to_le16(WIM_PATH_SEPARATOR)) p++; if (*p == cpu_to_le16('\0')) break; pp = p; - while (*pp != cpu_to_le16('/') && *pp != cpu_to_le16('\0')) + while (*pp != cpu_to_le16(WIM_PATH_SEPARATOR) && + *pp != cpu_to_le16('\0')) pp++; cur_dentry = get_dentry_child_with_utf16le_name(parent_dentry, p, @@ -631,11 +727,11 @@ static void to_parent_name(tchar *buf, size_t len) { ssize_t i = (ssize_t)len - 1; - while (i >= 0 && buf[i] == T('/')) + while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR) i--; - while (i >= 0 && buf[i] != T('/')) + while (i >= 0 && buf[i] != WIM_PATH_SEPARATOR) i--; - while (i >= 0 && buf[i] == T('/')) + while (i >= 0 && buf[i] == WIM_PATH_SEPARATOR) i--; buf[i + 1] = T('\0'); } @@ -936,7 +1032,8 @@ free_inode(struct wim_inode *inode) #endif /* HACK: This may instead delete the inode from i_list, but the * hlist_del() behaves the same as list_del(). */ - hlist_del(&inode->i_hlist); + if (!hlist_unhashed(&inode->i_hlist)) + hlist_del(&inode->i_hlist); FREE(inode->i_extracted_file); FREE(inode); } @@ -1012,27 +1109,73 @@ free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table) for_dentry_in_tree_depth(root, do_free_dentry, lookup_table); } +#ifdef __WIN32__ + +/* Insert a dentry into the case insensitive index for a directory. + * + * This is a red-black tree, but when multiple dentries share the same + * case-insensitive name, only one is inserted into the tree itself; the rest + * are connected in a list. + */ +static struct wim_dentry * +dentry_add_child_case_insensitive(struct wim_dentry *parent, + struct wim_dentry *child) +{ + struct rb_root *root; + struct rb_node **new; + struct rb_node *rb_parent; + + root = &parent->d_inode->i_children_case_insensitive; + new = &root->rb_node; + rb_parent = NULL; + while (*new) { + struct wim_dentry *this = container_of(*new, struct wim_dentry, + rb_node_case_insensitive); + int result = dentry_compare_names_case_insensitive(child, this); + + rb_parent = *new; + + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else + return this; + } + rb_link_node(&child->rb_node_case_insensitive, rb_parent, new); + rb_insert_color(&child->rb_node_case_insensitive, root); + return NULL; +} +#endif + /* * Links a dentry into the directory tree. * * @parent: The dentry that will be the parent of @child. * @child: The dentry to link. * - * Returns non-NULL if a duplicate dentry was detected. + * Returns NULL if successful. If @parent already contains a dentry with the + * same case-sensitive name as @child, the pointer to this duplicate dentry is + * returned. */ struct wim_dentry * dentry_add_child(struct wim_dentry * restrict parent, struct wim_dentry * restrict child) { - wimlib_assert(dentry_is_directory(parent)); + struct rb_root *root; + struct rb_node **new; + struct rb_node *rb_parent; - struct rb_root *root = &parent->d_inode->i_children; - struct rb_node **new = &(root->rb_node); - struct rb_node *rb_parent = NULL; + wimlib_assert(dentry_is_directory(parent)); + wimlib_assert(parent != child); + /* Case sensitive child dentry index */ + root = &parent->d_inode->i_children; + new = &root->rb_node; + rb_parent = NULL; while (*new) { struct wim_dentry *this = rbnode_dentry(*new); - int result = dentry_compare_names(child, this); + int result = dentry_compare_names_case_sensitive(child, this); rb_parent = *new; @@ -1046,6 +1189,20 @@ dentry_add_child(struct wim_dentry * restrict parent, child->parent = parent; rb_link_node(&child->rb_node, rb_parent, new); rb_insert_color(&child->rb_node, root); + +#ifdef __WIN32__ + { + struct wim_dentry *existing; + existing = dentry_add_child_case_insensitive(parent, child); + if (existing) { + list_add(&child->case_insensitive_conflict_list, + &existing->case_insensitive_conflict_list); + child->rb_node_case_insensitive.__rb_parent_color = 0; + } else { + INIT_LIST_HEAD(&child->case_insensitive_conflict_list); + } + } +#endif return NULL; } @@ -1053,8 +1210,31 @@ dentry_add_child(struct wim_dentry * restrict parent, void unlink_dentry(struct wim_dentry *dentry) { - if (!dentry_is_root(dentry)) - rb_erase(&dentry->rb_node, &dentry->parent->d_inode->i_children); + struct wim_dentry *parent = dentry->parent; + + if (parent == dentry) + return; + rb_erase(&dentry->rb_node, &parent->d_inode->i_children); +#ifdef __WIN32__ + if (dentry->rb_node_case_insensitive.__rb_parent_color) { + /* This dentry was in the case-insensitive red-black tree. */ + rb_erase(&dentry->rb_node_case_insensitive, + &parent->d_inode->i_children_case_insensitive); + if (!list_empty(&dentry->case_insensitive_conflict_list)) { + /* Make a different case-insensitively-the-same dentry + * be the "representative" in the red-black tree. */ + struct list_head *next; + struct wim_dentry *other; + struct wim_dentry *existing; + + next = dentry->case_insensitive_conflict_list.next; + other = list_entry(next, struct wim_dentry, case_insensitive_conflict_list); + existing = dentry_add_child_case_insensitive(parent, other); + wimlib_assert(existing == NULL); + } + } + list_del(&dentry->case_insensitive_conflict_list); +#endif } /* @@ -1179,12 +1359,11 @@ add_stream_from_data_buffer(const void *buffer, size_t size, lte = new_lookup_table_entry(); if (!lte) return NULL; - buffer_copy = MALLOC(size); + buffer_copy = memdup(buffer, size); if (!buffer_copy) { free_lookup_table_entry(lte); return NULL; } - memcpy(buffer_copy, buffer, size); lte->resource_location = RESOURCE_IN_ATTACHED_BUFFER; lte->attached_buffer = buffer_copy; lte->resource_entry.original_size = size; @@ -1329,39 +1508,6 @@ inode_set_unix_data(struct wim_inode *inode, uid_t uid, gid_t gid, mode_t mode, } #endif /* !__WIN32__ */ -/* Replace weird characters in filenames and alternate data stream names. - * - * In particular we do not want the path separator to appear in any names, as - * that would make it possible for a "malicious" WIM to extract itself to any - * location it wanted to. */ -static void -replace_forbidden_characters(utf16lechar *name) -{ - utf16lechar *p; - - for (p = name; *p; p++) { - #ifdef __WIN32__ - if (wcschr(L"<>:\"/\\|?*", (wchar_t)*p)) - #else - if (*p == cpu_to_le16('/')) - #endif - { - if (name) { - WARNING("File, directory, or stream name \"%"WS"\"\n" - " contains forbidden characters; " - "substituting replacement characters.", - name); - name = NULL; - } - #ifdef __WIN32__ - *p = cpu_to_le16(0xfffd); - #else - *p = cpu_to_le16('?'); - #endif - } - } -} - /* * Reads the alternate data stream entries of a WIM dentry. * @@ -1387,6 +1533,8 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, struct wim_ads_entry *ads_entries; int ret; + BUILD_BUG_ON(sizeof(struct wim_ads_entry_on_disk) != WIM_ADS_ENTRY_DISK_SIZE); + /* Allocate an array for our in-memory representation of the alternate * data stream entries. */ num_ads = inode->i_num_ads; @@ -1453,7 +1601,6 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, disk_entry->stream_name, cur_entry->stream_name_nbytes); cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = cpu_to_le16(0); - replace_forbidden_characters(cur_entry->stream_name); } /* It's expected that the size of every ADS entry is a multiple @@ -1531,6 +1678,8 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len, const struct wim_dentry_on_disk *disk_dentry = (const struct wim_dentry_on_disk*)p; + BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE); + if ((uintptr_t)p & 7) WARNING("WIM dentry is not 8-byte aligned"); @@ -1657,7 +1806,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len, memcpy(file_name, p, file_name_nbytes); p += file_name_nbytes + 2; file_name[file_name_nbytes / 2] = cpu_to_le16(0); - replace_forbidden_characters(file_name); } else { file_name = NULL; } @@ -1676,7 +1824,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len, memcpy(short_name, p, short_name_nbytes); p += short_name_nbytes + 2; short_name[short_name_nbytes / 2] = cpu_to_le16(0); - replace_forbidden_characters(short_name); } else { short_name = NULL; } @@ -1724,6 +1871,18 @@ out: return ret; } +static const tchar * +dentry_get_file_type_string(const struct wim_dentry *dentry) +{ + const struct wim_inode *inode = dentry->d_inode; + if (inode_is_directory(inode)) + return T("directory"); + else if (inode_is_symlink(inode)) + return T("symbolic link"); + else + return T("file"); +} + /* Reads the children of a dentry, and all their children, ..., etc. from the * metadata resource and into the dentry tree. * @@ -1746,6 +1905,7 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, { u64 cur_offset = dentry->subdir_offset; struct wim_dentry *child; + struct wim_dentry *duplicate; struct wim_dentry cur_child; int ret; @@ -1773,32 +1933,48 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, /* Not end of directory. Allocate this child permanently and * link it to the parent and previous child. */ - child = MALLOC(sizeof(struct wim_dentry)); + child = memdup(&cur_child, sizeof(struct wim_dentry)); if (!child) { - ERROR("Failed to allocate %zu bytes for new dentry", - sizeof(struct wim_dentry)); + ERROR("Failed to allocate new dentry!"); ret = WIMLIB_ERR_NOMEM; break; } - memcpy(child, &cur_child, sizeof(struct wim_dentry)); - dentry_add_child(dentry, child); - inode_add_dentry(child, child->d_inode); - - /* If there are children of this child, call this procedure - * recursively. */ - if (child->subdir_offset != 0) { - ret = read_dentry_tree(metadata_resource, - metadata_resource_len, child); - if (ret) - break; - } /* Advance to the offset of the next child. Note: We need to * advance by the TOTAL length of the dentry, not by the length - * child->length, which although it does take into account the - * padding, it DOES NOT take into account alternate stream + * cur_child.length, which although it does take into account + * the padding, it DOES NOT take into account alternate stream * entries. */ cur_offset += dentry_total_length(child); + + duplicate = dentry_add_child(dentry, child); + if (duplicate) { + const tchar *child_type, *duplicate_type; + child_type = dentry_get_file_type_string(child); + duplicate_type = dentry_get_file_type_string(duplicate); + WARNING("Ignoring duplicate %"TS" \"%"TS"\" " + "(the WIM image already contains a %"TS" " + "at that path with the exact same name)", + child_type, dentry_full_path(duplicate), + duplicate_type); + } else { + inode_add_dentry(child, child->d_inode); + /* If there are children of this child, call this + * procedure recursively. */ + if (child->subdir_offset != 0) { + if (dentry_is_directory(child)) { + ret = read_dentry_tree(metadata_resource, + metadata_resource_len, + child); + if (ret) + break; + } else { + WARNING("Ignoring children of non-directory \"%"TS"\"", + dentry_full_path(child)); + } + } + + } } return ret; }