X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fdentry.c;h=d7f189900ef545107156bd06710d24ccbe95409d;hp=9411f85dba11f4e9964a10f1e932147e4e85dcbc;hb=a0a35f892af5a7536b68d3d8034bcd5f298d7de1;hpb=b8df27e5fdd4dae472ababcec11d04eafea8830b diff --git a/src/dentry.c b/src/dentry.c index 9411f85d..d7f18990 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -70,6 +70,8 @@ struct wim_ads_entry_on_disk { utf16lechar stream_name[]; } _packed_attribute; +#define WIM_ADS_ENTRY_DISK_SIZE 38 + /* WIM directory entry (on-disk format) */ struct wim_dentry_on_disk { le64 length; @@ -105,6 +107,8 @@ struct wim_dentry_on_disk { /*utf16lechar short_name[];*/ } _packed_attribute; +#define WIM_DENTRY_DISK_SIZE 102 + /* Calculates the unaligned length, in bytes, of an on-disk WIM dentry that has * a file name and short name that take the specified numbers of bytes. This * excludes any alternate data stream entries that may follow the dentry. */ @@ -130,16 +134,6 @@ dentry_correct_length_unaligned(const struct wim_dentry *dentry) dentry->short_name_nbytes); } -/* Return %true iff the alternate data stream entry @entry has the UTF-16LE - * stream name @name that has length @name_nbytes bytes. */ -static inline bool -ads_entry_has_name(const struct wim_ads_entry *entry, - const utf16lechar *name, size_t name_nbytes) -{ - return entry->stream_name_nbytes == name_nbytes && - memcmp(entry->stream_name, name, name_nbytes) == 0; -} - /* Duplicates a string of system-dependent encoding into a UTF-16LE string and * returns the string and its length, in bytes, in the pointer arguments. Frees * any existing string at the return location before overwriting it. */ @@ -344,7 +338,7 @@ for_dentry_in_tree_depth(struct wim_dentry *root, /* Calculate the full path of @dentry. The full path of its parent must have * already been calculated, or it must be the root dentry. */ -static int +int calculate_dentry_full_path(struct wim_dentry *dentry) { tchar *full_path; @@ -485,24 +479,98 @@ calculate_subdir_offsets(struct wim_dentry *dentry, u64 *subdir_offset_p) } } +/* Case-sensitive UTF-16LE dentry or stream name comparison. Used on both UNIX + * (always) and Windows (sometimes) */ static int -compare_utf16le_names(const utf16lechar *name1, size_t nbytes1, - const utf16lechar *name2, size_t nbytes2) +compare_utf16le_names_case_sensitive(const utf16lechar *name1, size_t nbytes1, + const utf16lechar *name2, size_t nbytes2) { + /* Return the result if the strings differ up to their minimum length. + * Note that we cannot use strcmp() or strncmp() here, as the strings + * are in UTF-16LE format. */ int result = memcmp(name1, name2, min(nbytes1, nbytes2)); if (result) return result; + + /* The strings are the same up to their minimum length, so return a + * result based on their lengths. */ + if (nbytes1 < nbytes2) + return -1; + else if (nbytes1 > nbytes2) + return 1; + else + return 0; +} + +#ifdef __WIN32__ +/* Windoze: Case-insensitive UTF-16LE dentry or stream name comparison */ +static int +compare_utf16le_names_case_insensitive(const utf16lechar *name1, size_t nbytes1, + const utf16lechar *name2, size_t nbytes2) +{ + /* Return the result if the strings differ up to their minimum length. + * */ + int result = _wcsnicmp((const wchar_t*)name1, (const wchar_t*)name2, + min(nbytes1 / 2, nbytes2 / 2)); + if (result) + return result; + + /* The strings are the same up to their minimum length, so return a + * result based on their lengths. */ + if (nbytes1 < nbytes2) + return -1; + else if (nbytes1 > nbytes2) + return 1; else - return (int)nbytes1 - (int)nbytes2; + return 0; +} +#endif /* __WIN32__ */ + +#ifdef __WIN32__ +# define compare_utf16le_names compare_utf16le_names_case_insensitive +#else +# define compare_utf16le_names compare_utf16le_names_case_sensitive +#endif + + +#ifdef __WIN32__ +static int +dentry_compare_names_case_insensitive(const struct wim_dentry *d1, + const struct wim_dentry *d2) +{ + return compare_utf16le_names_case_insensitive(d1->file_name, + d1->file_name_nbytes, + d2->file_name, + d2->file_name_nbytes); } +#endif /* __WIN32__ */ static int -dentry_compare_names(const struct wim_dentry *d1, const struct wim_dentry *d2) +dentry_compare_names_case_sensitive(const struct wim_dentry *d1, + const struct wim_dentry *d2) { - return compare_utf16le_names(d1->file_name, d1->file_name_nbytes, - d2->file_name, d2->file_name_nbytes); + return compare_utf16le_names_case_sensitive(d1->file_name, + d1->file_name_nbytes, + d2->file_name, + d2->file_name_nbytes); } +#ifdef __WIN32__ +# define dentry_compare_names dentry_compare_names_case_insensitive +#else +# define dentry_compare_names dentry_compare_names_case_sensitive +#endif + +/* Return %true iff the alternate data stream entry @entry has the UTF-16LE + * stream name @name that has length @name_nbytes bytes. */ +static inline bool +ads_entry_has_name(const struct wim_ads_entry *entry, + const utf16lechar *name, size_t name_nbytes) +{ + return !compare_utf16le_names(name, name_nbytes, + entry->stream_name, + entry->stream_name_nbytes); +} struct wim_dentry * get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry, @@ -520,8 +588,20 @@ get_dentry_child_with_utf16le_name(const struct wim_dentry *dentry, node = node->rb_left; else if (result > 0) node = node->rb_right; - else + else { + #ifdef __WIN32__ + if (!list_empty(&child->case_insensitive_conflict_list)) + { + WARNING("Result of case-insensitive lookup is ambiguous " + "(returning \"%ls\" instead of \"%ls\")", + child->file_name, + container_of(child->case_insensitive_conflict_list.next, + struct wim_dentry, + case_insensitive_conflict_list)->file_name); + } + #endif return child; + } } return NULL; } @@ -1018,21 +1098,28 @@ free_dentry_tree(struct wim_dentry *root, struct wim_lookup_table *lookup_table) * @parent: The dentry that will be the parent of @child. * @child: The dentry to link. * - * Returns non-NULL if a duplicate dentry was detected. + * Returns NULL if successful. If @parent already contains a dentry with the + * same case-sensitive name as @child, the pointer to this duplicate dentry is + * returned. */ struct wim_dentry * dentry_add_child(struct wim_dentry * restrict parent, struct wim_dentry * restrict child) { - wimlib_assert(dentry_is_directory(parent)); + struct rb_root *root; + struct rb_node **new; + struct rb_node *rb_parent; - struct rb_root *root = &parent->d_inode->i_children; - struct rb_node **new = &(root->rb_node); - struct rb_node *rb_parent = NULL; + wimlib_assert(dentry_is_directory(parent)); + wimlib_assert(parent != child); + /* Case sensitive child dentry index */ + root = &parent->d_inode->i_children; + new = &root->rb_node; + rb_parent = NULL; while (*new) { struct wim_dentry *this = rbnode_dentry(*new); - int result = dentry_compare_names(child, this); + int result = dentry_compare_names_case_sensitive(child, this); rb_parent = *new; @@ -1046,6 +1133,34 @@ dentry_add_child(struct wim_dentry * restrict parent, child->parent = parent; rb_link_node(&child->rb_node, rb_parent, new); rb_insert_color(&child->rb_node, root); + +#ifdef __WIN32__ + /* Case insensitive child dentry index */ + root = &parent->d_inode->i_children_case_insensitive; + new = &root->rb_node; + rb_parent = NULL; + while (*new) { + struct wim_dentry *this = container_of(*new, struct wim_dentry, + rb_node_case_insensitive); + int result = dentry_compare_names_case_insensitive(child, this); + + rb_parent = *new; + + if (result < 0) + new = &((*new)->rb_left); + else if (result > 0) + new = &((*new)->rb_right); + else { + list_add(&child->case_insensitive_conflict_list, + &this->case_insensitive_conflict_list); + return NULL; + + } + } + rb_link_node(&child->rb_node_case_insensitive, rb_parent, new); + rb_insert_color(&child->rb_node_case_insensitive, root); + INIT_LIST_HEAD(&child->case_insensitive_conflict_list); +#endif return NULL; } @@ -1053,8 +1168,14 @@ dentry_add_child(struct wim_dentry * restrict parent, void unlink_dentry(struct wim_dentry *dentry) { - if (!dentry_is_root(dentry)) + if (!dentry_is_root(dentry)) { rb_erase(&dentry->rb_node, &dentry->parent->d_inode->i_children); + #ifdef __WIN32__ + rb_erase(&dentry->rb_node_case_insensitive, + &dentry->parent->d_inode->i_children_case_insensitive); + list_del(&dentry->case_insensitive_conflict_list); + #endif + } } /* @@ -1179,12 +1300,11 @@ add_stream_from_data_buffer(const void *buffer, size_t size, lte = new_lookup_table_entry(); if (!lte) return NULL; - buffer_copy = MALLOC(size); + buffer_copy = memdup(buffer, size); if (!buffer_copy) { free_lookup_table_entry(lte); return NULL; } - memcpy(buffer_copy, buffer, size); lte->resource_location = RESOURCE_IN_ATTACHED_BUFFER; lte->attached_buffer = buffer_copy; lte->resource_entry.original_size = size; @@ -1329,50 +1449,17 @@ inode_set_unix_data(struct wim_inode *inode, uid_t uid, gid_t gid, mode_t mode, } #endif /* !__WIN32__ */ -/* Replace weird characters in filenames and alternate data stream names. - * - * In particular we do not want the path separator to appear in any names, as - * that would make it possible for a "malicious" WIM to extract itself to any - * location it wanted to. */ -static void -replace_forbidden_characters(utf16lechar *name) -{ - utf16lechar *p; - - for (p = name; *p; p++) { - #ifdef __WIN32__ - if (wcschr(L"<>:\"/\\|?*", (wchar_t)*p)) - #else - if (*p == cpu_to_le16('/')) - #endif - { - if (name) { - WARNING("File, directory, or stream name \"%"WS"\"\n" - " contains forbidden characters; " - "substituting replacement characters.", - name); - name = NULL; - } - #ifdef __WIN32__ - *p = cpu_to_le16(0xfffd); - #else - *p = cpu_to_le16('?'); - #endif - } - } -} - /* * Reads the alternate data stream entries of a WIM dentry. * * @p: Pointer to buffer that starts with the first alternate stream entry. * * @inode: Inode to load the alternate data streams into. - * @inode->i_num_ads must have been set to the number of - * alternate data streams that are expected. + * @inode->i_num_ads must have been set to the number of + * alternate data streams that are expected. * * @remaining_size: Number of bytes of data remaining in the buffer pointed - * to by @p. + * to by @p. * * * Return 0 on success or nonzero on failure. On success, inode->i_ads_entries @@ -1387,11 +1474,16 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, struct wim_ads_entry *ads_entries; int ret; + BUILD_BUG_ON(sizeof(struct wim_ads_entry_on_disk) != WIM_ADS_ENTRY_DISK_SIZE); + + /* Allocate an array for our in-memory representation of the alternate + * data stream entries. */ num_ads = inode->i_num_ads; ads_entries = CALLOC(num_ads, sizeof(inode->i_ads_entries[0])); if (!ads_entries) goto out_of_memory; + /* Read the entries into our newly allocated buffer. */ for (u16 i = 0; i < num_ads; i++) { u64 length; struct wim_ads_entry *cur_entry; @@ -1410,7 +1502,7 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, length = le64_to_cpu(disk_entry->length); /* Make sure the length field is neither so small it doesn't - * include all the fixed-length data, or so large it overflows + * include all the fixed-length data nor so large it overflows * the metadata resource buffer. */ if (length < sizeof(struct wim_ads_entry_on_disk) || length > nbytes_remaining) @@ -1419,9 +1511,8 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, /* Read the rest of the fixed-length data. */ cur_entry->reserved = le64_to_cpu(disk_entry->reserved); - copy_hash(cur_entry->hash, disk_entry->hash); - cur_entry->stream_name_nbytes = le16_to_cpu(cur_entry->stream_name_nbytes); + cur_entry->stream_name_nbytes = le16_to_cpu(disk_entry->stream_name_nbytes); /* If stream_name_nbytes != 0, this is a named stream. * Otherwise this is an unnamed stream, or in some cases (bugs @@ -1430,8 +1521,6 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, * the fact that the real unnamed stream entry has a nonzero * hash field. */ if (cur_entry->stream_name_nbytes) { - u64 length_no_padding; - /* The name is encoded in UTF16-LE, which uses 2-byte * coding units, so the length of the name had better be * an even number of bytes... */ @@ -1452,8 +1541,7 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, memcpy(cur_entry->stream_name, disk_entry->stream_name, cur_entry->stream_name_nbytes); - cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = 0; - replace_forbidden_characters(cur_entry->stream_name); + cur_entry->stream_name[cur_entry->stream_name_nbytes / 2] = cpu_to_le16(0); } /* It's expected that the size of every ADS entry is a multiple @@ -1472,9 +1560,7 @@ read_ads_entries(const u8 * restrict p, struct wim_inode * restrict inode, nbytes_remaining -= length; } inode->i_ads_entries = ads_entries; -#ifdef WITH_FUSE inode->i_next_stream_id = inode->i_num_ads + 1; -#endif ret = 0; goto out; out_of_memory: @@ -1512,6 +1598,10 @@ out: * buffers. On success, the dentry->length field must be examined. If zero, * this was a special "end of directory" dentry and not a real dentry. If * nonzero, this was a real dentry. + * + * Possible errors include: + * WIMLIB_ERR_NOMEM + * WIMLIB_ERR_INVALID_DENTRY */ int read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len, @@ -1525,44 +1615,43 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len, u16 file_name_nbytes; int ret; struct wim_inode *inode; - const struct wim_dentry_on_disk *disk_dentry; const u8 *p = &metadata_resource[offset]; + const struct wim_dentry_on_disk *disk_dentry = + (const struct wim_dentry_on_disk*)p; + + BUILD_BUG_ON(sizeof(struct wim_dentry_on_disk) != WIM_DENTRY_DISK_SIZE); if ((uintptr_t)p & 7) WARNING("WIM dentry is not 8-byte aligned"); dentry_common_init(dentry); - /*Make sure the dentry really fits into the metadata resource.*/ - if (offset + sizeof(u64) < offset || - offset + sizeof(u64) > metadata_resource_len) + /* Before reading the whole dentry, we need to read just the length. + * This is because a dentry of length 8 (that is, just the length field) + * terminates the list of sibling directory entries. */ + if (offset + sizeof(u64) > metadata_resource_len || + offset + sizeof(u64) < offset) { ERROR("Directory entry starting at %"PRIu64" ends past the " "end of the metadata resource (size %"PRIu64")", offset, metadata_resource_len); return WIMLIB_ERR_INVALID_DENTRY; } - - disk_dentry = (const struct wim_dentry_on_disk*)p; - - /* Before reading the whole dentry, we need to read just the length. - * This is because a dentry of length 8 (that is, just the length field) - * terminates the list of sibling directory entries. */ dentry->length = le64_to_cpu(disk_dentry->length); /* A zero length field (really a length of 8, since that's how big the * directory entry is...) indicates that this is the end of directory * dentry. We do not read it into memory as an actual dentry, so just - * return successfully in that case. */ + * return successfully in this case. */ if (dentry->length == 8) dentry->length = 0; if (dentry->length == 0) return 0; /* Now that we have the actual length provided in the on-disk structure, - * make sure it doesn't overflow the metadata buffer. */ - if (offset + dentry->length >= metadata_resource_len - || offset + dentry->length < offset) + * again make sure it doesn't overflow the metadata resource buffer. */ + if (offset + dentry->length > metadata_resource_len || + offset + dentry->length < offset) { ERROR("Directory entry at offset %"PRIu64" and with size " "%"PRIu64" ends past the end of the metadata resource " @@ -1658,7 +1747,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len, memcpy(file_name, p, file_name_nbytes); p += file_name_nbytes + 2; file_name[file_name_nbytes / 2] = cpu_to_le16(0); - replace_forbidden_characters(file_name); } else { file_name = NULL; } @@ -1677,7 +1765,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len, memcpy(short_name, p, short_name_nbytes); p += short_name_nbytes + 2; short_name[short_name_nbytes / 2] = cpu_to_le16(0); - replace_forbidden_characters(short_name); } else { short_name = NULL; } @@ -1706,7 +1793,6 @@ read_dentry(const u8 * restrict metadata_resource, u64 metadata_resource_len, goto out_free_short_name; } } -out_success: /* We've read all the data for this dentry. Set the names and their * lengths, and we've done. */ dentry->d_inode = inode; @@ -1726,6 +1812,18 @@ out: return ret; } +static const tchar * +dentry_get_file_type_string(const struct wim_dentry *dentry) +{ + const struct wim_inode *inode = dentry->d_inode; + if (inode_is_directory(inode)) + return T("directory"); + else if (inode_is_symlink(inode)) + return T("symbolic link"); + else + return T("file"); +} + /* Reads the children of a dentry, and all their children, ..., etc. from the * metadata resource and into the dentry tree. * @@ -1748,6 +1846,7 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, { u64 cur_offset = dentry->subdir_offset; struct wim_dentry *child; + struct wim_dentry *duplicate; struct wim_dentry cur_child; int ret; @@ -1761,12 +1860,12 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, return 0; /* Find and read all the children of @dentry. */ - while (1) { + for (;;) { /* Read next child of @dentry into @cur_child. */ ret = read_dentry(metadata_resource, metadata_resource_len, cur_offset, &cur_child); - if (ret != 0) + if (ret) break; /* Check for end of directory. */ @@ -1775,32 +1874,48 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, /* Not end of directory. Allocate this child permanently and * link it to the parent and previous child. */ - child = MALLOC(sizeof(struct wim_dentry)); + child = memdup(&cur_child, sizeof(struct wim_dentry)); if (!child) { - ERROR("Failed to allocate %zu bytes for new dentry", - sizeof(struct wim_dentry)); + ERROR("Failed to allocate new dentry!"); ret = WIMLIB_ERR_NOMEM; break; } - memcpy(child, &cur_child, sizeof(struct wim_dentry)); - dentry_add_child(dentry, child); - inode_add_dentry(child, child->d_inode); - - /* If there are children of this child, call this procedure - * recursively. */ - if (child->subdir_offset != 0) { - ret = read_dentry_tree(metadata_resource, - metadata_resource_len, child); - if (ret != 0) - break; - } /* Advance to the offset of the next child. Note: We need to * advance by the TOTAL length of the dentry, not by the length - * child->length, which although it does take into account the - * padding, it DOES NOT take into account alternate stream + * cur_child.length, which although it does take into account + * the padding, it DOES NOT take into account alternate stream * entries. */ cur_offset += dentry_total_length(child); + + duplicate = dentry_add_child(dentry, child); + if (duplicate) { + const tchar *child_type, *duplicate_type; + child_type = dentry_get_file_type_string(child); + duplicate_type = dentry_get_file_type_string(duplicate); + WARNING("Ignoring duplicate %"TS" \"%"TS"\" " + "(the WIM image already contains a %"TS" " + "at that path with the exact same name)", + child_type, dentry_full_path(duplicate), + duplicate_type); + } else { + inode_add_dentry(child, child->d_inode); + /* If there are children of this child, call this + * procedure recursively. */ + if (child->subdir_offset != 0) { + if (dentry_is_directory(child)) { + ret = read_dentry_tree(metadata_resource, + metadata_resource_len, + child); + if (ret) + break; + } else { + WARNING("Ignoring children of non-directory \"%"TS"\"", + dentry_full_path(child)); + } + } + + } } return ret; } @@ -1812,7 +1927,7 @@ read_dentry_tree(const u8 metadata_resource[], u64 metadata_resource_len, * @p: The memory location to write the data to. * * Returns the pointer to the byte after the last byte we wrote as part of the - * dentry, including any alternate data streams entry. + * dentry, including any alternate data stream entries. */ static u8 * write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) @@ -1836,12 +1951,8 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) disk_dentry->creation_time = cpu_to_le64(inode->i_creation_time); disk_dentry->last_access_time = cpu_to_le64(inode->i_last_access_time); disk_dentry->last_write_time = cpu_to_le64(inode->i_last_write_time); - - if (inode->i_resolved) - hash = inode->i_lte->hash; - else - hash = inode->i_hash; - copy_hash(disk_dentry->unnamed_stream_hash, inode_stream_hash(inode, 0)); + hash = inode_stream_hash(inode, 0); + copy_hash(disk_dentry->unnamed_stream_hash, hash); if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) { disk_dentry->reparse.rp_unknown_1 = cpu_to_le32(inode->i_rp_unknown_1); disk_dentry->reparse.reparse_tag = cpu_to_le32(inode->i_reparse_tag); @@ -1870,19 +1981,19 @@ write_dentry(const struct wim_dentry * restrict dentry, u8 * restrict p) /* We calculate the correct length of the dentry ourselves because the * dentry->length field may been set to an unexpected value from when we * read the dentry in (for example, there may have been unknown data - * appended to the end of the dentry...) */ + * appended to the end of the dentry...). Furthermore, the dentry may + * have been renamed, thus changing its needed length. */ disk_dentry->length = cpu_to_le64(p - orig_p); - /* Write the alternate data streams entries, if there are any. */ - for (u16 i = 0; i < inode->i_num_ads; i++) - { + /* Write the alternate data streams entries, if any. */ + for (u16 i = 0; i < inode->i_num_ads; i++) { const struct wim_ads_entry *ads_entry = - &inode->i_ads_entries[i]; + &inode->i_ads_entries[i]; struct wim_ads_entry_on_disk *disk_ads_entry = (struct wim_ads_entry_on_disk*)p; + orig_p = p; disk_ads_entry->reserved = cpu_to_le64(ads_entry->reserved); - orig_p = p; hash = inode_stream_hash(inode, i + 1); copy_hash(disk_ads_entry->hash, hash);