X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Finode_fixup.c;h=8e1579db014617ac94bad6778d83ce3409453d9b;hp=d24157cd224d24f2cd8da6f2f636249c8ec7de53;hb=9eb312340031663062d6be70cbe2097b677647c9;hpb=7a3d3588c3a7eb30b43d72d3b653053347d89e56 diff --git a/src/inode_fixup.c b/src/inode_fixup.c index d24157cd..8e1579db 100644 --- a/src/inode_fixup.c +++ b/src/inode_fixup.c @@ -1,26 +1,22 @@ /* * inode_fixup.c - * - * See dentry_tree_fix_inodes() for description. */ /* - * Copyright (C) 2012, 2013 Eric Biggers - * - * This file is part of wimlib, a library for working with WIM files. + * Copyright (C) 2012, 2013, 2014 Eric Biggers * - * wimlib is free software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the Free - * Software Foundation; either version 3 of the License, or (at your option) - * any later version. + * This file is free software; you can redistribute it and/or modify it under + * the terms of the GNU Lesser General Public License as published by the Free + * Software Foundation; either version 3 of the License, or (at your option) any + * later version. * - * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY - * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR - * A PARTICULAR PURPOSE. See the GNU General Public License for more + * This file is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more * details. * - * You should have received a copy of the GNU General Public License - * along with wimlib; if not, see http://www.gnu.org/licenses/. + * You should have received a copy of the GNU Lesser General Public License + * along with this file; if not, see http://www.gnu.org/licenses/. */ #ifdef HAVE_CONFIG_H @@ -31,466 +27,176 @@ #include "wimlib/error.h" #include "wimlib/inode.h" #include "wimlib/inode_table.h" -#include "wimlib/lookup_table.h" - -/* Manual link count of inode (normally we can just check i_nlink) */ -static inline size_t -inode_link_count(const struct wim_inode *inode) -{ - const struct list_head *cur; - size_t size = 0; - list_for_each(cur, &inode->i_dentry) - size++; - return size; -} - -static inline void -print_inode_dentries(const struct wim_inode *inode) -{ - struct wim_dentry *dentry; - inode_for_each_dentry(dentry, inode) - tfprintf(stderr, T("%"TS"\n"), dentry_full_path(dentry)); -} - -static void -inconsistent_inode(const struct wim_inode *inode) -{ - if (wimlib_print_errors) { - WARNING("An inconsistent hard link group that cannot be corrected has " - "been detected"); - WARNING("The dentries are located at the following paths:"); - print_inode_dentries(inode); - } -} -static bool -ads_entries_have_same_name(const struct wim_ads_entry *entry_1, - const struct wim_ads_entry *entry_2) -{ - return entry_1->stream_name_nbytes == entry_2->stream_name_nbytes && - memcmp(entry_1->stream_name, entry_2->stream_name, - entry_1->stream_name_nbytes) == 0; -} +struct inode_fixup_params { + struct wim_inode_table inode_table; + unsigned long num_dir_hard_links; + unsigned long num_inconsistent_inodes; +}; -static bool -ref_inodes_consistent(const struct wim_inode *ref_inode_1, - const struct wim_inode *ref_inode_2) -{ - if (ref_inode_1->i_num_ads != ref_inode_2->i_num_ads) - return false; - if (ref_inode_1->i_security_id != ref_inode_2->i_security_id - || ref_inode_1->i_attributes != ref_inode_2->i_attributes) - return false; - for (unsigned i = 0; i <= ref_inode_1->i_num_ads; i++) { - const u8 *ref_1_hash, *ref_2_hash; - ref_1_hash = inode_stream_hash(ref_inode_1, i); - ref_2_hash = inode_stream_hash(ref_inode_2, i); - if (!hashes_equal(ref_1_hash, ref_2_hash)) - return false; - if (i && !ads_entries_have_same_name(&ref_inode_1->i_ads_entries[i - 1], - &ref_inode_2->i_ads_entries[i - 1])) - return false; +#define MAX_DIR_HARD_LINK_WARNINGS 8 - } - return true; -} - -/* Returns true iff the specified inode has any data streams with nonzero hash. - */ static bool -inode_has_data_streams(const struct wim_inode *inode) +inodes_consistent(const struct wim_inode *inode_1, + const struct wim_inode *inode_2) { - for (unsigned i = 0; i <= inode->i_num_ads; i++) - if (!is_zero_hash(inode_stream_hash(inode, i))) - return true; - return false; + /* This certainly isn't the only thing we need to check to make sure the + * inodes are consistent. However, this seems to be the only thing that + * the MS implementation checks when working around its own bug. + * + * (Tested: If two dentries share the same hard link group ID, Windows + * 8.1 DISM will link them if they have the same unnamed stream hash, + * even if the dentries provide different timestamps, attributes, + * alternate data streams, and security IDs! And the one that gets used + * will change if you merely swap the filenames. But if you use + * different unnamed stream hashes with everything else the same, it + * doesn't link the dentries.) + * + * For non-buggy WIMs this function will always return true. */ + return hashes_equal(inode_get_hash_of_unnamed_data_stream(inode_1), + inode_get_hash_of_unnamed_data_stream(inode_2)); } -/* Returns true iff the specified dentry has any data streams with nonzero hash. - */ -static bool -dentry_has_data_streams(const struct wim_dentry *dentry) -{ - return inode_has_data_streams(dentry->d_inode); -} - -static bool -inodes_consistent(const struct wim_inode *ref_inode, - const struct wim_inode *inode) +static int +inode_table_insert(struct wim_dentry *dentry, void *_params) { - if (ref_inode->i_security_id != inode->i_security_id) { - WARNING("Security ID mismatch: %d != %d", - ref_inode->i_security_id, inode->i_security_id); - return false; - } + struct inode_fixup_params *params = _params; + struct wim_inode_table *table = ¶ms->inode_table; + struct wim_inode *d_inode = dentry->d_inode; + size_t pos; + struct wim_inode *inode; - if (ref_inode->i_attributes != inode->i_attributes) { - WARNING("Attributes mismatch: 0x%08x != 0x%08x", - ref_inode->i_attributes, inode->i_attributes); - return false; + if (d_inode->i_ino == 0) { + hlist_add_head(&d_inode->i_hlist_node, &table->extra_inodes); + return 0; } - if (inode_has_data_streams(inode)) { - if (ref_inode->i_num_ads != inode->i_num_ads) { - WARNING("Stream count mismatch: %u != %u", - ref_inode->i_num_ads, inode->i_num_ads); - return false; + /* Try adding this dentry to an existing inode. */ + pos = hash_inode(table, d_inode->i_ino, 0); + hlist_for_each_entry(inode, &table->array[pos], i_hlist_node) { + if (inode->i_ino != d_inode->i_ino) { + continue; } - for (unsigned i = 0; i <= ref_inode->i_num_ads; i++) { - const u8 *ref_hash, *hash; - - ref_hash = inode_stream_hash(ref_inode, i); - hash = inode_stream_hash(inode, i); - if (!hashes_equal(ref_hash, hash) && !is_zero_hash(hash)) { - WARNING("Stream hash mismatch"); - return false; - } - if (i && !ads_entries_have_same_name(&ref_inode->i_ads_entries[i - 1], - &inode->i_ads_entries[i - 1])) + if (unlikely(!inodes_consistent(inode, d_inode))) { + params->num_inconsistent_inodes++; + continue; + } + if (unlikely((d_inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) || + (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY))) + { + params->num_dir_hard_links++; + if (params->num_dir_hard_links <= + MAX_DIR_HARD_LINK_WARNINGS) { - WARNING("Stream name mismatch"); - return false; + WARNING("Unsupported directory hard link " + "\"%"TS"\" <=> \"%"TS"\"", + dentry_full_path(dentry), + inode_any_full_path(inode)); + } else if (params->num_dir_hard_links == + MAX_DIR_HARD_LINK_WARNINGS + 1) + { + WARNING("Suppressing additional warnings about " + "directory hard links..."); } + continue; } + /* Transfer this dentry to the existing inode. */ + d_disassociate(dentry); + d_associate(dentry, inode); + return 0; } - return true; -} -/* Fix up a "true" inode and check for inconsistencies */ -static int -fix_true_inode(struct wim_inode *inode, struct list_head *inode_list) -{ - struct wim_dentry *dentry; - struct wim_dentry *ref_dentry = NULL; - struct wim_inode *ref_inode; - u64 last_ctime = 0; - u64 last_mtime = 0; - u64 last_atime = 0; - - inode_for_each_dentry(dentry, inode) { - if (!ref_dentry || dentry->d_inode->i_num_ads > ref_dentry->d_inode->i_num_ads) - ref_dentry = dentry; - if (dentry->d_inode->i_creation_time > last_ctime) - last_ctime = dentry->d_inode->i_creation_time; - if (dentry->d_inode->i_last_write_time > last_mtime) - last_mtime = dentry->d_inode->i_last_write_time; - if (dentry->d_inode->i_last_access_time > last_atime) - last_atime = dentry->d_inode->i_last_access_time; - } - - ref_inode = ref_dentry->d_inode; - wimlib_assert(ref_inode->i_nlink == 1); - list_add_tail(&ref_inode->i_list, inode_list); - - list_del(&inode->i_dentry); - list_add(&ref_inode->i_dentry, &ref_dentry->d_alias); - - inode_for_each_dentry(dentry, ref_inode) { - if (dentry != ref_dentry) { - if (!inodes_consistent(ref_inode, dentry->d_inode)) - inconsistent_inode(ref_inode); - /* Free the unneeded `struct wim_inode'. */ - wimlib_assert(dentry->d_inode->i_nlink == 1); - free_inode(dentry->d_inode); - dentry->d_inode = ref_inode; - ref_inode->i_nlink++; - } - } - ref_inode->i_creation_time = last_ctime; - ref_inode->i_last_write_time = last_mtime; - ref_inode->i_last_access_time = last_atime; - wimlib_assert(inode_link_count(ref_inode) == ref_inode->i_nlink); + /* Keep this dentry's inode. */ + hlist_add_head(&d_inode->i_hlist_node, &table->array[pos]); + if (++table->filled > table->capacity) + enlarge_inode_table(table); return 0; } -/* - * Fixes up a nominal inode. - * - * By a nominal inode we mean a group of two or more dentries that share the - * same hard link group ID. - * - * If dentries in the inode are found to be inconsistent, we may split the inode - * into several "true" inodes. - * - * After splitting up each nominal inode into the "true" inodes we will - * canonicalize the link group by getting rid of all the unnecessary `struct - * wim_inode's. There will be just one `struct wim_inode' for each hard link - * group remaining. - */ -static int -fix_nominal_inode(struct wim_inode *inode, struct list_head *inode_list, - bool *ino_changes_needed) +static void +hlist_move_all(struct hlist_head *src, struct hlist_head *dest) { - struct wim_dentry *dentry; - struct hlist_node *cur, *tmp; - int ret; - size_t num_true_inodes; - - LIST_HEAD(dentries_with_data_streams); - LIST_HEAD(dentries_with_no_data_streams); - HLIST_HEAD(true_inodes); - - /* Create a list of dentries in the nominal inode that have at - * least one data stream with a non-zero hash, and another list that - * contains the dentries that have a zero hash for all data streams. */ - inode_for_each_dentry(dentry, inode) { - if (dentry_has_data_streams(dentry)) - list_add(&dentry->tmp_list, &dentries_with_data_streams); - else - list_add(&dentry->tmp_list, &dentries_with_no_data_streams); - } - - /* If there are no dentries with data streams, we require the nominal - * inode to be a true inode */ - if (list_empty(&dentries_with_data_streams)) { - #ifdef ENABLE_DEBUG - unsigned nominal_group_size = inode_link_count(inode); - if (nominal_group_size > 1) { - DEBUG("Found link group of size %u without " - "any data streams:", nominal_group_size); - print_inode_dentries(inode); - DEBUG("We are going to interpret it as true " - "link group, provided that the dentries " - "are consistent."); - } - #endif - return fix_true_inode(inode, inode_list); - } - - /* One or more dentries had data streams specified. We check each of - * these dentries for consistency with the others to form a set of true - * inodes. */ - num_true_inodes = 0; - list_for_each_entry(dentry, &dentries_with_data_streams, tmp_list) { - /* Look for a true inode that is consistent with this dentry and - * add this dentry to it. Or, if none of the true inodes are - * consistent with this dentry, add a new one (if that happens, - * we have split the hard link group). */ - hlist_for_each_entry(inode, cur, &true_inodes, i_hlist) { - if (ref_inodes_consistent(inode, dentry->d_inode)) { - inode_add_dentry(dentry, inode); - goto next_dentry_2; - } - } - num_true_inodes++; - INIT_LIST_HEAD(&dentry->d_inode->i_dentry); - inode_add_dentry(dentry, dentry->d_inode); - hlist_add_head(&dentry->d_inode->i_hlist, &true_inodes); -next_dentry_2: - ; - } - - wimlib_assert(num_true_inodes != 0); - - /* If there were dentries with no data streams, we require there to only - * be one true inode so that we know which inode to assign the - * streamless dentries to. */ - if (!list_empty(&dentries_with_no_data_streams)) { - if (num_true_inodes != 1) { - ERROR("Hard link ambiguity detected!"); - ERROR("We split up inode 0x%"PRIx64" due to " - "inconsistencies,", inode->i_ino); - ERROR("but dentries with no stream information remained. " - "We don't know which inode"); - ERROR("to assign them to."); - ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; - goto out_cleanup_true_inode_list; - } - inode = container_of(true_inodes.first, struct wim_inode, i_hlist); - /* Assign the streamless dentries to the one and only true - * inode. */ - list_for_each_entry(dentry, &dentries_with_no_data_streams, tmp_list) - inode_add_dentry(dentry, inode); - } - if (num_true_inodes != 1) { - #ifdef ENABLE_DEBUG - inode = container_of(true_inodes.first, struct wim_inode, i_hlist); - - tprintf(T("Split nominal inode 0x%"PRIx64" into %zu " - "inodes:\n"), inode->i_ino, num_true_inodes); - tputs(T("----------------------------------------------------" - "--------------------------")); - size_t i = 1; - hlist_for_each_entry(inode, cur, &true_inodes, i_hlist) { - tprintf(T("[Split inode %zu]\n"), i++); - print_inode_dentries(inode); - tputchar(T('\n')); - } - tputs(T("----------------------------------------------------" - "--------------------------")); - #endif - *ino_changes_needed = true; - } + struct hlist_node *node; - hlist_for_each_entry_safe(inode, cur, tmp, &true_inodes, i_hlist) { - hlist_del_init(&inode->i_hlist); - ret = fix_true_inode(inode, inode_list); - if (ret) - goto out_cleanup_true_inode_list; + while ((node = src->first) != NULL) { + hlist_del(node); + hlist_add_head(node, dest); } - ret = 0; - goto out; -out_cleanup_true_inode_list: - hlist_for_each_entry_safe(inode, cur, tmp, &true_inodes, i_hlist) - hlist_del_init(&inode->i_hlist); -out: - return ret; } -static int -fix_inodes(struct wim_inode_table *table, struct list_head *inode_list, - bool *ino_changes_needed) +/* Move the inodes from the 'struct wim_inode_table' to the 'inode_list'. */ +static void +build_inode_list(struct wim_inode_table *inode_table, + struct hlist_head *inode_list) { - struct wim_inode *inode; - struct hlist_node *cur, *tmp; - int ret; - INIT_LIST_HEAD(inode_list); - for (size_t i = 0; i < table->capacity; i++) { - hlist_for_each_entry_safe(inode, cur, tmp, &table->array[i], i_hlist) { - hlist_del_init(&inode->i_hlist); - ret = fix_nominal_inode(inode, inode_list, ino_changes_needed); - if (ret) - return ret; - } - } - list_splice_tail(&table->extra_inodes, inode_list); - return 0; + hlist_move_all(&inode_table->extra_inodes, inode_list); + for (size_t i = 0; i < inode_table->capacity; i++) + hlist_move_all(&inode_table->array[i], inode_list); } -/* Insert a dentry into the inode table based on the inode number of the - * attached inode (which came from the hard link group ID field of the on-disk - * WIM dentry) */ -static int -inode_table_insert(struct wim_dentry *dentry, void *_table) +/* Re-assign inode numbers to the inodes in the list. */ +static void +reassign_inode_numbers(struct hlist_head *inode_list) { - struct wim_inode_table *table = _table; - struct wim_inode *d_inode = dentry->d_inode; - - if (d_inode->i_ino == 0) { - /* A dentry with a hard link group ID of 0 indicates that it's - * in a hard link group by itself. Add it to the list of extra - * inodes rather than inserting it into the hash lists. */ - list_add_tail(&d_inode->i_list, &table->extra_inodes); - } else { - size_t pos; - struct wim_inode *inode; - struct hlist_node *cur; - - /* Try adding this dentry to an existing inode */ - pos = d_inode->i_ino % table->capacity; - hlist_for_each_entry(inode, cur, &table->array[pos], i_hlist) { - if (inode->i_ino == d_inode->i_ino) { - if (unlikely((inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) || - (d_inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY))) - { - ERROR("Unsupported directory hard link " - "\"%"TS"\" <=> \"%"TS"\"", - dentry_full_path(dentry), - dentry_full_path(inode_first_dentry(inode))); - return WIMLIB_ERR_INVALID_METADATA_RESOURCE; - } - inode_add_dentry(dentry, inode); - return 0; - } - } - - /* No inode in the table has the same number as this one, so add - * it to the table. */ - hlist_add_head(&d_inode->i_hlist, &table->array[pos]); + struct wim_inode *inode; + u64 cur_ino = 1; - /* XXX Make the table grow when too many entries have been - * inserted. */ - table->num_entries++; - } - return 0; + hlist_for_each_entry(inode, inode_list, i_hlist_node) + inode->i_ino = cur_ino++; } - /* - * dentry_tree_fix_inodes(): - * - * This function takes as input a tree of WIM dentries that initially has a - * different inode associated with each dentry. Sets of dentries that should - * share the same inode (a.k.a. hard link groups) are built using the i_ino - * field of each inode, then the link count and alias list for one inode in each - * set is set correctly and the unnecessary struct wim_inode's freed. The - * effect is to correctly associate exactly one struct wim_inode with each - * original inode, regardless of how many dentries are aliases for that inode. - * - * The special inode number of 0 indicates that the dentry is in a hard link - * group by itself, and therefore has a 'struct wim_inode' with i_nlink=1 to - * itself. + * Given a WIM image's tree of dentries such that each dentry initially + * has a unique inode associated with it, determine the actual + * dentry/inode information. Following this, a single inode may be named + * by more than one dentry (usually called a hard link). * - * This function also checks the dentries in each hard link group for - * consistency. In some WIMs, such as install.wim for some versions of Windows - * 7, dentries can share the same hard link group ID but not actually be hard - * linked to each other (based on conflicting information, such as file - * contents). This should be an error, but this case needs be handled. So, - * each "nominal" inode (the inode based on the inode numbers provided in the - * WIM) is examined for consistency and may be split into multiple "true" inodes - * that are maximally sized consistent sets of dentries. + * The 'hard_link_group_id' field of the on-disk WIM dentry, which we + * have read into 'i_ino' of each dentry's initial inode, determines + * which dentries share the same inode. Ideally, dentries share the same + * inode if and only if they have the same value in this field. However, + * exceptions apply: * - * On success, the list of "true" inodes, linked by the i_list field, - * is returned in the list @inode_list. + * - If 'hard_link_group_id' is 0, the corresponding dentry is the sole + * name for its inode. + * - Due to bugs in the Microsoft implementation, dentries with different + * 'hard_link_group_id' fields may, in fact, need to be interpreted as + * naming different inodes. This seems to mostly affect images in + * install.wim for Windows 7. I try to work around this in the same way + * the Microsoft implementation works around this. * - * Return values: - * WIMLIB_ERR_SUCCESS (0) - * WIMLIB_ERR_INVALID_METADATA_RESOURCE - * WIMLIB_ERR_NOMEM + * Returns 0 or WIMLIB_ERR_NOMEM. On success, the resulting inodes will be + * appended to the @inode_list, and they will have consistent numbers in their + * i_ino fields. */ int -dentry_tree_fix_inodes(struct wim_dentry *root, struct list_head *inode_list) +dentry_tree_fix_inodes(struct wim_dentry *root, struct hlist_head *inode_list) { - struct wim_inode_table inode_tab; + struct inode_fixup_params params; int ret; - bool ino_changes_needed; - struct wim_inode *inode; - DEBUG("Inserting dentries into inode table"); - ret = init_inode_table(&inode_tab, 9001); - if (ret) - goto out; + /* We use a hash table to map inode numbers to inodes. */ - ret = for_dentry_in_tree(root, inode_table_insert, &inode_tab); + ret = init_inode_table(¶ms.inode_table, 64); if (ret) - goto out_destroy_inode_table; + return ret; - DEBUG("Cleaning up the hard link groups"); - ino_changes_needed = false; - ret = fix_inodes(&inode_tab, inode_list, &ino_changes_needed); - if (ret) - goto out_destroy_inode_table; + params.num_dir_hard_links = 0; + params.num_inconsistent_inodes = 0; - if (ino_changes_needed) { - u64 cur_ino = 1; + for_dentry_in_tree(root, inode_table_insert, ¶ms); - WARNING("The WIM image contains invalid hard links. Fixing."); + /* Generate the resulting list of inodes, and if needed reassign + * the inode numbers. */ + build_inode_list(¶ms.inode_table, inode_list); + destroy_inode_table(¶ms.inode_table); - list_for_each_entry(inode, inode_list, i_list) { - if (inode->i_nlink > 1) - inode->i_ino = cur_ino++; - else - inode->i_ino = 0; - } - } - /* On success, all the inodes have been moved to the image inode list, - * so there's no need to delete from from the hash lists in the inode - * table before freeing the hash buckets array directly. */ - ret = 0; - goto out_destroy_inode_table_raw; -out_destroy_inode_table: - for (size_t i = 0; i < inode_tab.capacity; i++) { - struct hlist_node *cur, *tmp; - hlist_for_each_entry_safe(inode, cur, tmp, &inode_tab.array[i], i_hlist) - hlist_del_init(&inode->i_hlist); - } - { - struct wim_inode *tmp; - list_for_each_entry_safe(inode, tmp, &inode_tab.extra_inodes, i_list) - list_del_init(&inode->i_list); - } -out_destroy_inode_table_raw: - destroy_inode_table(&inode_tab); -out: - return ret; + if (unlikely(params.num_dir_hard_links)) + WARNING("Ignoring %lu directory hard links", + params.num_dir_hard_links); + + if (unlikely(params.num_inconsistent_inodes || + params.num_dir_hard_links)) + reassign_inode_numbers(inode_list); + return 0; }