From: Eric Biggers Date: Tue, 28 Aug 2012 19:55:41 +0000 (-0500) Subject: Hard link group disambiguation X-Git-Tag: v1.0.0~54 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=02f6819d7098ce1b57f4fe61d0a3574393ea309e Hard link group disambiguation --- diff --git a/src/hardlink.c b/src/hardlink.c index 62002777..a94bb862 100644 --- a/src/hardlink.c +++ b/src/hardlink.c @@ -30,11 +30,38 @@ #include "list.h" #include "lookup_table.h" -/* Hard link group; it's identified by its hard link group ID and consists of a +/* NULL NULL + * ^ ^ + * dentry | | + * / \ ----------- ----------- + * | dentry<---| struct | | struct |---> dentry + * \ / |link_group| |link_group| + * dentry ------------ ------------ + * ^ ^ + * | | + * | | dentry + * ----------- ----------- / \ + * dentry<---| struct | | struct |---> dentry dentry + * / |link_group| |link_group| \ / + * dentry ------------ ------------ dentry + * ^ ^ + * | | + * ----------------- + * link_group_table->array | idx 0 | idx 1 | + * ----------------- + */ + +/* Hard link group; it's identified by its hard link group ID and points to a * circularly linked list of dentries. */ struct link_group { u64 link_group_id; + + /* Pointer to use to make a singly-linked list of link groups. */ struct link_group *next; + + /* This is a pointer to the circle and not part of the circle itself. + * This makes it easy to iterate through other dentries hard-linked to a + * given dentry without having to find the "head" of the list first. */ struct list_head *dentry_list; }; @@ -51,11 +78,12 @@ struct link_group_table { * * - Hard link groups of size 1, which are all allowed to have 0 for * their hard link group ID, meaning we cannot insert them into the - * hash table - * - Groups we create ourselves by splitting a nominal hard link group - * due to inconsistencies in the dentries. These groups will have a - * hard link group ID duplicated with some other group until - * assign_link_group_ids() is called. + * hash table before calling assign_link_group_ids(). + * + * - Groups we create ourselves by splitting a nominal hard link group + * due to inconsistencies in the dentries. These groups will share a + * hard link group ID with some other group until + * assign_link_group_ids() is called. */ struct link_group *extra_groups; }; @@ -90,7 +118,8 @@ err: * ID. * * If there is already a dentry in the table having the same hard link group ID, - * we link the dentries together in a circular list. + * and the hard link group ID is not 0, the dentry is added to the circular + * linked list for that hard link group. * * If the hard link group ID is 0, this indicates a dentry that's in a hard link * group by itself (has a link count of 1). We can't insert it into the hash @@ -118,9 +147,9 @@ int link_group_table_insert(struct dentry *dentry, void *__table) INIT_LIST_HEAD(&dentry->link_group_list); group->dentry_list = &dentry->link_group_list; } else { - /* Hard link group that may should multiple dentries (the code - * will work even if the group actually contains only 1 dentry - * though) */ + /* Hard link group that may contain multiple dentries (the code + * will work even if the group actually contains only 1 dentry + * though) */ /* Try adding to existing hard link group */ pos = dentry->hard_link % table->capacity; @@ -167,33 +196,40 @@ void free_link_group_table(struct link_group_table *table) { struct link_group *single, *next; - if (!table) - return; - if (table->array) - for (size_t i = 0; i < table->capacity; i++) - free_link_group_list(table->array[i]); - free_link_group_list(table->extra_groups); - - FREE(table); + if (table) { + if (table->array) + for (size_t i = 0; i < table->capacity; i++) + free_link_group_list(table->array[i]); + free_link_group_list(table->extra_groups); + FREE(table); + } } -u64 assign_link_group_ids_to_list(struct link_group *group, u64 id) +u64 assign_link_group_ids_to_list(struct link_group *group, u64 id, + struct link_group **extra_groups) { struct dentry *dentry; - struct list_head *cur; - while (group) { - cur = group->dentry_list; + struct list_head *cur_head; + struct link_group *prev_group = NULL; + struct link_group *cur_group = group; + while (cur_group) { + cur_head = cur_group->dentry_list; do { - dentry = container_of(cur, + dentry = container_of(cur_head, struct dentry, link_group_list); dentry->hard_link = id; - cur = cur->next; - } while (cur != group->dentry_list); - group->link_group_id = id; + cur_head = cur_head->next; + } while (cur_head != cur_group->dentry_list); + cur_group->link_group_id = id; id++; - group = group->next; + prev_group = cur_group; + cur_group = cur_group->next; } + if (group && extra_groups) { + prev_group->next = *extra_groups; + *extra_groups = group; + } return id; } @@ -205,11 +241,11 @@ static void insert_extra_groups(struct link_group_table *table) group = table->extra_groups; while (group) { - next_group = group->next; - pos = group->link_group_id % table->capacity; - group->next = table->array[pos]; + next_group = group->next; + pos = group->link_group_id % table->capacity; + group->next = table->array[pos]; table->array[pos] = group; - group = next_group; + group = next_group; } table->extra_groups = NULL; } @@ -219,16 +255,20 @@ static void insert_extra_groups(struct link_group_table *table) u64 assign_link_group_ids(struct link_group_table *table) { DEBUG("Assigning link groups"); + struct link_group *extra_groups = table->extra_groups; /* Assign consecutive link group IDs to each link group in the hash * table */ u64 id = 1; - for (size_t i = 0; i < table->capacity; i++) - id = assign_link_group_ids_to_list(table->array[i], id); + for (size_t i = 0; i < table->capacity; i++) { + id = assign_link_group_ids_to_list(table->array[i], id, + &table->extra_groups); + table->array[i] = NULL; + } /* Assign link group IDs to the "extra" link groups and insert them into * the hash table */ - id = assign_link_group_ids_to_list(table->extra_groups, id); + id = assign_link_group_ids_to_list(extra_groups, id, NULL); insert_extra_groups(table); return id; } @@ -245,7 +285,7 @@ static void inconsistent_link_group(const struct dentry *first_dentry) do { ERROR("`%s'", dentry->full_path_utf8); } while ((dentry = container_of(dentry->link_group_list.next, - struct dentry, + const struct dentry, link_group_list)) != first_dentry); } @@ -261,8 +301,8 @@ static bool ref_dentries_consistent(const struct dentry * restrict ref_dentry_1, return false; for (unsigned i = 0; i <= ref_dentry_1->num_ads; i++) { const u8 *ref_1_hash, *ref_2_hash; - ref_1_hash = dentry_stream_hash_unresolved(ref_dentry_1, i); - ref_2_hash = dentry_stream_hash_unresolved(ref_dentry_2, i); + ref_1_hash = dentry_stream_hash(ref_dentry_1, i); + ref_2_hash = dentry_stream_hash(ref_dentry_2, i); if (!hashes_equal(ref_1_hash, ref_2_hash)) return false; if (i && !ads_entries_have_same_name(&ref_dentry_1->ads_entries[i - 1], @@ -285,8 +325,8 @@ static bool dentries_consistent(const struct dentry * restrict ref_dentry, return false; for (unsigned i = 0; i <= min(ref_dentry->num_ads, dentry->num_ads); i++) { const u8 *ref_hash, *hash; - ref_hash = dentry_stream_hash_unresolved(ref_dentry, i); - hash = dentry_stream_hash_unresolved(dentry, i); + ref_hash = dentry_stream_hash(ref_dentry, i); + hash = dentry_stream_hash(dentry, i); if (!hashes_equal(ref_hash, hash) && !is_zero_hash(hash)) return false; if (i && !ads_entries_have_same_name(&ref_dentry->ads_entries[i - 1], @@ -296,6 +336,7 @@ static bool dentries_consistent(const struct dentry * restrict ref_dentry, return true; } +/* Fix up a "true" link group and check for inconsistencies */ static int fix_true_link_group(struct dentry *first_dentry) { @@ -343,6 +384,40 @@ fix_true_link_group(struct dentry *first_dentry) return 0; } +/* + * Fixes up a nominal link group. + * + * By a nominal link group we mean a group of two or more dentries that share + * the same hard link group ID. + * + * If dentries in the group are found to be inconsistent, we may split the group + * into several groups. @new_groups points to a linked list of these "extra" + * groups, and if we create any, they will be added to this list. + * + * After splitting up each nominal link group into the "true" link groups we + * will canonicalize the link groups. To do this, we: + * + * - Assign all the dentries in the link group the most recent timestamp + * among all the corresponding timestamps in the link group, for each of + * the three categories of time stamps + * + * - Make sure the dentry->hash field is valid in all the dentries, if + * possible (this field may be all zeroes, and in the context of a hard + * link group must be interpreted as implicitly refering to the same stream + * as another dentry is the hard link group that does *not* have all zeroes + * for the stream hash). + * + * - Make sure dentry->num_ads is the same in all the dentries in the link + * group. In some cases, it's possible for it to be set to 0 when it must + * be interpreted as being the same as the number of alternate data streams + * in another dentry in the hard link group that has a nonzero number of + * alternate data streams. + * + * - Make sure only the dentry->ads_entries array is only allocated for one + * dentry in the hard link group. This dentry will have + * dentry->ads_entries_status set to ADS_ENTRIES_OWNER, while the others + * will have dentry->ads_entries_status set to ADS_ENTRIES_USER. + */ static int fix_nominal_link_group(struct link_group *group, struct link_group **new_groups) @@ -357,14 +432,15 @@ fix_nominal_link_group(struct link_group *group, LIST_HEAD(dentries_with_no_data_streams); LIST_HEAD(true_link_groups); - /* Create the lists @dentries_with_data_streams and - * @dentries_with_no_data_streams. */ + /* Create a list of dentries in the nominal hard link group that have at + * least one data stream with a non-zero hash, and another list that + * contains the dentries that have a zero hash for all data streams. */ dentry = container_of(group->dentry_list, struct dentry, link_group_list); do { for (unsigned i = 0; i <= dentry->num_ads; i++) { const u8 *hash; - hash = dentry_stream_hash_unresolved(dentry, i); + hash = dentry_stream_hash(dentry, i); if (!is_zero_hash(hash)) { list_add(&dentry->tmp_list, &dentries_with_data_streams); @@ -386,9 +462,9 @@ fix_nominal_link_group(struct link_group *group, struct dentry, link_group_list)); - /* One or more dentries had data streams. We check each of these - * dentries for consistency with the others to form a set of true link - * groups. */ + /* One or more dentries had data streams specified. We check each of + * these dentries for consistency with the others to form a set of true + * link groups. */ num_true_link_groups = 0; list_for_each_entry_safe(dentry, tmp, &dentries_with_data_streams, tmp_list) @@ -415,15 +491,14 @@ next_dentry_2: wimlib_assert(num_true_link_groups != 0); - /* If there were dentries with no data streams, we require there to only - * be one true link group so that we know which link group to assign the - * - * streamless dentries to. */ + /* If there were dentries with no data streams, we require there to only + * be one true link group so that we know which link group to assign the + * streamless dentries to. */ if (!list_empty(&dentries_with_no_data_streams)) { if (num_true_link_groups != 1) { ERROR("Hard link group ambiguity detected!"); ERROR("We split up hard link group 0x%"PRIx64" due to " - "inconsistencies,"); + "inconsistencies,", group->link_group_id); ERROR("but dentries with no stream information remained. " "We don't know which true hard link"); ERROR("group to assign them to."); @@ -432,13 +507,14 @@ next_dentry_2: ref_dentry = container_of(true_link_groups.next, struct dentry, tmp_list); - list_for_each_entry(dentry, &dentries_with_no_data_streams, - tmp_list) - { - list_add(&dentry->link_group_list, - &ref_dentry->link_group_list); - } + list_splice(&dentries_with_no_data_streams, + &ref_dentry->link_group_list); } + if (num_true_link_groups != 1) { + WARNING("Split nominal link group 0x%"PRIx64" into %zu " + "link groups", + group->link_group_id, num_true_link_groups); + } list_for_each_entry(dentry, &true_link_groups, tmp_list) { ret = fix_true_link_group(dentry); @@ -461,7 +537,7 @@ next_dentry_2: group->dentry_list = &dentry->link_group_list; group->next = *new_groups; *new_groups = group; - } while ((head = head->next) != &true_link_groups); + } return 0; } @@ -491,93 +567,3 @@ int fix_link_groups(struct link_group_table *table) } return 0; } - -#if 0 -static bool dentries_have_same_ads(const struct dentry *d1, - const struct dentry *d2) -{ - wimlib_assert(d1->num_ads == d2->num_ads); - /* Verify stream names and hashes are the same */ - for (u16 i = 0; i < d1->num_ads; i++) { - if (strcmp(d1->ads_entries[i].stream_name_utf8, - d2->ads_entries[i].stream_name_utf8) != 0) - return false; - if (!hashes_equal(d1->ads_entries[i].hash, - d2->ads_entries[i].hash)) - return false; - } - return true; -} - -/* - * Share the alternate stream entries between hard-linked dentries. - * - * Notes: - * - If you use 'imagex.exe' (version 6.1.7600.16385) to create a WIM containing - * hard-linked files, only one dentry in the hard link set will refer to data - * streams, including all alternate data streams. The rest of the dentries in - * the hard link set will be marked as having 0 alternate data streams and - * will not refer to any main file stream (the SHA1 message digest will be all - * 0's). - * - * - However, if you look at the WIM's that Microsoft actually distributes (e.g. - * Windows 7/8 boot.wim, install.wim), it's not the same as above. The - * dentries in hard link sets will have stream information duplicated. I - * can't say anything about the alternate data streams because these WIMs do - * not contain alternate data streams. - * - * - Windows 7 'install.wim' contains hard link sets containing dentries with - * inconsistent streams and other inconsistent information such as security - * ID. The only way I can think to handle these is to treat the hard link - * grouping as erroneous and split up the hard link group. - */ -static int share_dentry_ads(struct dentry *owner, struct dentry *user) -{ - const char *mismatch_type; - bool data_streams_shared = true; - wimlib_assert(owner->num_ads == 0 || - owner->ads_entries != user->ads_entries); - if (owner->attributes != user->attributes) { - mismatch_type = "attributes"; - goto mismatch; - } - if (owner->attributes & FILE_ATTRIBUTE_DIRECTORY) { - WARNING("`%s' is hard-linked to `%s', which is a directory ", - user->full_path_utf8, owner->full_path_utf8); - return WIMLIB_ERR_INVALID_DENTRY; - } - if (owner->security_id != user->security_id) { - mismatch_type = "security ID"; - goto mismatch; - } - if (!hashes_equal(owner->hash, user->hash)) { - if (is_zero_hash(user->hash)) { - data_streams_shared = false; - copy_hash(user->hash, owner->hash); - } else { - mismatch_type = "main file resource"; - goto mismatch; - } - } - if (data_streams_shared) { - if (!dentries_have_same_ads(owner, user)) { - mismatch_type = "Alternate Stream Entries"; - goto mismatch; - } - } - if (owner->last_access_time != user->last_access_time - || owner->last_write_time != user->last_write_time - || owner->creation_time != user->creation_time) { - } - dentry_free_ads_entries(user); - user->ads_entries = owner->ads_entries; - user->ads_entries_status = ADS_ENTRIES_USER; - return 0; -mismatch: - WARNING("Dentries `%s' and `%s' are supposedly in the same hard-link " - "group but do not share the same %s", - owner->full_path_utf8, user->full_path_utf8, - mismatch_type); - return WIMLIB_ERR_INVALID_DENTRY; -} -#endif