From: Eric Biggers Date: Mon, 20 Aug 2012 00:39:51 +0000 (-0500) Subject: Extract WIM hard links correctly X-Git-Tag: v1.0.0~125 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=b3f993d2ece8fda5c989dc8c0311732fb20e3233 Extract WIM hard links correctly --- diff --git a/src/dentry.c b/src/dentry.c index bccfce2c..0e3d9777 100644 --- a/src/dentry.c +++ b/src/dentry.c @@ -533,6 +533,7 @@ static void __destroy_dentry(struct dentry *dentry) FREE(dentry->file_name_utf8); FREE(dentry->short_name); FREE(dentry->full_path_utf8); + FREE(dentry->extracted_file); } void free_dentry(struct dentry *dentry) diff --git a/src/dentry.h b/src/dentry.h index 3a3432ea..16cf5f37 100644 --- a/src/dentry.h +++ b/src/dentry.h @@ -114,7 +114,6 @@ struct dentry { * included only the length field, but that takes up 8 bytes. */ u64 length; - /* The file attributes associated with this file. */ u32 attributes; @@ -153,14 +152,6 @@ struct dentry { * read_dentry() function. */ //u32 reparse_reserved; - /* If the reparse_reserved field existed, there would be a 4-byte gap - * here to align hard_link on an 8-byte field. However, - * reparse_reserved does not actually exist, so there is no gap here. */ - - /* If the file is part of a hard link set, all the directory entries in - * the set will share the same value for this field. */ - u64 hard_link; - /* Number of alternate data streams associated with this file. */ u16 num_ads; @@ -202,13 +193,30 @@ struct dentry { u32 num_times_opened; }; - /* List of dentries in the hard link set */ + /* If the file is part of a hard link set, all the directory entries in + * the set will share the same value for this field. 
*/ + u64 hard_link; + enum { + /* This dentry is the owner of its ads_entries, although it may + * be in a hard link set */ GROUP_INDEPENDENT, + + /* This dentry is the owner of the ads_entries in the hard link + * set */ GROUP_MASTER, + + /* This dentry shares its ads_entries with a dentry in the hard + * link set that has GROUP_MASTER set. */ GROUP_SLAVE } link_group_master_status; + + + /* List of dentries in the hard link set */ struct list_head link_group_list; + + /* Path to extracted file on disk (used during extraction only) */ + char *extracted_file; }; /* Return hash of the "unnamed" (default) data stream. */ diff --git a/src/extract.c b/src/extract.c index 70384329..0cb06b87 100644 --- a/src/extract.c +++ b/src/extract.c @@ -53,6 +53,7 @@ static int extract_regular_file_linked(const struct dentry *dentry, * instead either symlinks or hardlinks *all* identical files in * the WIM, even if they are in a different image (in the case * of a multi-image extraction) */ + wimlib_assert(lte->file_on_disk); if (extract_flags & WIMLIB_EXTRACT_FLAG_HARDLINK) { @@ -107,15 +108,50 @@ static int extract_regular_file_linked(const struct dentry *dentry, } static int extract_regular_file_unlinked(WIMStruct *w, - const struct dentry *dentry, + struct dentry *dentry, const char *output_path, int extract_flags, struct lookup_table_entry *lte) { + /* Normal mode of extraction. Regular files and hard links are + * extracted in the way that they appear in the WIM. */ + int out_fd; const struct resource_entry *res_entry; int ret; - /* Otherwise, we must actually extract the file contents. */ + const struct list_head *head = &dentry->link_group_list; + + if (head->next != head) { + /* This dentry is one of a hard link set of at least 2 dentries. + * If one of the other dentries has already been extracted, make + * a hard link to the file corresponding to this + * already-extracted directory. 
Otherwise, extract the + * file, and set the dentry->extracted_file field so that other + * dentries in the hard link group can link to it. */ + struct dentry *other; + list_for_each_entry(other, head, link_group_list) { + if (other->extracted_file) { + DEBUG("Extracting hard link `%s' => `%s'", + output_path, other->extracted_file); + if (link(other->extracted_file, output_path) != 0) { + ERROR_WITH_ERRNO("Failed to hard link " + "`%s' to `%s'", + output_path, + other->extracted_file); + return WIMLIB_ERR_LINK; + } + return 0; + } + } + FREE(dentry->extracted_file); + dentry->extracted_file = STRDUP(output_path); + if (!dentry->extracted_file) { + ERROR("Failed to allocate memory for filename"); + return WIMLIB_ERR_NOMEM; + } + } + + /* Extract the contents of the file to @output_path. */ out_fd = open(output_path, O_WRONLY | O_CREAT | O_TRUNC, 0644); if (out_fd == -1) { @@ -124,13 +160,14 @@ static int extract_regular_file_unlinked(WIMStruct *w, return WIMLIB_ERR_OPEN; } - /* Extract empty file, with no lookup table entry... */ if (!lte) { + /* Empty file with no lookup table entry */ DEBUG("Empty file `%s'.", output_path); ret = 0; goto done; } + res_entry = &lte->resource_entry; ret = extract_resource_to_fd(w, res_entry, out_fd, @@ -141,14 +178,20 @@ static int extract_regular_file_unlinked(WIMStruct *w, goto done; } - /* Mark the lookup table entry to indicate this file has been extracted. */ - lte->out_refcnt++; - FREE(lte->file_on_disk); - lte->file_on_disk = STRDUP(output_path); - if (!lte->file_on_disk) - ret = WIMLIB_ERR_NOMEM; + if (extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) { + /* Mark the lookup table entry to indicate this file has been + * extracted. 
*/ + lte->out_refcnt++; + FREE(lte->file_on_disk); + lte->file_on_disk = STRDUP(output_path); + if (!lte->file_on_disk) + ret = WIMLIB_ERR_NOMEM; + } done: - close(out_fd); + if (close(out_fd) != 0) { + ERROR_WITH_ERRNO("Failed to close file `%s'", output_path); + ret = WIMLIB_ERR_WRITE; + } return ret; } @@ -156,7 +199,7 @@ done: * Extracts a regular file from the WIM archive. */ static int extract_regular_file(WIMStruct *w, - const struct dentry *dentry, + struct dentry *dentry, const char *output_dir, const char *output_path, int extract_flags) @@ -165,9 +208,6 @@ static int extract_regular_file(WIMStruct *w, lte = __lookup_resource(w->lookup_table, dentry_hash(dentry)); - /* If we already extracted the same file or a hard link copy of it, we - * may be able to simply create a link. The exact action is specified - * by the current @link_type. */ if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK)) && lte && lte->out_refcnt != 0) return extract_regular_file_linked(dentry, output_dir, @@ -205,7 +245,7 @@ static int extract_symlink(const struct dentry *dentry, const char *output_path, * @output_path: The path to which the directory is to be extracted to. * @return: True on success, false on failure. */ -static int extract_directory(struct dentry *dentry, const char *output_path) +static int extract_directory(const char *output_path) { /* Compute the output path directory to the directory. 
*/ if (mkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) != 0) @@ -260,7 +300,7 @@ static int extract_dentry(struct dentry *dentry, void *arg) if (dentry_is_symlink(dentry)) { ret = extract_symlink(dentry, output_path, w); } else if (dentry_is_directory(dentry)) { - ret = extract_directory(dentry, output_path); + ret = extract_directory(output_path); } else { ret = extract_regular_file(w, dentry, args->output_dir, output_path, extract_flags); @@ -306,6 +346,10 @@ static int extract_all_images(WIMStruct *w, const char *output_dir, DEBUG("Attempting to extract all images from `%s'", w->filename); + ret = extract_directory(output_dir); + if (ret != 0) + return ret; + memcpy(buf, output_dir, output_path_len); buf[output_path_len] = '/'; for (image = 1; image <= w->hdr.image_count; image++) { @@ -338,10 +382,12 @@ WIMLIBAPI int wimlib_extract_image(WIMStruct *w, int image, == (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK)) return WIMLIB_ERR_INVALID_PARAM; - if (image == WIM_ALL_IMAGES) + if (image == WIM_ALL_IMAGES) { flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE; - else + for_lookup_table_entry(w->lookup_table, zero_out_refcnts, NULL); + } else { flags &= ~WIMLIB_EXTRACT_FLAG_MULTI_IMAGE; + } if ((flags & WIMLIB_EXTRACT_FLAG_NTFS)) { #ifdef WITH_NTFS_3G diff --git a/src/hardlink.c b/src/hardlink.c index 7519b047..8067020b 100644 --- a/src/hardlink.c +++ b/src/hardlink.c @@ -169,41 +169,34 @@ u64 assign_link_groups(struct link_group_table *table) static int link_group_free_duplicate_data(struct link_group *group, struct link_group **bad_links) { - struct list_head *head; - struct list_head *next; - struct dentry *master; + struct dentry *master, *slave, *tmp; - head = group->dentry_list; - master = container_of(head, struct dentry, link_group_list); - head = head->next; + master = container_of(group->dentry_list, struct dentry, + link_group_list); master->link_group_master_status = GROUP_MASTER; - while (head != group->dentry_list) { - next = 
head->next; - struct dentry *slave; - int ret; - - slave = container_of(head, struct dentry, link_group_list); - ret = share_dentry_ads(master, slave); + list_for_each_entry_safe(slave, tmp, group->dentry_list, + link_group_list) + { /* I would it to be an error if two dentries are the same hard * link group but have irreconcilable differences such as * different file permissions, but unfortunately some of M$'s * WIMs contain many instances of this error. This problem is * worked around here by splitting each offending dentry off * into its own hard link group. */ - if (ret != 0) { + if (share_dentry_ads(master, slave) != 0) { struct link_group *single; single = MALLOC(sizeof(struct link_group)); if (!single) return WIMLIB_ERR_NOMEM; + list_del(&slave->link_group_list); + INIT_LIST_HEAD(&slave->link_group_list); single->link_group_id = 0; single->next = *bad_links; + single->dentry_list = &slave->link_group_list; *bad_links = single; - INIT_LIST_HEAD(&slave->link_group_list); - single->dentry_list = &slave->link_group_list; slave->link_group_master_status = GROUP_INDEPENDENT; } - head = next; } return 0; } diff --git a/src/wim.c b/src/wim.c index 6042a90c..45df3657 100644 --- a/src/wim.c +++ b/src/wim.c @@ -208,7 +208,9 @@ int wimlib_select_image(WIMStruct *w, int image) if (!imd->modified) { DEBUG("Freeing image %u", w->current_image); destroy_image_metadata(imd, NULL); - memset(imd, 0, sizeof(*imd)); + imd->root_dentry = NULL; + imd->security_data = NULL; + imd->lgt = NULL; } } @@ -526,7 +528,7 @@ WIMLIBAPI int wimlib_open_wim(const char *wim_file, int flags, ret = begin_read(w, wim_file, flags); if (ret != 0) { - ERROR("Could not begin reading the WIM file `%s'", wim_file); + DEBUG("Could not begin reading the WIM file `%s'", wim_file); wimlib_free(w); return ret; } diff --git a/src/wimlib_internal.h b/src/wimlib_internal.h index c04ffb1a..03e76929 100644 --- a/src/wimlib_internal.h +++ b/src/wimlib_internal.h @@ -219,13 +219,13 @@ struct image_metadata { /* 
Pointer to the security data for the image. */ struct wim_security_data *security_data; + /* Hard link group table */ + struct link_group_table *lgt; + /* A pointer to the lookup table entry for this image's metadata * resource. */ struct lookup_table_entry *metadata_lte; - /* Hard link group table */ - struct link_group_table *lgt; - /* True if the filesystem of the image has been modified. If this is * the case, the memory for the filesystem is not freed when switching * to a different WIM image. */