Extract WIM hard links correctly
author Eric Biggers <ebiggers3@gmail.com>
Mon, 20 Aug 2012 00:39:51 +0000 (19:39 -0500)
committer Eric Biggers <ebiggers3@gmail.com>
Mon, 20 Aug 2012 00:39:51 +0000 (19:39 -0500)
src/dentry.c
src/dentry.h
src/extract.c
src/hardlink.c
src/wim.c
src/wimlib_internal.h

index bccfce2c540e4475b74e4619982965835c0adc49..0e3d977751e7a5a74fd47eacf66eb9b32d3d84ad 100644 (file)
@@ -533,6 +533,7 @@ static void __destroy_dentry(struct dentry *dentry)
        FREE(dentry->file_name_utf8);
        FREE(dentry->short_name);
        FREE(dentry->full_path_utf8);
+       FREE(dentry->extracted_file);
 }
 
 void free_dentry(struct dentry *dentry)
index 3a3432eadbeafe36a41839235404b53b33884d2f..16cf5f37bba23200891fac458fca6e6f21385b79 100644 (file)
@@ -114,7 +114,6 @@ struct dentry {
         * included only the length field, but that takes up 8 bytes. */
        u64 length;
 
-
        /* The file attributes associated with this file. */
        u32 attributes;
 
@@ -153,14 +152,6 @@ struct dentry {
         * read_dentry() function. */
        //u32 reparse_reserved;
 
-       /* If the reparse_reserved field existed, there would be a 4-byte gap
-        * here to align hard_link on an 8-byte field.  However,
-        * reparse_reserved does not actually exist, so there is no gap here. */
-
-       /* If the file is part of a hard link set, all the directory entries in
-        * the set will share the same value for this field. */
-       u64 hard_link;
-
        /* Number of alternate data streams associated with this file. */
        u16 num_ads;
 
@@ -202,13 +193,30 @@ struct dentry {
                u32 num_times_opened;
        };
 
-       /* List of dentries in the hard link set */
+       /* If the file is part of a hard link set, all the directory entries in
+        * the set will share the same value for this field. */
+       u64 hard_link;
+
        enum {
+               /* This dentry is the owner of its ads_entries, although it may
+                * be in a hard link set */
                GROUP_INDEPENDENT,
+
+               /* This dentry is the owner of the ads_entries in the hard link
+                * set */
                GROUP_MASTER,
+
+               /* This dentry shares its ads_entries with a dentry in the hard
+                * link set that has GROUP_MASTER set. */
                GROUP_SLAVE
        } link_group_master_status;
+
+
+       /* List of dentries in the hard link set */
        struct list_head link_group_list;
+
+       /* Path to extracted file on disk (used during extraction only) */
+       char *extracted_file;
 };
 
 /* Return hash of the "unnamed" (default) data stream. */
index 70384329455a1cba7ef60570b0e7bce40e31dce9..0cb06b87edb11d802845d254fda12c083be75515 100644 (file)
@@ -53,6 +53,7 @@ static int extract_regular_file_linked(const struct dentry *dentry,
         * instead either symlinks or hardlinks *all* identical files in
         * the WIM, even if they are in a different image (in the case
         * of a multi-image extraction) */
+
        wimlib_assert(lte->file_on_disk);
 
        if (extract_flags & WIMLIB_EXTRACT_FLAG_HARDLINK) {
@@ -107,15 +108,50 @@ static int extract_regular_file_linked(const struct dentry *dentry,
 }
 
 static int extract_regular_file_unlinked(WIMStruct *w,
-                                        const struct dentry *dentry, 
+                                        struct dentry *dentry, 
                                         const char *output_path,
                                         int extract_flags,
                                         struct lookup_table_entry *lte)
 {
+       /* Normal mode of extraction.  Regular files and hard links are
+        * extracted in the way that they appear in the WIM. */
+
        int out_fd;
        const struct resource_entry *res_entry;
        int ret;
-       /* Otherwise, we must actually extract the file contents. */
+       const struct list_head *head = &dentry->link_group_list;
+
+       if (head->next != head) {
+               /* This dentry is one of a hard link set of at least 2 dentries.
+                * If one of the other dentries has already been extracted, make
+                * a hard link to the file corresponding to this
+                * already-extracted dentry.  Otherwise, extract the
+                * file, and set the dentry->extracted_file field so that other
+                * dentries in the hard link group can link to it. */
+               struct dentry *other;
+               list_for_each_entry(other, head, link_group_list) {
+                       if (other->extracted_file) {
+                               DEBUG("Extracting hard link `%s' => `%s'",
+                                     output_path, other->extracted_file);
+                               if (link(other->extracted_file, output_path) != 0) {
+                                       ERROR_WITH_ERRNO("Failed to hard link "
+                                                        "`%s' to `%s'",
+                                                        output_path,
+                                                        other->extracted_file);
+                                       return WIMLIB_ERR_LINK;
+                               }
+                               return 0;
+                       }
+               }
+               FREE(dentry->extracted_file);
+               dentry->extracted_file = STRDUP(output_path);
+               if (!dentry->extracted_file) {
+                       ERROR("Failed to allocate memory for filename");
+                       return WIMLIB_ERR_NOMEM;
+               }
+       }
+
+       /* Extract the contents of the file to @output_path. */
 
        out_fd = open(output_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
        if (out_fd == -1) {
@@ -124,13 +160,14 @@ static int extract_regular_file_unlinked(WIMStruct *w,
                return WIMLIB_ERR_OPEN;
        }
 
-       /* Extract empty file, with no lookup table entry... */
        if (!lte) {
+               /* Empty file with no lookup table entry */
                DEBUG("Empty file `%s'.", output_path);
                ret = 0;
                goto done;
        }
 
+
        res_entry = &lte->resource_entry;
 
        ret = extract_resource_to_fd(w, res_entry, out_fd, 
@@ -141,14 +178,20 @@ static int extract_regular_file_unlinked(WIMStruct *w,
                goto done;
        }
 
-       /* Mark the lookup table entry to indicate this file has been extracted. */
-       lte->out_refcnt++;
-       FREE(lte->file_on_disk);
-       lte->file_on_disk = STRDUP(output_path);
-       if (!lte->file_on_disk)
-               ret = WIMLIB_ERR_NOMEM;
+       if (extract_flags & WIMLIB_EXTRACT_FLAG_MULTI_IMAGE) {
+               /* Mark the lookup table entry to indicate this file has been
+                * extracted. */
+               lte->out_refcnt++;
+               FREE(lte->file_on_disk);
+               lte->file_on_disk = STRDUP(output_path);
+               if (!lte->file_on_disk)
+                       ret = WIMLIB_ERR_NOMEM;
+       }
 done:
-       close(out_fd);
+       if (close(out_fd) != 0) {
+               ERROR_WITH_ERRNO("Failed to close file `%s'", output_path);
+               ret = WIMLIB_ERR_WRITE;
+       }
        return ret;
 }
 
@@ -156,7 +199,7 @@ done:
  * Extracts a regular file from the WIM archive. 
  */
 static int extract_regular_file(WIMStruct *w, 
-                               const struct dentry *dentry, 
+                               struct dentry *dentry, 
                                const char *output_dir,
                                const char *output_path,
                                int extract_flags)
@@ -165,9 +208,6 @@ static int extract_regular_file(WIMStruct *w,
 
        lte = __lookup_resource(w->lookup_table, dentry_hash(dentry));
 
-       /* If we already extracted the same file or a hard link copy of it, we
-        * may be able to simply create a link.  The exact action is specified
-        * by the current @link_type. */
        if ((extract_flags & (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK)) &&
              lte && lte->out_refcnt != 0)
                return extract_regular_file_linked(dentry, output_dir,
@@ -205,7 +245,7 @@ static int extract_symlink(const struct dentry *dentry, const char *output_path,
  * @output_path:       The path to which the directory is to be extracted to.
  * @return:            True on success, false on failure. 
  */
-static int extract_directory(struct dentry *dentry, const char *output_path)
+static int extract_directory(const char *output_path)
 {
        /* Compute the output path directory to the directory. */
        if (mkdir(output_path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) != 0) 
@@ -260,7 +300,7 @@ static int extract_dentry(struct dentry *dentry, void *arg)
        if (dentry_is_symlink(dentry)) {
                ret = extract_symlink(dentry, output_path, w);
        } else if (dentry_is_directory(dentry)) {
-               ret = extract_directory(dentry, output_path);
+               ret = extract_directory(output_path);
        } else {
                ret = extract_regular_file(w, dentry, args->output_dir,
                                           output_path, extract_flags);
@@ -306,6 +346,10 @@ static int extract_all_images(WIMStruct *w, const char *output_dir,
 
        DEBUG("Attempting to extract all images from `%s'", w->filename);
 
+       ret = extract_directory(output_dir);
+       if (ret != 0)
+               return ret;
+
        memcpy(buf, output_dir, output_path_len);
        buf[output_path_len] = '/';
        for (image = 1; image <= w->hdr.image_count; image++) {
@@ -338,10 +382,12 @@ WIMLIBAPI int wimlib_extract_image(WIMStruct *w, int image,
                        == (WIMLIB_EXTRACT_FLAG_SYMLINK | WIMLIB_EXTRACT_FLAG_HARDLINK))
                return WIMLIB_ERR_INVALID_PARAM;
 
-       if (image == WIM_ALL_IMAGES)
+       if (image == WIM_ALL_IMAGES) {
                flags |= WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
-       else
+               for_lookup_table_entry(w->lookup_table, zero_out_refcnts, NULL);
+       } else {
                flags &= ~WIMLIB_EXTRACT_FLAG_MULTI_IMAGE;
+       }
        
        if ((flags & WIMLIB_EXTRACT_FLAG_NTFS)) {
        #ifdef WITH_NTFS_3G
index 7519b0470b5c82958145088fa0e85601dda14f5d..8067020beb4531f3e03ade1063a61faf365bad44 100644 (file)
@@ -169,41 +169,34 @@ u64 assign_link_groups(struct link_group_table *table)
 static int link_group_free_duplicate_data(struct link_group *group,
                                          struct link_group **bad_links)
 {
-       struct list_head *head;
-       struct list_head *next;
-       struct dentry *master;
+       struct dentry *master, *slave, *tmp;
 
-       head = group->dentry_list;
-       master = container_of(head, struct dentry, link_group_list);
-       head = head->next;
+       master = container_of(group->dentry_list, struct dentry,
+                             link_group_list);
        master->link_group_master_status = GROUP_MASTER;
-       while (head != group->dentry_list) {
-               next = head->next;
-               struct dentry *slave;
-               int ret;
-
-               slave = container_of(head, struct dentry, link_group_list);
-               ret = share_dentry_ads(master, slave);
 
+       list_for_each_entry_safe(slave, tmp, group->dentry_list,
+                                link_group_list)
+       {
                /* I would it to be an error if two dentries are the same hard
                 * link group but have irreconcilable differences such as
                 * different file permissions, but unfortunately some of M$'s
                 * WIMs contain many instances of this error.  This problem is
                 * worked around here by splitting each offending dentry off
                 * into its own hard link group. */
-               if (ret != 0) {
+               if (share_dentry_ads(master, slave) != 0) {
                        struct link_group *single;
                        single = MALLOC(sizeof(struct link_group));
                        if (!single)
                                return WIMLIB_ERR_NOMEM;
+                       list_del(&slave->link_group_list);
+                       INIT_LIST_HEAD(&slave->link_group_list);
                        single->link_group_id = 0;
                        single->next          = *bad_links;
+                       single->dentry_list   = &slave->link_group_list;
                        *bad_links            = single;
-                       INIT_LIST_HEAD(&slave->link_group_list);
-                       single->dentry_list = &slave->link_group_list;
                        slave->link_group_master_status = GROUP_INDEPENDENT;
                }
-               head = next;
        }
        return 0;
 }
index 6042a90c17809cf6a4fe74c5d84df4d2932a5cd9..45df3657f80c1c9e5b8adaf22c38a4576ec66e4a 100644 (file)
--- a/src/wim.c
+++ b/src/wim.c
@@ -208,7 +208,9 @@ int wimlib_select_image(WIMStruct *w, int image)
                if (!imd->modified) {
                        DEBUG("Freeing image %u", w->current_image);
                        destroy_image_metadata(imd, NULL);
-                       memset(imd, 0, sizeof(*imd));
+                       imd->root_dentry = NULL;
+                       imd->security_data = NULL;
+                       imd->lgt = NULL;
                }
        }
 
@@ -526,7 +528,7 @@ WIMLIBAPI int wimlib_open_wim(const char *wim_file, int flags,
 
        ret = begin_read(w, wim_file, flags);
        if (ret != 0) {
-               ERROR("Could not begin reading the WIM file `%s'", wim_file);
+               DEBUG("Could not begin reading the WIM file `%s'", wim_file);
                wimlib_free(w);
                return ret;
        }
index c04ffb1aee312f47fc42ce518094840a17434875..03e769290148b6e71cfc56feb5fc225c6d386408 100644 (file)
@@ -219,13 +219,13 @@ struct image_metadata {
        /* Pointer to the security data for the image. */
        struct wim_security_data *security_data;
 
+       /* Hard link group table */
+       struct link_group_table *lgt;
+
        /* A pointer to the lookup table entry for this image's metadata
         * resource. */
        struct lookup_table_entry *metadata_lte;
 
-       /* Hard link group table */
-       struct link_group_table *lgt;
-
        /* True if the filesystem of the image has been modified.  If this is
         * the case, the memory for the filesystem is not freed when switching
         * to a different WIM image. */