wimlib.net Git - wimlib/blobdiff - src/unix_apply.c
unix_apply.c: support applying extended attributes
[wimlib] / src / unix_apply.c
index 077b49db9aa9e694dbe85f6536d3c3c9907e6cea..3b31e194287fedf85326c7d42a95c857241a7f33 100644 (file)
@@ -3,45 +3,48 @@
  */
 
 /*
- * Copyright (C) 2012, 2013, 2014 Eric Biggers
+ * Copyright (C) 2012-2016 Eric Biggers
  *
- * This file is part of wimlib, a library for working with WIM files.
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
  *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
- *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  * details.
  *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
  */
 
 #ifdef HAVE_CONFIG_H
 #  include "config.h"
 #endif
 
-#include "wimlib/apply.h"
-#include "wimlib/dentry.h"
-#include "wimlib/error.h"
-#include "wimlib/file_io.h"
-#include "wimlib/reparse.h"
-#include "wimlib/timestamp.h"
-#include "wimlib/unix_data.h"
-
 #include <errno.h>
 #include <fcntl.h>
-#include <limits.h>
 #include <stdlib.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/types.h>
+#ifdef HAVE_SYS_XATTR_H
+#  include <sys/xattr.h>
+#endif
 #include <unistd.h>
 
+#include "wimlib/apply.h"
+#include "wimlib/assert.h"
+#include "wimlib/blob_table.h"
+#include "wimlib/dentry.h"
+#include "wimlib/error.h"
+#include "wimlib/file_io.h"
+#include "wimlib/reparse.h"
+#include "wimlib/timestamp.h"
+#include "wimlib/unix_data.h"
+#include "wimlib/xattr.h"
+
 /* We don't require O_NOFOLLOW, but the advantage of having it is that if we
  * need to extract a file to a location at which there exists a symbolic link,
  * open(..., O_NOFOLLOW | ...) recognizes the symbolic link rather than
@@ -55,17 +58,19 @@ static int
 unix_get_supported_features(const char *target,
                            struct wim_features *supported_features)
 {
+       supported_features->sparse_files = 1;
        supported_features->hard_links = 1;
        supported_features->symlink_reparse_points = 1;
        supported_features->unix_data = 1;
        supported_features->timestamps = 1;
        supported_features->case_sensitive_filenames = 1;
+#ifdef HAVE_XATTR_SUPPORT
+       supported_features->linux_xattrs = 1;
+#endif
        return 0;
 }
 
 #define NUM_PATHBUFS 2  /* We need 2 when creating hard links  */
-#define MAX_OPEN_FDS 1000 /* TODO: Add special case for when the number of
-                            identical streams exceeds this number.  */
 
 struct unix_apply_ctx {
        /* Extract flags, the pointer to the WIMStruct, etc.  */
@@ -78,13 +83,20 @@ struct unix_apply_ctx {
        unsigned which_pathbuf;
 
        /* Currently open file descriptors for extraction  */
-       struct filedes open_fds[MAX_OPEN_FDS];
+       struct filedes open_fds[MAX_OPEN_FILES];
 
        /* Number of currently open file descriptors in open_fds, starting from
         * the beginning of the array.  */
        unsigned num_open_fds;
 
-       /* Buffer for reading reparse data streams into memory  */
+       /* For each currently open file, whether we're writing to it in "sparse"
+        * mode or not.  */
+       bool is_sparse_file[MAX_OPEN_FILES];
+
+       /* Whether is_sparse_file[] is true for any currently open file  */
+       bool any_sparse_files;
+
+       /* Buffer for reading reparse point data into memory  */
        u8 reparse_data[REPARSE_DATA_MAX_SIZE];
 
        /* Pointer to the next byte in @reparse_data to fill  */
@@ -159,7 +171,9 @@ unix_build_extraction_path(const struct wim_dentry *dentry,
        d = dentry;
        do {
                p -= d->d_extraction_name_nchars;
-               memcpy(p, d->d_extraction_name, d->d_extraction_name_nchars);
+               if (d->d_extraction_name_nchars)
+                       memcpy(p, d->d_extraction_name,
+                              d->d_extraction_name_nchars);
                *--p = '/';
                d = d->d_parent;
        } while (!dentry_is_root(d) && will_extract_dentry(d));
@@ -184,6 +198,17 @@ unix_build_inode_extraction_path(const struct wim_inode *inode,
        return unix_build_extraction_path(inode_first_extraction_dentry(inode), ctx);
 }
 
+/* Returns true if @inode should be extracted as a directory on UNIX: that is,
+ * FILE_ATTRIBUTE_DIRECTORY is set and inode_is_symlink() is false (no symlink
+ * or junction reparse point).  The inode *may* still carry some other type of
+ * reparse point; that alone does not stop it from being a directory here.  */
+static inline bool
+should_extract_as_directory(const struct wim_inode *inode)
+{
+       return (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) &&
+               !inode_is_symlink(inode);
+}
+
 /* Sets the timestamps on a file being extracted.
  *
  * Either @fd or @path must be specified (not -1 and not NULL, respectively).
@@ -243,6 +268,109 @@ unix_set_mode(int fd, const char *path, mode_t mode)
        return WIMLIB_ERR_SET_SECURITY;
 }
 
+#ifdef HAVE_XATTR_SUPPORT
+/* Apply the xattr entries in @entries to the file @fd, or @path if @fd < 0 */
+static int
+apply_linux_xattrs(int fd, const struct wim_inode *inode,
+                  const char *path, struct unix_apply_ctx *ctx,
+                  const void *entries, size_t entries_size)
+{
+       const void * const entries_end = entries + entries_size;
+       char name[XATTR_NAME_MAX + 1];
+       /* Visit every entry in [entries, entries_end).  */
+       for (const struct wimlib_xattr_entry *entry = entries;
+            (void *)entry < entries_end; entry = xattr_entry_next(entry))
+       {
+               u16 name_len;
+               const void *value;
+               u32 value_len;
+               int res;
+               /* Validate first (presumably this bounds name_len too -- confirm).  */
+               if (!valid_xattr_entry(entry, entries_end - (void *)entry)) {
+                       if (!path) {
+                               path = unix_build_inode_extraction_path(inode,
+                                                                       ctx);
+                       }
+                       ERROR("\"%s\": extended attribute is corrupt", path);
+                       return WIMLIB_ERR_INVALID_XATTR;
+               }
+               name_len = le16_to_cpu(entry->name_len);
+               memcpy(name, entry->name, name_len);
+               name[name_len] = '\0';
+               /* The value bytes immediately follow the name bytes.  */
+               value = entry->name + name_len;
+               value_len = le32_to_cpu(entry->value_len);
+               /* Prefer the open descriptor; otherwise go by path.  */
+               if (fd >= 0)
+                       res = fsetxattr(fd, name, value, value_len, 0);
+               else
+                       res = lsetxattr(path, name, value, value_len, 0);
+               /* Only a security xattr under STRICT_ACLS is fatal; else warn.  */
+               if (unlikely(res != 0)) {
+                       if (!path) {
+                               path = unix_build_inode_extraction_path(inode,
+                                                                       ctx);
+                       }
+                       if (is_security_xattr(name) &&
+                           (ctx->common.extract_flags &
+                            WIMLIB_EXTRACT_FLAG_STRICT_ACLS))
+                       {
+                               ERROR_WITH_ERRNO("\"%s\": unable to set extended attribute \"%s\"",
+                                                path, name);
+                               return WIMLIB_ERR_SET_XATTR;
+                       }
+                       WARNING_WITH_ERRNO("\"%s\": unable to set extended attribute \"%s\"",
+                                          path, name);
+               }
+       }
+       return 0;
+}
+#endif /* HAVE_XATTR_SUPPORT */
+
+/* Apply the uid, gid, and (except for symlinks) mode in @dat to the file */
+static int
+apply_unix_permissions(int fd, const struct wim_inode *inode,
+                      const char *path, struct unix_apply_ctx *ctx,
+                      const struct wimlib_unix_data *dat)
+{
+       int ret;
+       /* Owner and group first; a failure is fatal only under STRICT_ACLS.  */
+       ret = unix_set_owner_and_group(fd, path, dat->uid, dat->gid);
+       if (ret) {
+               if (!path)
+                       path = unix_build_inode_extraction_path(inode, ctx);
+               if (ctx->common.extract_flags &
+                   WIMLIB_EXTRACT_FLAG_STRICT_ACLS)
+               {
+                       ERROR_WITH_ERRNO("\"%s\": unable to set uid=%"PRIu32" and gid=%"PRIu32,
+                                        path, dat->uid, dat->gid);
+                       return ret;
+               }
+               WARNING_WITH_ERRNO("\"%s\": unable to set uid=%"PRIu32" and gid=%"PRIu32,
+                                  path, dat->uid, dat->gid);
+       }
+       /* The mode is not set on symlinks.  */
+       if (!inode_is_symlink(inode)) {
+               ret = unix_set_mode(fd, path, dat->mode);
+               if (ret) {
+                       if (!path)
+                               path = unix_build_inode_extraction_path(inode,
+                                                                       ctx);
+                       if (ctx->common.extract_flags &
+                           WIMLIB_EXTRACT_FLAG_STRICT_ACLS)
+                       {
+                               ERROR_WITH_ERRNO("\"%s\": unable to set mode=0%"PRIo32,
+                                                path, dat->mode);
+                               return ret;
+                       }
+                       WARNING_WITH_ERRNO("\"%s\": unable to set mode=0%"PRIo32,
+                                          path, dat->mode);
+               }
+       }
+       /* Non-strict failures were only warned about, so report success.  */
+       return 0;
+}
+
 /*
  * Set metadata on an extracted file.
  *
@@ -256,57 +384,33 @@ unix_set_metadata(int fd, const struct wim_inode *inode,
                  const char *path, struct unix_apply_ctx *ctx)
 {
        int ret;
-       struct wimlib_unix_data unix_data;
 
        if (fd < 0 && !path)
                path = unix_build_inode_extraction_path(inode, ctx);
 
-       if ((ctx->common.extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA)
-           && inode_get_unix_data(inode, &unix_data))
-       {
-               u32 uid = unix_data.uid;
-               u32 gid = unix_data.gid;
-               u32 mode = unix_data.mode;
+       if (ctx->common.extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) {
+               struct wimlib_unix_data dat;
+       #ifdef HAVE_XATTR_SUPPORT
+               const void *entries;
+               u32 entries_size;
 
-               ret = unix_set_owner_and_group(fd, path, uid, gid);
-               if (ret) {
-                       if (!path)
-                               path = unix_build_inode_extraction_path(inode, ctx);
-                       if (ctx->common.extract_flags &
-                           WIMLIB_EXTRACT_FLAG_STRICT_ACLS)
-                       {
-                               ERROR_WITH_ERRNO("Can't set uid=%"PRIu32" and "
-                                                "gid=%"PRIu32" on \"%s\"",
-                                                uid, gid, path);
+               entries = inode_get_linux_xattrs(inode, &entries_size);
+               if (entries) {
+                       ret = apply_linux_xattrs(fd, inode, path, ctx,
+                                                entries, entries_size);
+                       if (ret)
                                return ret;
-                       } else {
-                               WARNING_WITH_ERRNO("Can't set uid=%"PRIu32" and "
-                                                  "gid=%"PRIu32" on \"%s\"",
-                                                  uid, gid, path);
-                       }
                }
-
-               ret = 0;
-               if (!inode_is_symlink(inode))
-                       ret = unix_set_mode(fd, path, mode);
-               if (ret) {
-                       if (!path)
-                               path = unix_build_inode_extraction_path(inode, ctx);
-                       if (ctx->common.extract_flags &
-                           WIMLIB_EXTRACT_FLAG_STRICT_ACLS)
-                       {
-                               ERROR_WITH_ERRNO("Can't set mode=0%"PRIo32" "
-                                                "on \"%s\"", mode, path);
+       #endif
+               if (inode_get_unix_data(inode, &dat)) {
+                       ret = apply_unix_permissions(fd, inode, path, ctx,
+                                                    &dat);
+                       if (ret)
                                return ret;
-                       } else {
-                               WARNING_WITH_ERRNO("Can't set mode=0%"PRIo32" "
-                                                  "on \"%s\"", mode, path);
-                       }
                }
        }
 
-       ret = unix_set_timestamps(fd, path,
-                                 inode->i_last_access_time,
+       ret = unix_set_timestamps(fd, path, inode->i_last_access_time,
                                  inode->i_last_write_time);
        if (ret) {
                if (!path)
@@ -314,12 +418,12 @@ unix_set_metadata(int fd, const struct wim_inode *inode,
                if (ctx->common.extract_flags &
                    WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS)
                {
-                       ERROR_WITH_ERRNO("Can't set timestamps on \"%s\"", path);
+                       ERROR_WITH_ERRNO("\"%s\": unable to set timestamps", path);
                        return ret;
-               } else {
-                       WARNING_WITH_ERRNO("Can't set timestamps on \"%s\"", path);
                }
+               WARNING_WITH_ERRNO("\"%s\": unable to set timestamps", path);
        }
+
        return 0;
 }
 
@@ -333,9 +437,7 @@ unix_create_hardlinks(const struct wim_inode *inode,
        const struct wim_dentry *dentry;
        const char *newpath;
 
-       list_for_each_entry(dentry, &inode->i_extraction_aliases,
-                           d_extraction_alias_node)
-       {
+       inode_for_each_extraction_alias(dentry, inode) {
                if (dentry == first_dentry)
                        continue;
 
@@ -361,7 +463,7 @@ unix_create_if_directory(const struct wim_dentry *dentry,
        const char *path;
        struct stat stbuf;
 
-       if (!dentry_is_directory(dentry))
+       if (!should_extract_as_directory(dentry->d_inode))
                return 0;
 
        path = unix_build_extraction_path(dentry, ctx);
@@ -373,7 +475,8 @@ unix_create_if_directory(const struct wim_dentry *dentry,
                ERROR_WITH_ERRNO("Can't create directory \"%s\"", path);
                return WIMLIB_ERR_MKDIR;
        }
-       return 0;
+
+       return report_file_created(&ctx->common);
 }
 
 /* If @dentry represents an empty regular file or a special file, create it, set
@@ -395,8 +498,8 @@ unix_extract_if_empty_file(const struct wim_dentry *dentry,
 
        /* Is this a directory, a symbolic link, or any type of nonempty file?
         */
-       if (inode_is_directory(inode) || inode_is_symlink(inode) ||
-           inode_unnamed_lte_resolved(inode))
+       if (should_extract_as_directory(inode) || inode_is_symlink(inode) ||
+           inode_get_blob_for_unnamed_data_stream_resolved(inode))
                return 0;
 
        /* Recognize special files in UNIX_DATA mode  */
@@ -427,7 +530,7 @@ unix_extract_if_empty_file(const struct wim_dentry *dentry,
 
                path = unix_build_extraction_path(dentry, ctx);
        retry_create:
-               fd = open(path, O_TRUNC | O_CREAT | O_WRONLY | O_NOFOLLOW, 0644);
+               fd = open(path, O_EXCL | O_CREAT | O_WRONLY | O_NOFOLLOW, 0644);
                if (fd < 0) {
                        if (errno == EEXIST && !unlink(path))
                                goto retry_create;
@@ -445,7 +548,11 @@ unix_extract_if_empty_file(const struct wim_dentry *dentry,
        if (ret)
                return ret;
 
-       return unix_create_hardlinks(inode, dentry, path, ctx);
+       ret = unix_create_hardlinks(inode, dentry, path, ctx);
+       if (ret)
+               return ret;
+
+       return report_file_created(&ctx->common);
 }
 
 static int
@@ -468,43 +575,53 @@ unix_create_dirs_and_empty_files(const struct list_head *dentry_list,
        return 0;
 }
 
+static void
+unix_count_dentries(const struct list_head *dentry_list,
+                   u64 *dir_count_ret, u64 *empty_file_count_ret)
+{
+       const struct wim_dentry *dentry;
+       u64 dir_count = 0;
+       u64 empty_file_count = 0;
+       /* Count the directories and empty files in @dentry_list.  */
+       list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
+               /* Directories count per dentry; empty files once per inode.  */
+               const struct wim_inode *inode = dentry->d_inode;
+               /* An empty file is a non-symlink with no unnamed data blob.  */
+               if (should_extract_as_directory(inode))
+                       dir_count++;
+               else if ((dentry == inode_first_extraction_dentry(inode)) &&
+                        !inode_is_symlink(inode) &&
+                        !inode_get_blob_for_unnamed_data_stream_resolved(inode))
+                       empty_file_count++;
+       }
+
+       *dir_count_ret = dir_count;
+       *empty_file_count_ret = empty_file_count;
+}
+
 static int
 unix_create_symlink(const struct wim_inode *inode, const char *path,
-                   const u8 *rpdata, u16 rpdatalen, bool rpfix,
-                   const char *apply_dir, size_t apply_dir_nchars)
+                   size_t rpdatalen, struct unix_apply_ctx *ctx)
 {
-       char link_target[REPARSE_DATA_MAX_SIZE];
+       char target[REPARSE_POINT_MAX_SIZE];
+       struct blob_descriptor blob_override;
        int ret;
-       struct wim_lookup_table_entry lte_override;
 
-       lte_override.resource_location = RESOURCE_IN_ATTACHED_BUFFER;
-       lte_override.attached_buffer = (void *)rpdata;
-       lte_override.size = rpdatalen;
+       blob_set_is_located_in_attached_buffer(&blob_override,
+                                              ctx->reparse_data, rpdatalen);
 
-       ret = wim_inode_readlink(inode, link_target,
-                                sizeof(link_target) - 1, &lte_override);
-       if (ret < 0) {
+       ret = wim_inode_readlink(inode, target, sizeof(target) - 1,
+                                &blob_override,
+                                ctx->target_abspath,
+                                ctx->target_abspath_nchars);
+       if (unlikely(ret < 0)) {
                errno = -ret;
                return WIMLIB_ERR_READLINK;
        }
+       target[ret] = '\0';
 
-       link_target[ret] = 0;
-
-       if (rpfix && link_target[0] == '/') {
-
-               /* "Fix" the absolute symbolic link by prepending the absolute
-                * path to the target directory.  */
-
-               if (sizeof(link_target) - (ret + 1) < apply_dir_nchars) {
-                       errno = ENAMETOOLONG;
-                       return WIMLIB_ERR_REPARSE_POINT_FIXUP_FAILED;
-               }
-               memmove(link_target + apply_dir_nchars, link_target,
-                       ret + 1);
-               memcpy(link_target, apply_dir, apply_dir_nchars);
-       }
 retry_symlink:
-       if (symlink(link_target, path)) {
+       if (symlink(target, path)) {
                if (errno == EEXIST && !unlink(path))
                        goto retry_symlink;
                return WIMLIB_ERR_LINK;
@@ -518,63 +635,75 @@ unix_cleanup_open_fds(struct unix_apply_ctx *ctx, unsigned offset)
        for (unsigned i = offset; i < ctx->num_open_fds; i++)
                filedes_close(&ctx->open_fds[i]);
        ctx->num_open_fds = 0;
+       ctx->any_sparse_files = false;
 }
 
 static int
-unix_begin_extract_stream_instance(const struct wim_lookup_table_entry *stream,
-                                  const struct wim_inode *inode,
-                                  struct unix_apply_ctx *ctx)
+unix_begin_extract_blob_instance(const struct blob_descriptor *blob,
+                                const struct wim_inode *inode,
+                                const struct wim_inode_stream *strm,
+                                struct unix_apply_ctx *ctx)
 {
        const struct wim_dentry *first_dentry;
        const char *first_path;
        int fd;
 
-       if (inode_is_symlink(inode)) {
+       if (unlikely(strm->stream_type == STREAM_TYPE_REPARSE_POINT)) {
                /* On UNIX, symbolic links must be created with symlink(), which
                 * requires that the full link target be available.  */
-               if (stream->size > REPARSE_DATA_MAX_SIZE) {
+               if (blob->size > REPARSE_DATA_MAX_SIZE) {
                        ERROR_WITH_ERRNO("Reparse data of \"%s\" has size "
                                         "%"PRIu64" bytes (exceeds %u bytes)",
-                                        inode_first_full_path(inode),
-                                        stream->size, REPARSE_DATA_MAX_SIZE);
+                                        inode_any_full_path(inode),
+                                        blob->size, REPARSE_DATA_MAX_SIZE);
                        return WIMLIB_ERR_INVALID_REPARSE_DATA;
                }
                ctx->reparse_ptr = ctx->reparse_data;
                return 0;
        }
 
-       if (ctx->num_open_fds == MAX_OPEN_FDS) {
-               ERROR("Can't extract data: too many open files!");
-               return WIMLIB_ERR_UNSUPPORTED;
-       }
+       wimlib_assert(stream_is_unnamed_data_stream(strm));
+
+       /* Unnamed data stream of "regular" file  */
+
+       /* This should be ensured by extract_blob_list()  */
+       wimlib_assert(ctx->num_open_fds < MAX_OPEN_FILES);
 
        first_dentry = inode_first_extraction_dentry(inode);
        first_path = unix_build_extraction_path(first_dentry, ctx);
 retry_create:
-       fd = open(first_path, O_TRUNC | O_CREAT | O_WRONLY | O_NOFOLLOW, 0644);
+       fd = open(first_path, O_EXCL | O_CREAT | O_WRONLY | O_NOFOLLOW, 0644);
        if (fd < 0) {
                if (errno == EEXIST && !unlink(first_path))
                        goto retry_create;
                ERROR_WITH_ERRNO("Can't create regular file \"%s\"", first_path);
                return WIMLIB_ERR_OPEN;
        }
+       if (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE) {
+               ctx->is_sparse_file[ctx->num_open_fds] = true;
+               ctx->any_sparse_files = true;
+       } else { /* not sparse: try to preallocate the full size (best effort) */
+               ctx->is_sparse_file[ctx->num_open_fds] = false;
+#ifdef HAVE_POSIX_FALLOCATE
+               posix_fallocate(fd, 0, blob->size); /* result ignored */
+#endif
+       }
        filedes_init(&ctx->open_fds[ctx->num_open_fds++], fd);
        return unix_create_hardlinks(inode, first_dentry, first_path, ctx);
 }
 
-/* Called when starting to read a single-instance stream for extraction  */
+/* Called when starting to read a blob for extraction  */
 static int
-unix_begin_extract_stream(struct wim_lookup_table_entry *stream,
-                         u32 flags, void *_ctx)
+unix_begin_extract_blob(struct blob_descriptor *blob, void *_ctx)
 {
        struct unix_apply_ctx *ctx = _ctx;
-       const struct stream_owner *owners = stream_owners(stream);
-       int ret;
+       const struct blob_extraction_target *targets = blob_extraction_targets(blob);
 
-       for (u32 i = 0; i < stream->out_refcnt; i++) {
-               const struct wim_inode *inode = owners[i].inode;
-
-               ret = unix_begin_extract_stream_instance(stream, inode, ctx);
+       for (u32 i = 0; i < blob->out_refcnt; i++) {
+               int ret = unix_begin_extract_blob_instance(blob,
+                                                          targets[i].inode,
+                                                          targets[i].stream,
+                                                          ctx);
                if (ret) {
                        ctx->reparse_ptr = NULL;
                        unix_cleanup_open_fds(ctx, 0);
@@ -584,35 +713,53 @@ unix_begin_extract_stream(struct wim_lookup_table_entry *stream,
        return 0;
 }
 
-/* Called when the next chunk of a single-instance stream has been read for
- * extraction  */
+/* Called when the next chunk of a blob has been read for extraction  */
 static int
-unix_extract_chunk(const void *chunk, size_t size, void *_ctx)
+unix_extract_chunk(const struct blob_descriptor *blob, u64 offset,
+                  const void *chunk, size_t size, void *_ctx)
 {
        struct unix_apply_ctx *ctx = _ctx;
+       const void * const end = chunk + size;
+       const void *p;
+       bool zeroes;
+       size_t len;
+       unsigned i;
        int ret;
 
-       for (unsigned i = 0; i < ctx->num_open_fds; i++) {
-               ret = full_write(&ctx->open_fds[i], chunk, size);
-               if (ret) {
-                       ERROR_WITH_ERRNO("Error writing data to filesystem");
-                       return ret;
+       /*
+        * Write each region at its offset.  For files in sparse mode, regions
+        * reported as zeroes are skipped so the filesystem can leave holes.
+        */
+       for (p = chunk; p != end; p += len, offset += len) {
+               zeroes = maybe_detect_sparse_region(p, end - p, &len,
+                                                   ctx->any_sparse_files);
+               for (i = 0; i < ctx->num_open_fds; i++) {
+                       if (!zeroes || !ctx->is_sparse_file[i]) {
+                               ret = full_pwrite(&ctx->open_fds[i],
+                                                 p, len, offset);
+                               if (ret)
+                                       goto err;
+                       }
                }
        }
+       /* When buffering reparse data, append this chunk to the buffer.  */
        if (ctx->reparse_ptr)
                ctx->reparse_ptr = mempcpy(ctx->reparse_ptr, chunk, size);
        return 0;
+
+err:
+       ERROR_WITH_ERRNO("Error writing data to filesystem");
+       return ret;
 }
 
-/* Called when a single-instance stream has been fully read for extraction  */
+/* Called when a blob has been fully read for extraction  */
 static int
-unix_end_extract_stream(struct wim_lookup_table_entry *stream, int status,
-                       void *_ctx)
+unix_end_extract_blob(struct blob_descriptor *blob, int status, void *_ctx)
 {
        struct unix_apply_ctx *ctx = _ctx;
        int ret;
        unsigned j;
-       const struct stream_owner *owners = stream_owners(stream);
+       const struct blob_extraction_target *targets = blob_extraction_targets(blob);
 
        ctx->reparse_ptr = NULL;
 
@@ -623,26 +770,16 @@ unix_end_extract_stream(struct wim_lookup_table_entry *stream, int status,
 
        j = 0;
        ret = 0;
-       for (u32 i = 0; i < stream->out_refcnt; i++) {
-               struct wim_inode *inode = owners[i].inode;
+       for (u32 i = 0; i < blob->out_refcnt; i++) {
+               struct wim_inode *inode = targets[i].inode;
 
                if (inode_is_symlink(inode)) {
                        /* We finally have the symlink data, so we can create
                         * the symlink.  */
                        const char *path;
-                       bool rpfix;
-
-                       rpfix = (ctx->common.extract_flags &
-                                WIMLIB_EXTRACT_FLAG_RPFIX) &&
-                                       !inode->i_not_rpfixed;
 
                        path = unix_build_inode_extraction_path(inode, ctx);
-                       ret = unix_create_symlink(inode, path,
-                                                 ctx->reparse_data,
-                                                 stream->size,
-                                                 rpfix,
-                                                 ctx->target_abspath,
-                                                 ctx->target_abspath_nchars);
+                       ret = unix_create_symlink(inode, path, blob->size, ctx);
                        if (ret) {
                                ERROR_WITH_ERRNO("Can't create symbolic link "
                                                 "\"%s\"", path);
@@ -652,10 +789,17 @@ unix_end_extract_stream(struct wim_lookup_table_entry *stream, int status,
                        if (ret)
                                break;
                } else {
-                       /* Set metadata on regular file just before closing it.
-                        */
                        struct filedes *fd = &ctx->open_fds[j];
 
+                       /* If the file is sparse, extend it to its final size. */
+                       if (ctx->is_sparse_file[j] && ftruncate(fd->fd, blob->size)) {
+                               ERROR_WITH_ERRNO("Error extending \"%s\" to final size",
+                                                unix_build_inode_extraction_path(inode, ctx));
+                               ret = WIMLIB_ERR_WRITE;
+                               break;
+                       }
+
+                       /* Set metadata on regular file just before closing.  */
                        ret = unix_set_metadata(fd->fd, inode, NULL, ctx);
                        if (ret)
                                break;
@@ -680,10 +824,13 @@ unix_set_dir_metadata(struct list_head *dentry_list, struct unix_apply_ctx *ctx)
        int ret;
 
        list_for_each_entry_reverse(dentry, dentry_list, d_extraction_list_node) {
-               if (dentry_is_directory(dentry)) {
+               if (should_extract_as_directory(dentry->d_inode)) {
                        ret = unix_set_metadata(-1, dentry->d_inode, NULL, ctx);
                        if (ret)
                                return ret;
+                       ret = report_file_metadata_applied(&ctx->common);
+                       if (ret)
+                               return ret;
                }
        }
        return 0;
@@ -695,6 +842,8 @@ unix_extract(struct list_head *dentry_list, struct apply_ctx *_ctx)
        int ret;
        struct unix_apply_ctx *ctx = (struct unix_apply_ctx *)_ctx;
        size_t path_max;
+       u64 dir_count;
+       u64 empty_file_count;
 
        /* Compute the maximum path length that will be needed, then allocate
         * some path buffers.  */
@@ -715,11 +864,22 @@ unix_extract(struct list_head *dentry_list, struct apply_ctx *_ctx)
        /* Extract directories and empty regular files.  Directories are needed
         * because we can't extract any other files until their directories
         * exist.  Empty files are needed because they don't have
-        * representatives in the stream list.  */
+        * representatives in the blob list.  */
+
+       unix_count_dentries(dentry_list, &dir_count, &empty_file_count);
+
+       ret = start_file_structure_phase(&ctx->common, dir_count + empty_file_count);
+       if (ret)
+               goto out;
+
        ret = unix_create_dirs_and_empty_files(dentry_list, ctx);
        if (ret)
                goto out;
 
+       ret = end_file_structure_phase(&ctx->common);
+       if (ret)
+               goto out;
+
        /* Get full path to target if needed for absolute symlink fixups.  */
        if ((ctx->common.extract_flags & WIMLIB_EXTRACT_FLAG_RPFIX) &&
            ctx->common.required_features.symlink_reparse_points)
@@ -734,23 +894,31 @@ unix_extract(struct list_head *dentry_list, struct apply_ctx *_ctx)
 
        /* Extract nonempty regular files and symbolic links.  */
 
-       struct read_stream_list_callbacks cbs = {
-               .begin_stream      = unix_begin_extract_stream,
-               .begin_stream_ctx  = ctx,
-               .consume_chunk     = unix_extract_chunk,
-               .consume_chunk_ctx = ctx,
-               .end_stream        = unix_end_extract_stream,
-               .end_stream_ctx    = ctx,
+       struct read_blob_callbacks cbs = {
+               .begin_blob     = unix_begin_extract_blob,
+               .continue_blob  = unix_extract_chunk,
+               .end_blob       = unix_end_extract_blob,
+               .ctx            = ctx,
        };
-       ret = extract_stream_list(&ctx->common, &cbs);
+       ret = extract_blob_list(&ctx->common, &cbs);
        if (ret)
                goto out;
 
+
        /* Set directory metadata.  We do this last so that we get the right
         * directory timestamps.  */
+       ret = start_file_metadata_phase(&ctx->common, dir_count);
+       if (ret)
+               goto out;
+
        ret = unix_set_dir_metadata(dentry_list, ctx);
        if (ret)
                goto out;
+
+       ret = end_file_metadata_phase(&ctx->common);
+       if (ret)
+               goto out;
+
        if (ctx->num_special_files_ignored) {
                WARNING("%lu special files were not extracted due to EPERM!",
                        ctx->num_special_files_ignored);