wimlib.net Git - wimlib/blobdiff - src/unix_apply.c
unix_apply.c: support extracting xattrs
[wimlib] / src / unix_apply.c
index e6bd2321276baeae20b1f177c21b211a24cd45df..f62b952e66e5bc2ede2a194cd4b9fcf9e4b1778b 100644 (file)
@@ -3,7 +3,7 @@
  */
 
 /*
- * Copyright (C) 2012, 2013, 2014 Eric Biggers
+ * Copyright (C) 2012-2016 Eric Biggers
  *
  * This file is free software; you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License as published by the Free
@@ -29,6 +29,9 @@
 #include <sys/stat.h>
 #include <sys/time.h>
 #include <sys/types.h>
+#ifdef HAVE_SYS_XATTR_H
+#  include <sys/xattr.h>
+#endif
 #include <unistd.h>
 
 #include "wimlib/apply.h"
@@ -40,6 +43,7 @@
 #include "wimlib/reparse.h"
 #include "wimlib/timestamp.h"
 #include "wimlib/unix_data.h"
+#include "wimlib/xattr.h"
 
 /* We don't require O_NOFOLLOW, but the advantage of having it is that if we
  * need to extract a file to a location at which there exists a symbolic link,
@@ -54,11 +58,15 @@ static int
 unix_get_supported_features(const char *target,
                            struct wim_features *supported_features)
 {
+       supported_features->sparse_files = 1;
        supported_features->hard_links = 1;
        supported_features->symlink_reparse_points = 1;
        supported_features->unix_data = 1;
        supported_features->timestamps = 1;
        supported_features->case_sensitive_filenames = 1;
+#ifdef HAVE_XATTR_SUPPORT
+       supported_features->linux_xattrs = 1;
+#endif
        return 0;
 }
 
@@ -81,11 +89,22 @@ struct unix_apply_ctx {
         * the beginning of the array.  */
        unsigned num_open_fds;
 
-       /* Buffer for reading reparse point data into memory  */
-       u8 reparse_data[REPARSE_DATA_MAX_SIZE];
+       /* For each currently open file, whether we're writing to it in "sparse"
+        * mode or not.  */
+       bool is_sparse_file[MAX_OPEN_FILES];
+
+       /* Whether is_sparse_file[] is true for any currently open file  */
+       bool any_sparse_files;
+
+       /* Allocated buffer for reading blob data when it cannot be extracted
+        * directly  */
+       u8 *data_buffer;
+
+       /* Pointer to the next byte in @data_buffer to fill  */
+       u8 *data_buffer_ptr;
 
-       /* Pointer to the next byte in @reparse_data to fill  */
-       u8 *reparse_ptr;
+       /* Size allocated in @data_buffer  */
+       size_t data_buffer_size;
 
        /* Absolute path to the target directory (allocated buffer).  Only set
         * if needed for absolute symbolic link fixups.  */
@@ -96,6 +115,11 @@ struct unix_apply_ctx {
 
        /* Number of special files we couldn't create due to EPERM  */
        unsigned long num_special_files_ignored;
+
+#ifdef HAVE_XATTR_SUPPORT
+       /* Delayed xattrs saved in memory (deduplicated)  */
+       struct blob_table *delayed_xattrs;
+#endif
 };
 
 /* Returns the number of characters needed to represent the path to the
@@ -137,6 +161,29 @@ unix_compute_path_max(const struct list_head *dentry_list,
        return ctx->common.target_nchars + max + 1;
 }
 
+/* Point ctx->data_buffer_ptr at a buffer able to hold the next blob, growing
+ * it first if @blob_size needs it.  Returns false if it cannot be grown.  */
+static bool
+prepare_data_buffer(struct unix_apply_ctx *ctx, u64 blob_size)
+{
+       if (blob_size > ctx->data_buffer_size) {
+               /* Larger buffer needed; the size must also fit in size_t.  */
+               void *new_buffer;
+               if ((size_t)blob_size != blob_size)
+                       return false;
+               new_buffer = REALLOC(ctx->data_buffer, blob_size);
+               if (!new_buffer)
+                       return false;
+               ctx->data_buffer = new_buffer;
+               ctx->data_buffer_size = blob_size;
+       }
+       /* A non-NULL data_buffer_ptr tells unix_extract_chunk() to copy each
+        * chunk of blob data into the buffer; unix_end_extract_blob() sets it
+        * back to NULL afterwards.  */
+       ctx->data_buffer_ptr = ctx->data_buffer;
+       return true;
+}
+
 /* Builds and returns the filesystem path to which to extract @dentry.
  * This cycles through NUM_PATHBUFS different buffers.  */
 static const char *
@@ -156,7 +203,9 @@ unix_build_extraction_path(const struct wim_dentry *dentry,
        d = dentry;
        do {
                p -= d->d_extraction_name_nchars;
-               memcpy(p, d->d_extraction_name, d->d_extraction_name_nchars);
+               if (d->d_extraction_name_nchars)
+                       memcpy(p, d->d_extraction_name,
+                              d->d_extraction_name_nchars);
                *--p = '/';
                d = d->d_parent;
        } while (!dentry_is_root(d) && will_extract_dentry(d));
@@ -181,83 +230,51 @@ unix_build_inode_extraction_path(const struct wim_inode *inode,
        return unix_build_extraction_path(inode_first_extraction_dentry(inode), ctx);
 }
 
-/* Sets the timestamps on a file being extracted.
- *
- * Either @fd or @path must be specified (not -1 and not NULL, respectively).
- */
+/* Should @inode be extracted as a directory on UNIX?  True when
+ * FILE_ATTRIBUTE_DIRECTORY is set and inode_is_symlink() is false, i.e. the
+ * file has no symlink or junction reparse point.  It *may* still have some
+ * other type of reparse point.  */
+static inline bool
+should_extract_as_directory(const struct wim_inode *inode)
+{
+       return (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) &&
+               !inode_is_symlink(inode);
+}
+
+/* Set atime/mtime on @path itself (no symlink follow); returns 0 or -1. */
 static int
-unix_set_timestamps(int fd, const char *path, u64 atime, u64 mtime)
+unix_set_timestamps(const char *path, u64 atime, u64 mtime)
 {
+#ifdef HAVE_UTIMENSAT
        {
                struct timespec times[2];
 
                times[0] = wim_timestamp_to_timespec(atime);
                times[1] = wim_timestamp_to_timespec(mtime);
 
-               errno = ENOSYS;
-#ifdef HAVE_FUTIMENS
-               if (fd >= 0 && !futimens(fd, times))
-                       return 0;
-#endif
-#ifdef HAVE_UTIMENSAT
-               if (fd < 0 && !utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW))
+               if (utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW) == 0)
                        return 0;
-#endif
                if (errno != ENOSYS)
-                       return WIMLIB_ERR_SET_TIMESTAMPS;
+                       return -1; /* ENOSYS instead falls back to lutimes() */
        }
+#endif
        {
                struct timeval times[2];
 
                times[0] = wim_timestamp_to_timeval(atime);
                times[1] = wim_timestamp_to_timeval(mtime);
 
-               if (fd >= 0 && !futimes(fd, times))
-                       return 0;
-               if (fd < 0 && !lutimes(path, times))
-                       return 0;
-               return WIMLIB_ERR_SET_TIMESTAMPS;
+               return lutimes(path, times);
        }
 }
 
+/* Set metadata on an extracted file. */
 static int
-unix_set_owner_and_group(int fd, const char *path, uid_t uid, gid_t gid)
-{
-       if (fd >= 0 && !fchown(fd, uid, gid))
-               return 0;
-       if (fd < 0 && !lchown(path, uid, gid))
-               return 0;
-       return WIMLIB_ERR_SET_SECURITY;
-}
-
-static int
-unix_set_mode(int fd, const char *path, mode_t mode)
-{
-       if (fd >= 0 && !fchmod(fd, mode))
-               return 0;
-       if (fd < 0 && !chmod(path, mode))
-               return 0;
-       return WIMLIB_ERR_SET_SECURITY;
-}
-
-/*
- * Set metadata on an extracted file.
- *
- * @fd is an open file descriptor to the extracted file, or -1.  @path is the
- * path to the extracted file, or NULL.  If valid, this function uses @fd.
- * Otherwise, if valid, it uses @path.  Otherwise, it calculates the path to one
- * alias of the extracted file and uses it.
- */
-static int
-unix_set_metadata(int fd, const struct wim_inode *inode,
-                 const char *path, struct unix_apply_ctx *ctx)
+unix_set_metadata(const struct wim_inode *inode, struct unix_apply_ctx *ctx)
 {
-       int ret;
+       const char *path = unix_build_inode_extraction_path(inode, ctx);
        struct wimlib_unix_data unix_data;
 
-       if (fd < 0 && !path)
-               path = unix_build_inode_extraction_path(inode, ctx);
-
        if ((ctx->common.extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA)
            && inode_get_unix_data(inode, &unix_data))
        {
@@ -265,77 +282,61 @@ unix_set_metadata(int fd, const struct wim_inode *inode,
                u32 gid = unix_data.gid;
                u32 mode = unix_data.mode;
 
-               ret = unix_set_owner_and_group(fd, path, uid, gid);
-               if (ret) {
-                       if (!path)
-                               path = unix_build_inode_extraction_path(inode, ctx);
+               if (lchown(path, uid, gid) != 0) {
                        if (ctx->common.extract_flags &
                            WIMLIB_EXTRACT_FLAG_STRICT_ACLS)
                        {
                                ERROR_WITH_ERRNO("Can't set uid=%"PRIu32" and "
                                                 "gid=%"PRIu32" on \"%s\"",
                                                 uid, gid, path);
-                               return ret;
-                       } else {
-                               WARNING_WITH_ERRNO("Can't set uid=%"PRIu32" and "
-                                                  "gid=%"PRIu32" on \"%s\"",
-                                                  uid, gid, path);
+                               return WIMLIB_ERR_SET_SECURITY;
                        }
+                       WARNING_WITH_ERRNO("Can't set uid=%"PRIu32" and "
+                                          "gid=%"PRIu32" on \"%s\"",
+                                          uid, gid, path);
                }
 
-               ret = 0;
-               if (!inode_is_symlink(inode))
-                       ret = unix_set_mode(fd, path, mode);
-               if (ret) {
-                       if (!path)
-                               path = unix_build_inode_extraction_path(inode, ctx);
+               if (!inode_is_symlink(inode) && chmod(path, mode) != 0) {
                        if (ctx->common.extract_flags &
                            WIMLIB_EXTRACT_FLAG_STRICT_ACLS)
                        {
                                ERROR_WITH_ERRNO("Can't set mode=0%"PRIo32" "
                                                 "on \"%s\"", mode, path);
-                               return ret;
-                       } else {
-                               WARNING_WITH_ERRNO("Can't set mode=0%"PRIo32" "
-                                                  "on \"%s\"", mode, path);
+                               return WIMLIB_ERR_SET_SECURITY;
                        }
+                       WARNING_WITH_ERRNO("Can't set mode=0%"PRIo32" "
+                                          "on \"%s\"", mode, path);
                }
        }
 
-       ret = unix_set_timestamps(fd, path,
-                                 inode->i_last_access_time,
-                                 inode->i_last_write_time);
-       if (ret) {
-               if (!path)
-                       path = unix_build_inode_extraction_path(inode, ctx);
+       if (unix_set_timestamps(path, inode->i_last_access_time,
+                               inode->i_last_write_time) != 0)
+       {
                if (ctx->common.extract_flags &
                    WIMLIB_EXTRACT_FLAG_STRICT_TIMESTAMPS)
                {
                        ERROR_WITH_ERRNO("Can't set timestamps on \"%s\"", path);
-                       return ret;
-               } else {
-                       WARNING_WITH_ERRNO("Can't set timestamps on \"%s\"", path);
+                       return WIMLIB_ERR_SET_TIMESTAMPS;
                }
+               WARNING_WITH_ERRNO("Can't set timestamps on \"%s\"", path);
        }
        return 0;
 }
 
-/* Extract all needed aliases of the @inode, where one alias, corresponding to
- * @first_dentry, has already been extracted to @first_path.  */
+/*
+ * Extract all needed aliases of the specified @inode, where the first alias has
+ * already been extracted to @first_path.
+ */
 static int
 unix_create_hardlinks(const struct wim_inode *inode,
-                     const struct wim_dentry *first_dentry,
                      const char *first_path, struct unix_apply_ctx *ctx)
 {
        const struct wim_dentry *dentry;
        const char *newpath;
 
-       list_for_each_entry(dentry, &inode->i_extraction_aliases,
-                           d_extraction_alias_node)
-       {
-               if (dentry == first_dentry)
+       inode_for_each_extraction_alias(dentry, inode) {
+               if (dentry == inode_first_extraction_dentry(inode))
                        continue;
-
                newpath = unix_build_extraction_path(dentry, ctx);
        retry_link:
                if (link(first_path, newpath)) {
@@ -350,18 +351,13 @@ unix_create_hardlinks(const struct wim_inode *inode,
        return 0;
 }
 
-/* If @dentry represents a directory, create it.  */
 static int
-unix_create_if_directory(const struct wim_dentry *dentry,
-                        struct unix_apply_ctx *ctx)
+unix_create_directory(const struct wim_dentry *dentry,
+                     struct unix_apply_ctx *ctx)
 {
-       const char *path;
+       const char *path = unix_build_extraction_path(dentry, ctx);
        struct stat stbuf;
 
-       if (!dentry_is_directory(dentry))
-               return 0;
-
-       path = unix_build_extraction_path(dentry, ctx);
        if (mkdir(path, 0755) &&
            /* It's okay if the path already exists, as long as it's a
             * directory.  */
@@ -371,38 +367,21 @@ unix_create_if_directory(const struct wim_dentry *dentry,
                return WIMLIB_ERR_MKDIR;
        }
 
-       return report_file_created(&ctx->common);
+       return 0;
 }
 
-/* If @dentry represents an empty regular file or a special file, create it, set
- * its metadata, and create any needed hard links.  */
 static int
-unix_extract_if_empty_file(const struct wim_dentry *dentry,
-                          struct unix_apply_ctx *ctx)
+unix_create_nondirectory(const struct wim_inode *inode,
+                        struct unix_apply_ctx *ctx)
 {
-       const struct wim_inode *inode;
+       const char *path = unix_build_inode_extraction_path(inode, ctx);
        struct wimlib_unix_data unix_data;
-       const char *path;
-       int ret;
-
-       inode = dentry->d_inode;
-
-       /* Extract all aliases only when the "first" comes up.  */
-       if (dentry != inode_first_extraction_dentry(inode))
-               return 0;
-
-       /* Is this a directory, a symbolic link, or any type of nonempty file?
-        */
-       if (inode_is_directory(inode) || inode_is_symlink(inode) ||
-           inode_get_blob_for_unnamed_data_stream_resolved(inode))
-               return 0;
 
        /* Recognize special files in UNIX_DATA mode  */
        if ((ctx->common.extract_flags & WIMLIB_EXTRACT_FLAG_UNIX_DATA) &&
            inode_get_unix_data(inode, &unix_data) &&
            !S_ISREG(unix_data.mode))
        {
-               path = unix_build_extraction_path(dentry, ctx);
        retry_mknod:
                if (mknod(path, unix_data.mode, unix_data.rdev)) {
                        if (errno == EPERM) {
@@ -417,53 +396,54 @@ unix_extract_if_empty_file(const struct wim_dentry *dentry,
                                         path);
                        return WIMLIB_ERR_MKNOD;
                }
-               /* On special files, we can set timestamps immediately because
-                * we don't need to write any data to them.  */
-               ret = unix_set_metadata(-1, inode, path, ctx);
        } else {
                int fd;
 
-               path = unix_build_extraction_path(dentry, ctx);
        retry_create:
-               fd = open(path, O_TRUNC | O_CREAT | O_WRONLY | O_NOFOLLOW, 0644);
+               fd = open(path, O_EXCL | O_CREAT | O_WRONLY | O_NOFOLLOW, 0644);
                if (fd < 0) {
                        if (errno == EEXIST && !unlink(path))
                                goto retry_create;
                        ERROR_WITH_ERRNO("Can't create regular file \"%s\"", path);
                        return WIMLIB_ERR_OPEN;
                }
-               /* On empty files, we can set timestamps immediately because we
-                * don't need to write any data to them.  */
-               ret = unix_set_metadata(fd, inode, path, ctx);
-               if (close(fd) && !ret) {
+               if (close(fd)) {
                        ERROR_WITH_ERRNO("Error closing \"%s\"", path);
-                       ret = WIMLIB_ERR_WRITE;
+                       return WIMLIB_ERR_WRITE;
                }
        }
-       if (ret)
-               return ret;
-
-       ret = unix_create_hardlinks(inode, dentry, path, ctx);
-       if (ret)
-               return ret;
 
-       return report_file_created(&ctx->common);
+       return unix_create_hardlinks(inode, path, ctx);
 }
 
+/* Create all files (and directories) except for symlinks. */
 static int
-unix_create_dirs_and_empty_files(const struct list_head *dentry_list,
-                                struct unix_apply_ctx *ctx)
+unix_create_file_structure(const struct list_head *dentry_list,
+                          struct unix_apply_ctx *ctx)
 {
        const struct wim_dentry *dentry;
+       const struct wim_inode *inode;
        int ret;
 
        list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
-               ret = unix_create_if_directory(dentry, ctx);
+               inode = dentry->d_inode;
+               if (!should_extract_as_directory(inode))
+                       continue;
+               ret = unix_create_directory(dentry, ctx);
+               if (!ret)
+                       ret = report_file_created(&ctx->common);
                if (ret)
                        return ret;
        }
        list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
-               ret = unix_extract_if_empty_file(dentry, ctx);
+               inode = dentry->d_inode;
+               if (should_extract_as_directory(inode) ||
+                   inode_is_symlink(inode) ||
+                   dentry != inode_first_extraction_dentry(inode))
+                       continue;
+               ret = unix_create_nondirectory(inode, ctx);
+               if (!ret)
+                       ret = report_file_created(&ctx->common);
                if (ret)
                        return ret;
        }
@@ -471,65 +451,123 @@ unix_create_dirs_and_empty_files(const struct list_head *dentry_list,
 }
 
 static void
-unix_count_dentries(const struct list_head *dentry_list,
-                   uint64_t *dir_count_ret, uint64_t *empty_file_count_ret)
+unix_count_inodes(const struct list_head *dentry_list,
+                 u64 *full_count, u64 *symlink_count)
 {
        const struct wim_dentry *dentry;
-       uint64_t dir_count = 0;
-       uint64_t empty_file_count = 0;
+
+       *full_count = 0;
+       *symlink_count = 0;
 
        list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
+               if (dentry != inode_first_extraction_dentry(dentry->d_inode))
+                       continue; /* not this inode's first alias */
+               ++*full_count; /* every inode, symlinks included */
+               if (inode_is_symlink(dentry->d_inode))
+                       ++*symlink_count;
+       }
+}
 
-               const struct wim_inode *inode = dentry->d_inode;
+#ifdef HAVE_XATTR_SUPPORT
 
-               if (inode_is_directory(inode))
-                       dir_count++;
-               else if ((dentry == inode_first_extraction_dentry(inode)) &&
-                        !inode_is_symlink(inode) &&
-                        !inode_get_blob_for_unnamed_data_stream_resolved(inode))
-                       empty_file_count++;
+static int
+apply_xattrs(struct wim_inode *inode, const void *entries,
+            size_t entries_size, struct unix_apply_ctx *ctx)
+{
+       const void * const entries_end = entries + entries_size;
+       const char *path = unix_build_inode_extraction_path(inode, ctx);
+       char name[XATTR_NAME_MAX + 1]; /* entry name, NUL-terminated */
+
+       for (const struct wimlib_xattr_entry *entry = entries;
+            (void *)entry < entries_end; entry = xattr_entry_next(entry))
+       {
+               u16 name_len; /* NOTE(review): assumed <= XATTR_NAME_MAX */
+               const void *value;
+               u32 value_len;
+
+               if (!valid_xattr_entry(entry, entries_end - (void *)entry)) {
+                       ERROR("\"%s\": extended attribute stream is corrupt",
+                               path);
+                       return WIMLIB_ERR_INVALID_EXTENDED_ATTRIBUTE;
+               }
+               name_len = le16_to_cpu(entry->name_len);
+               memcpy(name, entry->name, name_len);
+               name[name_len] = '\0';
+
+               value = entry->name + name_len; /* value follows the name */
+               value_len = le32_to_cpu(entry->value_len);
+
+               if (lsetxattr(path, name, value, value_len, 0) != 0) {
+                       if (ctx->common.extract_flags &
+                           WIMLIB_EXTRACT_FLAG_STRICT_ACLS)
+                       {
+                               ERROR_WITH_ERRNO("\"%s\": unable to set "
+                                                "extended attribute %s",
+                                                path, name);
+                               return WIMLIB_ERR_SET_SECURITY;
+                       } /* otherwise only warn and continue */
+                       WARNING_WITH_ERRNO("\"%s\": unable to set extended "
+                                          "attribute %s", path, name);
+               }
        }
+       return 0;
+}
 
-       *dir_count_ret = dir_count;
-       *empty_file_count_ret = empty_file_count;
+static int
+apply_delayed_xattrs(struct list_head *dentry_list, struct unix_apply_ctx *ctx)
+{
+       struct wim_dentry *dentry;
+
+       list_for_each_entry(dentry, dentry_list, d_extraction_list_node) {
+               struct wim_inode *inode = dentry->d_inode;
+               const struct blob_descriptor *blob;
+               const struct wim_inode_stream *strm;
+               int ret;
+
+               if (!inode_is_symlink(inode))
+                       continue; /* only symlinks are processed here */
+               if (dentry != inode_first_extraction_dentry(inode))
+                       continue; /* handle each inode via its first alias */
+               strm = inode_get_stream(inode, STREAM_TYPE_LINUX_XATTR,
+                                       NO_STREAM_NAME);
+               if (!strm)
+                       continue; /* inode has no xattr stream */
+               blob = lookup_blob(ctx->delayed_xattrs, stream_hash(strm));
+               if (!blob)
+                       continue; /* nothing saved under this hash */
+               wimlib_assert(blob->blob_location == BLOB_IN_ATTACHED_BUFFER);
+               ret = apply_xattrs(inode, blob->attached_buffer, blob->size,
+                                  ctx);
+               if (ret)
+                       return ret;
+       }
+       return 0;
 }
+#endif /* HAVE_XATTR_SUPPORT */
 
 static int
 unix_create_symlink(const struct wim_inode *inode, const char *path,
-                   const u8 *rpdata, u16 rpdatalen, bool rpfix,
-                   const char *apply_dir, size_t apply_dir_nchars)
+                   size_t rpdatalen, struct unix_apply_ctx *ctx)
 {
-       char link_target[REPARSE_DATA_MAX_SIZE];
-       int ret;
+       char target[REPARSE_POINT_MAX_SIZE];
        struct blob_descriptor blob_override;
+       int ret;
 
        blob_set_is_located_in_attached_buffer(&blob_override,
-                                              (void *)rpdata, rpdatalen);
+                                              ctx->data_buffer, rpdatalen);
 
-       ret = wim_inode_readlink(inode, link_target,
-                                sizeof(link_target) - 1, &blob_override);
-       if (ret < 0) {
+       ret = wim_inode_readlink(inode, target, sizeof(target) - 1,
+                                &blob_override,
+                                ctx->target_abspath,
+                                ctx->target_abspath_nchars);
+       if (unlikely(ret < 0)) {
                errno = -ret;
                return WIMLIB_ERR_READLINK;
        }
+       target[ret] = '\0';
 
-       link_target[ret] = 0;
-
-       if (rpfix && link_target[0] == '/') {
-
-               /* "Fix" the absolute symbolic link by prepending the absolute
-                * path to the target directory.  */
-
-               if (sizeof(link_target) - (ret + 1) < apply_dir_nchars) {
-                       errno = ENAMETOOLONG;
-                       return WIMLIB_ERR_REPARSE_POINT_FIXUP_FAILED;
-               }
-               memmove(link_target + apply_dir_nchars, link_target,
-                       ret + 1);
-               memcpy(link_target, apply_dir, apply_dir_nchars);
-       }
 retry_symlink:
-       if (symlink(link_target, path)) {
+       if (symlink(target, path)) {
                if (errno == EEXIST && !unlink(path))
                        goto retry_symlink;
                return WIMLIB_ERR_LINK;
@@ -543,6 +581,7 @@ unix_cleanup_open_fds(struct unix_apply_ctx *ctx, unsigned offset)
        for (unsigned i = offset; i < ctx->num_open_fds; i++)
                filedes_close(&ctx->open_fds[i]);
        ctx->num_open_fds = 0;
+       ctx->any_sparse_files = false;
 }
 
 static int
@@ -551,21 +590,16 @@ unix_begin_extract_blob_instance(const struct blob_descriptor *blob,
                                 const struct wim_inode_stream *strm,
                                 struct unix_apply_ctx *ctx)
 {
-       const struct wim_dentry *first_dentry;
-       const char *first_path;
+       const char *path = unix_build_inode_extraction_path(inode, ctx);
        int fd;
 
-       if (unlikely(strm->stream_type == STREAM_TYPE_REPARSE_POINT)) {
+       if (strm->stream_type == STREAM_TYPE_REPARSE_POINT ||
+           strm->stream_type == STREAM_TYPE_LINUX_XATTR) {
                /* On UNIX, symbolic links must be created with symlink(), which
-                * requires that the full link target be available.  */
-               if (blob->size > REPARSE_DATA_MAX_SIZE) {
-                       ERROR_WITH_ERRNO("Reparse data of \"%s\" has size "
-                                        "%"PRIu64" bytes (exceeds %u bytes)",
-                                        inode_first_full_path(inode),
-                                        blob->size, REPARSE_DATA_MAX_SIZE);
-                       return WIMLIB_ERR_INVALID_REPARSE_DATA;
-               }
-               ctx->reparse_ptr = ctx->reparse_data;
+                * requires that the full link target be available.
+                * Similar for extended attribute "streams".  */
+               if (!prepare_data_buffer(ctx, blob->size))
+                       return WIMLIB_ERR_NOMEM;
                return 0;
        }
 
@@ -576,18 +610,22 @@ unix_begin_extract_blob_instance(const struct blob_descriptor *blob,
        /* This should be ensured by extract_blob_list()  */
        wimlib_assert(ctx->num_open_fds < MAX_OPEN_FILES);
 
-       first_dentry = inode_first_extraction_dentry(inode);
-       first_path = unix_build_extraction_path(first_dentry, ctx);
-retry_create:
-       fd = open(first_path, O_TRUNC | O_CREAT | O_WRONLY | O_NOFOLLOW, 0644);
+       fd = open(path, O_WRONLY | O_NOFOLLOW);
        if (fd < 0) {
-               if (errno == EEXIST && !unlink(first_path))
-                       goto retry_create;
-               ERROR_WITH_ERRNO("Can't create regular file \"%s\"", first_path);
+               ERROR_WITH_ERRNO("Can't open regular file \"%s\"", path);
                return WIMLIB_ERR_OPEN;
        }
+       if (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE) {
+               ctx->is_sparse_file[ctx->num_open_fds] = true;
+               ctx->any_sparse_files = true;
+       } else {
+               ctx->is_sparse_file[ctx->num_open_fds] = false;
+#ifdef HAVE_POSIX_FALLOCATE
+               posix_fallocate(fd, 0, blob->size);
+#endif
+       }
        filedes_init(&ctx->open_fds[ctx->num_open_fds++], fd);
-       return unix_create_hardlinks(inode, first_dentry, first_path, ctx);
+       return 0;
 }
 
 /* Called when starting to read a blob for extraction  */
@@ -603,7 +641,7 @@ unix_begin_extract_blob(struct blob_descriptor *blob, void *_ctx)
                                                           targets[i].stream,
                                                           ctx);
                if (ret) {
-                       ctx->reparse_ptr = NULL;
+                       ctx->data_buffer_ptr = NULL;
                        unix_cleanup_open_fds(ctx, 0);
                        return ret;
                }
@@ -613,21 +651,43 @@ unix_begin_extract_blob(struct blob_descriptor *blob, void *_ctx)
 
 /* Called when the next chunk of a blob has been read for extraction  */
 static int
-unix_extract_chunk(const void *chunk, size_t size, void *_ctx)
+unix_extract_chunk(const struct blob_descriptor *blob, u64 offset,
+                  const void *chunk, size_t size, void *_ctx)
 {
        struct unix_apply_ctx *ctx = _ctx;
+       const void * const end = chunk + size;
+       const void *p;
+       bool zeroes;
+       size_t len;
+       unsigned i;
        int ret;
 
-       for (unsigned i = 0; i < ctx->num_open_fds; i++) {
-               ret = full_write(&ctx->open_fds[i], chunk, size);
-               if (ret) {
-                       ERROR_WITH_ERRNO("Error writing data to filesystem");
-                       return ret;
+       /*
+        * Write the chunk at @offset into every open file.  In files opened in
+        * sparse mode, skip zero regions so the filesystem can use holes.
+        */
+       for (p = chunk; p != end; p += len, offset += len) {
+               zeroes = maybe_detect_sparse_region(p, end - p, &len,
+                                                   ctx->any_sparse_files);
+               for (i = 0; i < ctx->num_open_fds; i++) {
+                       if (!zeroes || !ctx->is_sparse_file[i]) {
+                               ret = full_pwrite(&ctx->open_fds[i],
+                                                 p, len, offset);
+                               if (ret)
+                                       goto err;
+                       }
                }
        }
-       if (ctx->reparse_ptr)
-               ctx->reparse_ptr = mempcpy(ctx->reparse_ptr, chunk, size);
+
+       /* Also append the chunk to the data buffer, if one is in use  */
+       if (ctx->data_buffer_ptr)
+               ctx->data_buffer_ptr = mempcpy(ctx->data_buffer_ptr,
+                                              chunk, size);
        return 0;
+
+err:
+       ERROR_WITH_ERRNO("Error writing data to filesystem");
+       return ret;
 }
 
 /* Called when a blob has been fully read for extraction  */
@@ -639,7 +699,7 @@ unix_end_extract_blob(struct blob_descriptor *blob, int status, void *_ctx)
        unsigned j;
        const struct blob_extraction_target *targets = blob_extraction_targets(blob);
 
-       ctx->reparse_ptr = NULL;
+       ctx->data_buffer_ptr = NULL;
 
        if (status) {
                unix_cleanup_open_fds(ctx, 0);
@@ -650,40 +710,64 @@ unix_end_extract_blob(struct blob_descriptor *blob, int status, void *_ctx)
        ret = 0;
        for (u32 i = 0; i < blob->out_refcnt; i++) {
                struct wim_inode *inode = targets[i].inode;
+               struct wim_inode_stream *strm = targets[i].stream;
 
-               if (inode_is_symlink(inode)) {
+               if (strm->stream_type == STREAM_TYPE_REPARSE_POINT) {
                        /* We finally have the symlink data, so we can create
                         * the symlink.  */
                        const char *path;
-                       bool rpfix;
-
-                       rpfix = (ctx->common.extract_flags &
-                                WIMLIB_EXTRACT_FLAG_RPFIX) &&
-                                       !inode->i_not_rpfixed;
 
                        path = unix_build_inode_extraction_path(inode, ctx);
-                       ret = unix_create_symlink(inode, path,
-                                                 ctx->reparse_data,
-                                                 blob->size,
-                                                 rpfix,
-                                                 ctx->target_abspath,
-                                                 ctx->target_abspath_nchars);
+                       ret = unix_create_symlink(inode, path, blob->size, ctx);
                        if (ret) {
                                ERROR_WITH_ERRNO("Can't create symbolic link "
                                                 "\"%s\"", path);
                                break;
                        }
-                       ret = unix_set_metadata(-1, inode, path, ctx);
-                       if (ret)
-                               break;
-               } else {
-                       /* Set metadata on regular file just before closing it.
-                        */
+               }
+       #ifdef HAVE_XATTR_SUPPORT
+               else if (strm->stream_type == STREAM_TYPE_LINUX_XATTR) {
+                       if (inode_is_symlink(inode)) {
+                               /*
+                                * We can't apply xattrs to a symlink until it
+                                * has been created, but that requires the
+                                * reparse stream and we might be given the
+                                * reparse and xattr streams in either order.
+                                * Solution: cache xattrs for symlinks in
+                                * memory, then apply them at the end...
+                                */
+                               if (!ctx->delayed_xattrs) {
+                                       ctx->delayed_xattrs = new_blob_table(32);
+                                       if (!ctx->delayed_xattrs) {
+                                               ret = WIMLIB_ERR_NOMEM;
+                                               break;
+                                       }
+                               }
+                               if (!new_blob_from_data_buffer(ctx->data_buffer,
+                                                              blob->size,
+                                                              ctx->delayed_xattrs))
+                               {
+                                       ret = WIMLIB_ERR_NOMEM;
+                                       break;
+                               }
+                       } else {
+                               ret = apply_xattrs(inode, ctx->data_buffer,
+                                                  blob->size, ctx);
+                               if (ret)
+                                       break;
+                       }
+               }
+       #endif /* HAVE_XATTR_SUPPORT */
+               else {
                        struct filedes *fd = &ctx->open_fds[j];
 
-                       ret = unix_set_metadata(fd->fd, inode, NULL, ctx);
-                       if (ret)
+                       /* If the file is sparse, extend it to its final size. */
+                       if (ctx->is_sparse_file[j] && ftruncate(fd->fd, blob->size)) {
+                               ERROR_WITH_ERRNO("Error extending \"%s\" to final size",
+                                                unix_build_inode_extraction_path(inode, ctx));
+                               ret = WIMLIB_ERR_WRITE;
                                break;
+                       }
 
                        if (filedes_close(fd)) {
                                ERROR_WITH_ERRNO("Error closing \"%s\"",
@@ -698,21 +782,24 @@ unix_end_extract_blob(struct blob_descriptor *blob, int status, void *_ctx)
        return ret;
 }
 
+/* Apply metadata to all extracted files (and directories); 0 on success. */
 static int
-unix_set_dir_metadata(struct list_head *dentry_list, struct unix_apply_ctx *ctx)
+unix_apply_metadata(struct list_head *dentry_list, struct unix_apply_ctx *ctx)
 {
        const struct wim_dentry *dentry;
+       const struct wim_inode *inode;
        int ret;
 
-       list_for_each_entry_reverse(dentry, dentry_list, d_extraction_list_node) {
-               if (dentry_is_directory(dentry)) {
-                       ret = unix_set_metadata(-1, dentry->d_inode, NULL, ctx);
-                       if (ret)
-                               return ret;
+       list_for_each_entry_reverse(dentry, dentry_list, d_extraction_list_node)
+       {
+               inode = dentry->d_inode;
+               if (dentry != inode_first_extraction_dentry(inode))
+                       continue; /* presumably so each inode is handled once */
+               ret = unix_set_metadata(inode, ctx);
+               if (!ret) /* only when unix_set_metadata() returned 0 */
                        ret = report_file_metadata_applied(&ctx->common);
-                       if (ret)
-                               return ret;
-               }
+               if (ret)
+                       return ret; /* stop at the first nonzero status */
        }
        return 0;
 }
@@ -723,8 +810,8 @@ unix_extract(struct list_head *dentry_list, struct apply_ctx *_ctx)
        int ret;
        struct unix_apply_ctx *ctx = (struct unix_apply_ctx *)_ctx;
        size_t path_max;
-       uint64_t dir_count;
-       uint64_t empty_file_count;
+       u64 full_count;
+       u64 symlink_count;
 
        /* Compute the maximum path length that will be needed, then allocate
         * some path buffers.  */
@@ -742,18 +829,38 @@ unix_extract(struct list_head *dentry_list, struct apply_ctx *_ctx)
                       ctx->common.target, ctx->common.target_nchars);
        }
 
-       /* Extract directories and empty regular files.  Directories are needed
-        * because we can't extract any other files until their directories
-        * exist.  Empty files are needed because they don't have
-        * representatives in the blob list.  */
+       /*
+        * We do the extraction in three phases:
+        *
+        *      1. Create all directories and files except for symlinks
+        *      2. Extract streams
+        *      3. Apply metadata
+        *
+        * In phase (2), the streams which may be extracted include unnamed data
+        * streams (regular file contents), reparse streams (translated to
+        * symlink targets), and extended attribute (xattr) streams.  These may
+        * come up for extraction in any order.  Therefore, at least when xattr
+        * streams are present, all files must be created earlier, in phase (1).
+        *
+        * Symlinks are an exception: they cannot be created until the reparse
+        * stream comes up for extraction.  Currently we hack around this by
+        * caching the xattrs of symlinks in memory until they can be applied
+        * between phases (2) and (3).
+        *
+        * Note that phase (3) must happen after all data and xattr extraction
+        * because it might set the file's mode to readonly (which precludes
+        * setxattr), and it also will set timestamps including the last
+        * modification time (which precludes write).
+        */
 
-       unix_count_dentries(dentry_list, &dir_count, &empty_file_count);
+       unix_count_inodes(dentry_list, &full_count, &symlink_count);
 
-       ret = start_file_structure_phase(&ctx->common, dir_count + empty_file_count);
+       ret = start_file_structure_phase(&ctx->common,
+                                        full_count - symlink_count);
        if (ret)
                goto out;
 
-       ret = unix_create_dirs_and_empty_files(dentry_list, ctx);
+       ret = unix_create_file_structure(dentry_list, ctx);
        if (ret)
                goto out;
 
@@ -773,28 +880,29 @@ unix_extract(struct list_head *dentry_list, struct apply_ctx *_ctx)
                ctx->target_abspath_nchars = strlen(ctx->target_abspath);
        }
 
-       /* Extract nonempty regular files and symbolic links.  */
-
-       struct read_blob_list_callbacks cbs = {
-               .begin_blob        = unix_begin_extract_blob,
-               .begin_blob_ctx    = ctx,
-               .consume_chunk     = unix_extract_chunk,
-               .consume_chunk_ctx = ctx,
-               .end_blob          = unix_end_extract_blob,
-               .end_blob_ctx      = ctx,
+       struct read_blob_callbacks cbs = {
+               .begin_blob     = unix_begin_extract_blob,
+               .continue_blob  = unix_extract_chunk,
+               .end_blob       = unix_end_extract_blob,
+               .ctx            = ctx,
        };
        ret = extract_blob_list(&ctx->common, &cbs);
        if (ret)
                goto out;
 
+#ifdef HAVE_XATTR_SUPPORT
+       if (unlikely(ctx->delayed_xattrs)) {
+               ret = apply_delayed_xattrs(dentry_list, ctx);
+               if (ret)
+                       goto out;
+       }
+#endif
 
-       /* Set directory metadata.  We do this last so that we get the right
-        * directory timestamps.  */
-       ret = start_file_metadata_phase(&ctx->common, dir_count);
+       ret = start_file_metadata_phase(&ctx->common, full_count);
        if (ret)
                goto out;
 
-       ret = unix_set_dir_metadata(dentry_list, ctx);
+       ret = unix_apply_metadata(dentry_list, ctx);
        if (ret)
                goto out;
 
@@ -807,6 +915,10 @@ unix_extract(struct list_head *dentry_list, struct apply_ctx *_ctx)
                        ctx->num_special_files_ignored);
        }
 out:
+#ifdef HAVE_XATTR_SUPPORT
+       free_blob_table(ctx->delayed_xattrs);
+#endif
+       FREE(ctx->data_buffer);
        for (unsigned i = 0; i < NUM_PATHBUFS; i++)
                FREE(ctx->pathbufs[i]);
        FREE(ctx->target_abspath);