wimlib.net Git - wimlib/blobdiff - src/win32_capture.c
Add experimental support for Windows VSS
[wimlib] / src / win32_capture.c
index 88e31248621119b7a0e176329bfca6933c5afdc5..70d7fd7ebf4fd08f797ce53c75c6d01f1d4c66be 100644 (file)
 #include "wimlib/error.h"
 #include "wimlib/paths.h"
 #include "wimlib/reparse.h"
+#include "wimlib/win32_vss.h"
+#include "wimlib/wof.h"
 
-struct winnt_scan_stats {
+struct winnt_scan_ctx {
+       u32 vol_flags;  /* volume flags; filled in from get_volume_information() at the root of the scan */
        unsigned long num_get_sd_access_denied;
        unsigned long num_get_sacl_priv_notheld;
+
+       /* True once the scan has established that WOF is not attached to the
+        * volume being scanned: either FSCTL_GET_EXTERNAL_BACKING failed with
+        * STATUS_INVALID_DEVICE_REQUEST, or a file still carrying a reparse
+        * point was handed to try_to_use_wimboot_hash().  False while WOF may
+        * still be attached.  */
+       bool wof_not_attached;
+
+       /* A reference to the VSS snapshot being used, or NULL if none.  It is
+        * obtained from vss_create_snapshot() when WIMLIB_ADD_FLAG_SNAPSHOT is
+        * given and handed to vss_put_snapshot() when the scan finishes.  */
+       struct vss_snapshot *snapshot;
 };
 
 static inline const wchar_t *
@@ -51,6 +61,135 @@ printable_path(const wchar_t *full_path)
        return full_path + 4;
 }
 
+/* Description of where data is located on a Windows filesystem.
+ *
+ * Instances are created by alloc_windows_file() or clone_windows_file() and
+ * destroyed by free_windows_file().  The structure and its path are a single
+ * allocation: 'path' is a flexible array member sized to fit the string.  */
+struct windows_file {
+
+       /* Is the data the raw encrypted data of an EFS-encrypted file?
+        * This 1-bit field shares one u64 with 'sort_key'.  */
+       u64 is_encrypted : 1;
+
+       /* The file's LCN (logical cluster number) for sorting, or 0 if unknown.
+        * alloc_windows_file() initializes it to 0; set_sort_key() fills it in
+        * and cmp_windows_files() uses it as the primary sort key.
+        */
+       u64 sort_key : 63;
+
+       /* A reference to the VSS snapshot containing the file, or NULL if none.
+        * The value is passed to vss_put_snapshot() when the structure is
+        * freed.
+        */
+       struct vss_snapshot *snapshot;
+
+       /* The path to the file.  If 'is_encrypted=0' this is an NT namespace
+        * path; if 'is_encrypted=1' this is a Win32 namespace path.  For a
+        * named data stream the path is followed by ':' and the stream name.
+        * Always null-terminated.  */
+       wchar_t path[];
+};
+
+/*
+ * Create a 'struct windows_file' that records where a data stream lives.
+ *
+ * The stored path is the first @path_nchars characters of @path.  When
+ * @stream_name_nchars is nonzero, a ':' and the first @stream_name_nchars
+ * characters of @stream_name are appended.  The result is always
+ * null-terminated.  The sort key starts out as 0, and the value returned by
+ * vss_get_snapshot(@snapshot) is stored as the snapshot reference.
+ *
+ * Returns the new structure, or NULL if the allocation fails.
+ */
+static struct windows_file *
+alloc_windows_file(bool is_encrypted, struct vss_snapshot *snapshot,
+                  const wchar_t *path, size_t path_nchars,
+                  const wchar_t *stream_name, size_t stream_name_nchars)
+{
+       const bool named = (stream_name_nchars != 0);
+       size_t total_nchars;
+       struct windows_file *wf;
+       wchar_t *end;
+
+       /* Path, optional ":streamname" suffix, and the null terminator  */
+       total_nchars = path_nchars + 1;
+       if (named)
+               total_nchars += 1 + stream_name_nchars;
+
+       wf = MALLOC(sizeof(struct windows_file) +
+                   total_nchars * sizeof(wchar_t));
+       if (wf == NULL)
+               return NULL;
+
+       wf->is_encrypted = is_encrypted;
+       wf->sort_key = 0;
+       wf->snapshot = vss_get_snapshot(snapshot);
+
+       end = wmempcpy(wf->path, path, path_nchars);
+       if (named) {
+               /* Named data stream: append ":streamname"  */
+               *end++ = L':';
+               end = wmempcpy(end, stream_name, stream_name_nchars);
+       }
+       *end = L'\0';
+       return wf;
+}
+
+/*
+ * Attach a stream whose data is located on a Windows filesystem to @inode.
+ *
+ * A stream of nonzero @size gets a new blob descriptor with location
+ * BLOB_IN_WINDOWS_FILE, whose 'windows_file' is built from @is_encrypted,
+ * @snapshot, @path and @stream_name.  A stream of size 0 is added without a
+ * blob.  Either way, the stream is created with inode_add_stream() and the
+ * blob (possibly NULL) is passed to prepare_unhashed_blob().
+ *
+ * Returns 0 on success, or WIMLIB_ERR_NOMEM if any step fails.
+ */
+static int
+add_stream(struct wim_inode *inode, bool is_encrypted,
+          struct vss_snapshot *snapshot, u64 size,
+          const wchar_t *path, size_t path_nchars,
+          int stream_type, const utf16lechar *stream_name, size_t stream_name_nchars,
+          struct list_head *unhashed_blobs)
+{
+       struct wim_inode_stream *new_strm;
+       struct blob_descriptor *desc = NULL;
+
+       /* Only a nonempty stream is given a blob descriptor.  */
+       if (size != 0) {
+               desc = new_blob_descriptor();
+               if (desc == NULL)
+                       goto out_nomem;
+
+               desc->windows_file = alloc_windows_file(is_encrypted, snapshot,
+                                                       path, path_nchars,
+                                                       stream_name,
+                                                       stream_name_nchars);
+               if (desc->windows_file == NULL)
+                       goto out_nomem;
+
+               desc->blob_location = BLOB_IN_WINDOWS_FILE;
+               desc->size = size;
+               desc->file_inode = inode;
+       }
+
+       new_strm = inode_add_stream(inode, stream_type, stream_name, desc);
+       if (new_strm == NULL)
+               goto out_nomem;
+
+       prepare_unhashed_blob(desc, inode, new_strm->stream_id, unhashed_blobs);
+       return 0;
+
+out_nomem:
+       /* 'desc' may still be NULL here, exactly as on the original path.  */
+       free_blob_descriptor(desc);
+       return WIMLIB_ERR_NOMEM;
+}
+
+/*
+ * Duplicate @file, including its null-terminated path, with memdup().  On
+ * success vss_get_snapshot() is called on the copy's snapshot pointer.
+ * Returns the copy, or NULL if memdup() fails.
+ */
+struct windows_file *
+clone_windows_file(const struct windows_file *file)
+{
+       const size_t path_size = (wcslen(file->path) + 1) * sizeof(wchar_t);
+       struct windows_file *copy;
+
+       copy = memdup(file, sizeof(struct windows_file) + path_size);
+       if (copy == NULL)
+               return NULL;
+
+       vss_get_snapshot(copy->snapshot);
+       return copy;
+}
+
+void
+free_windows_file(struct windows_file *file)   /* @file is dereferenced, so it must not be NULL */
+{
+       vss_put_snapshot(file->snapshot);       /* hand back the snapshot pointer stored in @file */
+       FREE(file);     /* path[] lives in the same allocation, so this frees it too */
+}
+
+/*
+ * Ordering function for two 'struct windows_file's.  The result is that of
+ * cmp_u64() on the sort keys when it is nonzero, and otherwise that of
+ * wcscmp() on the paths.
+ */
+int
+cmp_windows_files(const struct windows_file *file1,
+                 const struct windows_file *file2)
+{
+       /* Primary key: starting LCN (logical cluster number)  */
+       const int lcn_order = cmp_u64(file1->sort_key, file2->sort_key);
+
+       /* Tie-break on the path: only a heuristic, which tends to keep files
+        * from the same directory next to each other.  */
+       return (lcn_order != 0) ? lcn_order
+                               : wcscmp(file1->path, file2->path);
+}
+
+const wchar_t *
+get_windows_file_path(const struct windows_file *file)
+{
+       return file->path;      /* pointer into @file itself, not a copy */
+}
+
 /*
  * If cur_dir is not NULL, open an existing file relative to the already-open
  * directory cur_dir.
@@ -102,22 +241,16 @@ retry:
        return status;
 }
 
-/* Read the first @size bytes from the file, or named data stream of a file,
- * described by @blob.  */
-int
-read_winnt_stream_prefix(const struct blob_descriptor *blob, u64 size,
+static int
+read_winnt_stream_prefix(const wchar_t *path, u64 size,
                         const struct read_blob_callbacks *cbs)
 {
-       const wchar_t *path;
        HANDLE h;
        NTSTATUS status;
        u8 buf[BUFFER_SIZE];
        u64 bytes_remaining;
        int ret;
 
-       /* This is an NT namespace path.  */
-       path = blob->file_on_disk;
-
        status = winnt_openat(NULL, path, wcslen(path),
                              FILE_READ_DATA | SYNCHRONIZE, &h);
        if (!NT_SUCCESS(status)) {
@@ -137,10 +270,16 @@ read_winnt_stream_prefix(const struct blob_descriptor *blob, u64 size,
 
                status = (*func_NtReadFile)(h, NULL, NULL, NULL,
                                            &iosb, buf, count, NULL, NULL);
-               if (!NT_SUCCESS(status)) {
-                       winnt_error(status, L"\"%ls\": Error reading data",
-                                   printable_path(path));
-                       ret = WIMLIB_ERR_READ;
+               if (unlikely(!NT_SUCCESS(status))) {
+                       if (status == STATUS_END_OF_FILE) {
+                               ERROR("\"%ls\": File was concurrently truncated",
+                                     printable_path(path));
+                               ret = WIMLIB_ERR_CONCURRENT_MODIFICATION_DETECTED;
+                       } else {
+                               winnt_error(status, L"\"%ls\": Error reading data",
+                                           printable_path(path));
+                               ret = WIMLIB_ERR_READ;
+                       }
                        break;
                }
 
@@ -182,9 +321,8 @@ win32_encrypted_export_cb(unsigned char *data, void *_ctx, unsigned long len)
        return ERROR_SUCCESS;
 }
 
-int
-read_win32_encrypted_file_prefix(const struct blob_descriptor *blob,
-                                u64 size,
+static int
+read_win32_encrypted_file_prefix(const wchar_t *path, bool is_dir, u64 size,
                                 const struct read_blob_callbacks *cbs)
 {
        struct win32_encrypted_read_ctx export_ctx;
@@ -193,18 +331,18 @@ read_win32_encrypted_file_prefix(const struct blob_descriptor *blob,
        int ret;
        DWORD flags = 0;
 
-       if (blob->file_inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
+       if (is_dir)
                flags |= CREATE_FOR_DIR;
 
        export_ctx.cbs = cbs;
        export_ctx.wimlib_err_code = 0;
        export_ctx.bytes_remaining = size;
 
-       err = OpenEncryptedFileRaw(blob->file_on_disk, flags, &file_ctx);
+       err = OpenEncryptedFileRaw(path, flags, &file_ctx);
        if (err != ERROR_SUCCESS) {
                win32_error(err,
                            L"Failed to open encrypted file \"%ls\" for raw read",
-                           printable_path(blob->file_on_disk));
+                           printable_path(path));
                return WIMLIB_ERR_OPEN;
        }
        err = ReadEncryptedFileRaw(win32_encrypted_export_cb,
@@ -214,14 +352,14 @@ read_win32_encrypted_file_prefix(const struct blob_descriptor *blob,
                if (ret == 0) {
                        win32_error(err,
                                    L"Failed to read encrypted file \"%ls\"",
-                                   printable_path(blob->file_on_disk));
+                                   printable_path(path));
                        ret = WIMLIB_ERR_READ;
                }
        } else if (export_ctx.bytes_remaining != 0) {
                ERROR("Only could read %"PRIu64" of %"PRIu64" bytes from "
                      "encrypted file \"%ls\"",
                      size - export_ctx.bytes_remaining, size,
-                     printable_path(blob->file_on_disk));
+                     printable_path(path));
                ret = WIMLIB_ERR_READ;
        } else {
                ret = 0;
@@ -230,6 +368,22 @@ read_win32_encrypted_file_prefix(const struct blob_descriptor *blob,
        return ret;
 }
 
+/*
+ * Read the first @size bytes of the data described by @blob, which must refer
+ * to a 'struct windows_file'.  This is either a file or named data stream read
+ * through its NT namespace path, or the raw encrypted data of an EFS-encrypted
+ * file read through its Win32 namespace path.  The data is delivered through
+ * @cbs.  The return value is that of the reader that was dispatched to.
+ */
+int
+read_windows_file_prefix(const struct blob_descriptor *blob, u64 size,
+                        const struct read_blob_callbacks *cbs)
+{
+       const struct windows_file *wf = blob->windows_file;
+       bool is_dir;
+
+       /* Expected case: not EFS raw data  */
+       if (likely(!wf->is_encrypted))
+               return read_winnt_stream_prefix(wf->path, size, cbs);
+
+       is_dir = (blob->file_inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY);
+       return read_win32_encrypted_file_prefix(wf->path, is_dir, size, cbs);
+}
+
 /*
  * Load the short name of a file into a WIM dentry.
  */
@@ -264,7 +418,7 @@ winnt_get_short_name(HANDLE h, struct wim_dentry *dentry)
 static noinline_for_stack NTSTATUS
 winnt_get_security_descriptor(HANDLE h, struct wim_inode *inode,
                              struct wim_sd_set *sd_set,
-                             struct winnt_scan_stats *stats, int add_flags)
+                             struct winnt_scan_ctx *ctx, int add_flags)
 {
        SECURITY_INFORMATION requestedInformation;
        u8 _buf[4096] _aligned_attribute(8);
@@ -350,7 +504,7 @@ winnt_get_security_descriptor(HANDLE h, struct wim_inode *inode,
                        }
                        if (requestedInformation & SACL_SECURITY_INFORMATION) {
                                /* Try again without the SACL.  */
-                               stats->num_get_sacl_priv_notheld++;
+                               ctx->num_get_sacl_priv_notheld++;
                                requestedInformation &= ~(SACL_SECURITY_INFORMATION |
                                                          LABEL_SECURITY_INFORMATION |
                                                          BACKUP_SECURITY_INFORMATION);
@@ -358,7 +512,7 @@ winnt_get_security_descriptor(HANDLE h, struct wim_inode *inode,
                        }
                        /* Fake success (useful when capturing as
                         * non-Administrator).  */
-                       stats->num_get_sd_access_denied++;
+                       ctx->num_get_sd_access_denied++;
                        status = STATUS_SUCCESS;
                        goto out_free_buf;
                }
@@ -383,8 +537,7 @@ winnt_build_dentry_tree_recursive(struct wim_dentry **root_ret,
                                  const wchar_t *filename,
                                  size_t filename_nchars,
                                  struct capture_params *params,
-                                 struct winnt_scan_stats *stats,
-                                 u32 vol_flags);
+                                 struct winnt_scan_ctx *ctx);
 
 static int
 winnt_recurse_directory(HANDLE h,
@@ -392,8 +545,7 @@ winnt_recurse_directory(HANDLE h,
                        size_t full_path_nchars,
                        struct wim_dentry *parent,
                        struct capture_params *params,
-                       struct winnt_scan_stats *stats,
-                       u32 vol_flags)
+                       struct winnt_scan_ctx *ctx)
 {
        void *buf;
        const size_t bufsize = 8192;
@@ -442,8 +594,7 @@ winnt_recurse_directory(HANDLE h,
                                                        filename,
                                                        info->FileNameLength / 2,
                                                        params,
-                                                       stats,
-                                                       vol_flags);
+                                                       ctx);
 
                                full_path[full_path_nchars] = L'\0';
 
@@ -797,49 +948,32 @@ win32_get_encrypted_file_size(const wchar_t *path, bool is_dir, u64 *size_ret)
 }
 
 static int
-winnt_scan_efsrpc_raw_data(struct wim_inode *inode, const wchar_t *nt_path,
-                          struct list_head *unhashed_blobs)
+winnt_scan_efsrpc_raw_data(struct wim_inode *inode,
+                          wchar_t *path, size_t path_nchars,
+                          struct list_head *unhashed_blobs,
+                          struct vss_snapshot *snapshot)
 {
-       struct blob_descriptor *blob;
-       struct wim_inode_stream *strm;
+       const bool is_dir = (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY);
+       u64 size;
        int ret;
 
-       blob = new_blob_descriptor();
-       if (!blob)
-               goto err_nomem;
-
-       blob->file_on_disk = WCSDUP(nt_path);
-       if (!blob->file_on_disk)
-               goto err_nomem;
-       blob->blob_location = BLOB_WIN32_ENCRYPTED;
-
        /* OpenEncryptedFileRaw() expects a Win32 name.  @path arrives as an NT
+        * namespace path (the assertion below checks its 4-character prefix),
+        * so its second character is overwritten in place to form the Win32
+        * prefix for the calls that follow.  It is put back at 'out', so the
+        * caller's buffer is unchanged on return whether or not the scan
+        * succeeded.  */
-       wimlib_assert(!wmemcmp(blob->file_on_disk, L"\\??\\", 4));
-       blob->file_on_disk[1] = L'\\';
-
-       blob->file_inode = inode;
+       wimlib_assert(!wmemcmp(path, L"\\??\\", 4));
+       path[1] = L'\\';
 
-       ret = win32_get_encrypted_file_size(blob->file_on_disk,
-                                           (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY),
-                                           &blob->size);
+       ret = win32_get_encrypted_file_size(path, is_dir, &size);
        if (ret)
-               goto err;
+               goto out;
 
        /* Empty EFSRPC data does not make sense  */
-       wimlib_assert(blob->size != 0);
+       wimlib_assert(size != 0);
 
-       strm = inode_add_stream(inode, STREAM_TYPE_EFSRPC_RAW_DATA,
-                               NO_STREAM_NAME, blob);
-       if (!strm)
-               goto err_nomem;
-
-       prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs);
-       return 0;
-
-err_nomem:
-       ret = WIMLIB_ERR_NOMEM;
-err:
-       free_blob_descriptor(blob);
+       ret = add_stream(inode, true, snapshot, size,   /* is_encrypted=true; the path is copied while in Win32 form */
+                        path, path_nchars,
+                        STREAM_TYPE_EFSRPC_RAW_DATA, NO_STREAM_NAME, 0,
+                        unhashed_blobs);
+out:
+       path[1] = L'?'; /* restore the NT namespace prefix in the caller's buffer */
        return ret;
 }
 
@@ -876,43 +1010,15 @@ get_data_stream_name(wchar_t *raw_stream_name, size_t raw_stream_name_nchars,
        return true;
 }
 
-/* Build the path to the data stream.  For unnamed streams, this is simply the
- * path to the file.  For named streams, this is the path to the file, followed
- * by a colon, followed by the stream name.  */
-static wchar_t *
-build_data_stream_path(const wchar_t *path, size_t path_nchars,
-                      const wchar_t *stream_name, size_t stream_name_nchars)
-{
-       size_t stream_path_nchars;
-       wchar_t *stream_path;
-       wchar_t *p;
-
-       stream_path_nchars = path_nchars;
-       if (stream_name_nchars)
-               stream_path_nchars += 1 + stream_name_nchars;
-
-       stream_path = MALLOC((stream_path_nchars + 1) * sizeof(wchar_t));
-       if (stream_path) {
-               p = wmempcpy(stream_path, path, path_nchars);
-               if (stream_name_nchars) {
-                       *p++ = L':';
-                       p = wmempcpy(p, stream_name, stream_name_nchars);
-               }
-               *p++ = L'\0';
-       }
-       return stream_path;
-}
-
 static int
 winnt_scan_data_stream(const wchar_t *path, size_t path_nchars,
                       wchar_t *raw_stream_name, size_t raw_stream_name_nchars,
                       u64 stream_size,
-                      struct wim_inode *inode, struct list_head *unhashed_blobs)
+                      struct wim_inode *inode, struct list_head *unhashed_blobs,
+                      struct vss_snapshot *snapshot)
 {
        wchar_t *stream_name;
        size_t stream_name_nchars;
-       struct blob_descriptor *blob;
-       struct wim_inode_stream *strm;
 
        /* Given the raw stream name (which is something like
         * :streamname:$DATA), extract just the stream name part (streamname).
@@ -923,34 +1029,10 @@ winnt_scan_data_stream(const wchar_t *path, size_t path_nchars,
 
        stream_name[stream_name_nchars] = L'\0';
 
-       /* If the stream is non-empty, set up a blob descriptor for it.  */
-       if (stream_size != 0) {
-               blob = new_blob_descriptor();
-               if (!blob)
-                       goto err_nomem;
-               blob->file_on_disk = build_data_stream_path(path,
-                                                           path_nchars,
-                                                           stream_name,
-                                                           stream_name_nchars);
-               if (!blob->file_on_disk)
-                       goto err_nomem;
-               blob->blob_location = BLOB_IN_WINNT_FILE_ON_DISK;
-               blob->size = stream_size;
-               blob->file_inode = inode;
-       } else {
-               blob = NULL;
-       }
-
-       strm = inode_add_stream(inode, STREAM_TYPE_DATA, stream_name, blob);
-       if (!strm)
-               goto err_nomem;
-
-       prepare_unhashed_blob(blob, inode, strm->stream_id, unhashed_blobs);
-       return 0;
-
-err_nomem:
-       free_blob_descriptor(blob);
-       return WIMLIB_ERR_NOMEM;
+       return add_stream(inode, false, snapshot, stream_size,
+                         path, path_nchars,
+                         STREAM_TYPE_DATA, stream_name, stream_name_nchars,
+                         unhashed_blobs);
 }
 
 /*
@@ -970,7 +1052,8 @@ err_nomem:
 static noinline_for_stack int
 winnt_scan_data_streams(HANDLE h, const wchar_t *path, size_t path_nchars,
                        struct wim_inode *inode, struct list_head *unhashed_blobs,
-                       u64 file_size, u32 vol_flags)
+                       u64 file_size, u32 vol_flags,
+                       struct vss_snapshot *snapshot)
 {
        int ret;
        u8 _buf[4096] _aligned_attribute(8);
@@ -1038,7 +1121,8 @@ winnt_scan_data_streams(HANDLE h, const wchar_t *path, size_t path_nchars,
                                             info->StreamName,
                                             info->StreamNameLength / 2,
                                             info->StreamSize.QuadPart,
-                                            inode, unhashed_blobs);
+                                            inode, unhashed_blobs,
+                                            snapshot);
                if (ret)
                        goto out_free_buf;
 
@@ -1066,7 +1150,8 @@ unnamed_only:
        {
                wchar_t stream_name[] = L"::$DATA";
                ret = winnt_scan_data_stream(path, path_nchars, stream_name, 7,
-                                            file_size, inode, unhashed_blobs);
+                                            file_size, inode, unhashed_blobs,
+                                            snapshot);
        }
 out_free_buf:
        /* Free buffer if allocated on heap.  */
@@ -1100,10 +1185,161 @@ set_sort_key(struct wim_inode *inode, u64 sort_key)
        for (unsigned i = 0; i < inode->i_num_streams; i++) {
                struct wim_inode_stream *strm = &inode->i_streams[i];
                struct blob_descriptor *blob = stream_blob_resolved(strm);
-               if (blob && (blob->blob_location == BLOB_IN_WINNT_FILE_ON_DISK ||
-                            blob->blob_location == BLOB_WIN32_ENCRYPTED))
-                       blob->sort_key = sort_key;
+               if (blob && blob->blob_location == BLOB_IN_WINDOWS_FILE)
+                       blob->windows_file->sort_key = sort_key;
+       }
+}
+
+/*
+ * Decide whether try_to_use_wimboot_hash() should be attempted for @inode.
+ */
+static inline bool
+should_try_to_use_wimboot_hash(const struct wim_inode *inode,
+                              const struct winnt_scan_ctx *ctx,
+                              const struct capture_params *params)
+{
+       /* External backing is not valid for directories...  */
+       if (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY)
+               return false;
+
+       /* ...nor for encrypted files.  */
+       if (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)
+               return false;
+
+       /* Not a reparse point: try the hash fixup if WOF may be attached.  */
+       if (!(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT))
+               return !ctx->wof_not_attached;
+
+       /* Reparse point: try the hash fixup only for a WOF reparse point, and
+        * only in WIMBOOT mode.  */
+       if (inode->i_reparse_tag != WIM_IO_REPARSE_TAG_WOF)
+               return false;
+       return (params->add_flags & WIMLIB_ADD_FLAG_WIMBOOT) != 0;
+}
+
+/*
+ * This function implements an optimization for capturing files from a
+ * filesystem with a backing WIM(s).  If a file is WIM-backed, then we can
+ * retrieve the SHA-1 message digest of its original contents from its reparse
+ * point.  This may eliminate the need to read the file's data and/or allow the
+ * file's data to be immediately deduplicated with existing data in the WIM.
+ *
+ * If WOF is attached, then this function is merely an optimization, but
+ * potentially a very effective one.  If WOF is detached, then this function
+ * really causes WIM-backed files to be, effectively, automatically
+ * "dereferenced" when possible; the unnamed data stream is updated to reference
+ * the original contents and the reparse point is removed.
+ *
+ * This function returns 0 if the fixup succeeded or was intentionally not
+ * executed.  Otherwise it returns an error code.
+ *
+ * @h is only used to issue FSCTL_GET_EXTERNAL_BACKING, and only when @inode is
+ * not a reparse point.  @ctx->wof_not_attached is set to true when @inode is a
+ * reparse point (should_try_to_use_wimboot_hash() only lets WOF reparse points
+ * through) or when the FSCTL fails with STATUS_INVALID_DEVICE_REQUEST.
+ * @full_path is only used in the error message.  The error codes visible here
+ * are whatever read_blob_into_buf() returns, and WIMLIB_ERR_STAT for an
+ * unexpected FSCTL failure.
+ */
+static noinline_for_stack int
+try_to_use_wimboot_hash(HANDLE h, struct wim_inode *inode,
+                       struct blob_table *blob_table,
+                       struct winnt_scan_ctx *ctx, const wchar_t *full_path)
+{
+       struct wim_inode_stream *reparse_strm = NULL;
+       struct wim_inode_stream *strm;
+       struct blob_descriptor *blob;
+       u8 hash[SHA1_HASH_SIZE];
+       int ret;
+
+       if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
+               struct reparse_buffer_disk rpbuf;
+               struct {
+                       struct wof_external_info wof_info;
+                       struct wim_provider_rpdata wim_info;
+               } *rpdata = (void *)rpbuf.rpdata;
+               struct blob_descriptor *reparse_blob;
+
+               /* The file has a WOF reparse point, so WOF must be detached.
+                * We can read the reparse point directly.
+                *
+                * NOTE(review): reparse_strm is handed to
+                * stream_blob_resolved() without a NULL check; presumably an
+                * inode with FILE_ATTRIBUTE_REPARSE_POINT always has an
+                * unnamed reparse point stream by this point -- confirm.  */
+               ctx->wof_not_attached = true;
+               reparse_strm = inode_get_unnamed_stream(inode, STREAM_TYPE_REPARSE_POINT);
+               reparse_blob = stream_blob_resolved(reparse_strm);
+
+               if (!reparse_blob || reparse_blob->size < sizeof(*rpdata))
+                       return 0;  /* Not a WIM-backed file  */
+
+               ret = read_blob_into_buf(reparse_blob, rpdata);
+               if (ret)
+                       return ret;
+
+               if (rpdata->wof_info.version != WOF_CURRENT_VERSION ||
+                   rpdata->wof_info.provider != WOF_PROVIDER_WIM ||
+                   rpdata->wim_info.version != 2)
+                       return 0;  /* Not a WIM-backed file  */
+
+               /* Okay, this is a WIM backed file.  Get its SHA-1 hash.  */
+               copy_hash(hash, rpdata->wim_info.unnamed_data_stream_hash);
+       } else {
+               struct {
+                       struct wof_external_info wof_info;
+                       struct wim_provider_external_info wim_info;
+               } out;
+               IO_STATUS_BLOCK iosb;
+               NTSTATUS status;
+
+               /* WOF may be attached.  Try reading this file's external
+                * backing info.  */
+               status = (*func_NtFsControlFile)(h, NULL, NULL, NULL, &iosb,
+                                                FSCTL_GET_EXTERNAL_BACKING,
+                                                NULL, 0, &out, sizeof(out));
+
+               /* Is WOF not attached?  If so, remember that in the scan
+                * context; should_try_to_use_wimboot_hash() then stops
+                * requesting this path for files that are not reparse points.  */
+               if (status == STATUS_INVALID_DEVICE_REQUEST) {
+                       ctx->wof_not_attached = true;
+                       return 0;
+               }
+
+               /* Is this file not externally backed?  */
+               if (status == STATUS_OBJECT_NOT_EXTERNALLY_BACKED)
+                       return 0;
+
+               /* Does this file have an unknown type of external backing that
+                * needed a larger information buffer?  */
+               if (status == STATUS_BUFFER_TOO_SMALL)
+                       return 0;
+
+               /* Was there some other failure?  */
+               if (status != STATUS_SUCCESS) {
+                       winnt_error(status,
+                                   L"\"%ls\": FSCTL_GET_EXTERNAL_BACKING failed",
+                                   full_path);
+                       return WIMLIB_ERR_STAT;
+               }
+
+               /* Is this file backed by a WIM?  */
+               if (out.wof_info.version != WOF_CURRENT_VERSION ||
+                   out.wof_info.provider != WOF_PROVIDER_WIM ||
+                   out.wim_info.version != WIM_PROVIDER_CURRENT_VERSION)
+                       return 0;
+
+               /* Okay, this is a WIM backed file.  Get its SHA-1 hash.  */
+               copy_hash(hash, out.wim_info.unnamed_data_stream_hash);
+       }
+
+       /* If the file's unnamed data stream is nonempty, then fill in its hash
+        * and deduplicate it if possible.
+        *
+        * With WOF detached, we require that the blob *must* be de-duplicable
+        * before any action can be taken, since without WOF we can't fall back
+        * to getting the "dereferenced" data by reading the stream (the real
+        * stream is sparse and contains all zeroes).  reparse_strm is non-NULL
+        * only on the WOF-detached path above, so it doubles as that flag.
+        *
+        * NOTE(review): presumably after_blob_hashed() returns a different,
+        * already-known descriptor when the hash is a duplicate, which is why
+        * 'blob' is freed in that case -- confirm against its definition.  */
+       strm = inode_get_unnamed_data_stream(inode);
+       if (strm && (blob = stream_blob_resolved(strm))) {
+               struct blob_descriptor **back_ptr;
+
+               if (reparse_strm && !lookup_blob(blob_table, hash))
+                       return 0;
+               back_ptr = retrieve_pointer_to_unhashed_blob(blob);
+               copy_hash(blob->hash, hash);
+               if (after_blob_hashed(blob, back_ptr, blob_table) != blob)
+                       free_blob_descriptor(blob);
+       }
+
+       /* Remove the reparse point, if present.  The reparse point and sparse
+        * attributes are cleared with it; if that leaves no attributes at all,
+        * FILE_ATTRIBUTE_NORMAL is substituted.  */
+       if (reparse_strm) {
+               inode_remove_stream(inode, reparse_strm, blob_table);
+               inode->i_attributes &= ~(FILE_ATTRIBUTE_REPARSE_POINT |
+                                        FILE_ATTRIBUTE_SPARSE_FILE);
+               if (inode->i_attributes == 0)
+                       inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
+       }
+
+       return 0;
+}
 
 static noinline_for_stack u32
@@ -1196,8 +1432,7 @@ winnt_build_dentry_tree_recursive(struct wim_dentry **root_ret,
                                  const wchar_t *filename,
                                  size_t filename_nchars,
                                  struct capture_params *params,
-                                 struct winnt_scan_stats *stats,
-                                 u32 vol_flags)
+                                 struct winnt_scan_ctx *ctx)
 {
        struct wim_dentry *root = NULL;
        struct wim_inode *inode = NULL;
@@ -1269,7 +1504,7 @@ retry_open:
 
        if (unlikely(!cur_dir)) {
                /* Root of tree being captured; get volume information.  */
-               vol_flags = get_volume_information(h, full_path, params);
+               ctx->vol_flags = get_volume_information(h, full_path, params);
                params->capture_root_ino = file_info.ino;
        }
 
@@ -1323,10 +1558,10 @@ retry_open:
        /* Get the file's security descriptor, unless we are capturing in
         * NO_ACLS mode or the volume does not support security descriptors.  */
        if (!(params->add_flags & WIMLIB_ADD_FLAG_NO_ACLS)
-           && (vol_flags & FILE_PERSISTENT_ACLS))
+           && (ctx->vol_flags & FILE_PERSISTENT_ACLS))
        {
                status = winnt_get_security_descriptor(h, inode,
-                                                      params->sd_set, stats,
+                                                      params->sd_set, ctx,
                                                       params->add_flags);
                if (!NT_SUCCESS(status)) {
                        winnt_error(status,
@@ -1357,8 +1592,11 @@ retry_open:
                 * needed.  */
                (*func_NtClose)(h);
                h = NULL;
-               ret = winnt_scan_efsrpc_raw_data(inode, full_path,
-                                                params->unhashed_blobs);
+               ret = winnt_scan_efsrpc_raw_data(inode,
+                                                full_path,
+                                                full_path_nchars,
+                                                params->unhashed_blobs,
+                                                ctx->snapshot);
                if (ret)
                        goto out;
        } else {
@@ -1378,7 +1616,15 @@ retry_open:
                                              inode,
                                              params->unhashed_blobs,
                                              file_info.end_of_file,
-                                             vol_flags);
+                                             ctx->vol_flags,
+                                             ctx->snapshot);
+               if (ret)
+                       goto out;
+       }
+
+       if (unlikely(should_try_to_use_wimboot_hash(inode, ctx, params))) {
+               ret = try_to_use_wimboot_hash(h, inode, params->blob_table, ctx,
+                                             full_path);
                if (ret)
                        goto out;
        }
@@ -1412,8 +1658,7 @@ retry_open:
                                              full_path_nchars,
                                              root,
                                              params,
-                                             stats,
-                                             vol_flags);
+                                             ctx);
                if (ret)
                        goto out;
        }
@@ -1437,22 +1682,22 @@ out:
 }
 
 static void
-winnt_do_scan_warnings(const wchar_t *path, const struct winnt_scan_stats *stats)
+winnt_do_scan_warnings(const wchar_t *path, const struct winnt_scan_ctx *ctx)
 {
-       if (likely(stats->num_get_sacl_priv_notheld == 0 &&
-                  stats->num_get_sd_access_denied == 0))
+       if (likely(ctx->num_get_sacl_priv_notheld == 0 &&
+                  ctx->num_get_sd_access_denied == 0))
                return;
 
        WARNING("Scan of \"%ls\" complete, but with one or more warnings:", path);
-       if (stats->num_get_sacl_priv_notheld != 0) {
+       if (ctx->num_get_sacl_priv_notheld != 0) {
                WARNING("- Could not capture SACL (System Access Control List)\n"
                        "            on %lu files or directories.",
-                       stats->num_get_sacl_priv_notheld);
+                       ctx->num_get_sacl_priv_notheld);
        }
-       if (stats->num_get_sd_access_denied != 0) {
+       if (ctx->num_get_sd_access_denied != 0) {
                WARNING("- Could not capture security descriptor at all\n"
                        "            on %lu files or directories.",
-                       stats->num_get_sd_access_denied);
+                       ctx->num_get_sd_access_denied);
        }
        WARNING("To fully capture all security descriptors, run the program\n"
                "          with Administrator rights.");
@@ -1466,11 +1711,11 @@ win32_build_dentry_tree(struct wim_dentry **root_ret,
                        const wchar_t *root_disk_path,
                        struct capture_params *params)
 {
-       wchar_t *path;
-       int ret;
+       wchar_t *path = NULL;
+       struct winnt_scan_ctx ctx = {};
        UNICODE_STRING ntpath;
-       struct winnt_scan_stats stats;
        size_t ntpath_nchars;
+       int ret;
 
        /* WARNING: There is no check for overflow later when this buffer is
         * being used!  But it's as long as the maximum path length understood
@@ -1479,9 +1724,13 @@ win32_build_dentry_tree(struct wim_dentry **root_ret,
        if (!path)
                return WIMLIB_ERR_NOMEM;
 
-       ret = win32_path_to_nt_path(root_disk_path, &ntpath);
+       if (params->add_flags & WIMLIB_ADD_FLAG_SNAPSHOT)
+               ret = vss_create_snapshot(root_disk_path, &ntpath, &ctx.snapshot);
+       else
+               ret = win32_path_to_nt_path(root_disk_path, &ntpath);
+
        if (ret)
-               goto out_free_path;
+               goto out;
 
        if (ntpath.Length < 4 * sizeof(wchar_t) ||
            ntpath.Length > WINDOWS_NT_MAX_PATH * sizeof(wchar_t) ||
@@ -1501,17 +1750,16 @@ win32_build_dentry_tree(struct wim_dentry **root_ret,
        }
        HeapFree(GetProcessHeap(), 0, ntpath.Buffer);
        if (ret)
-               goto out_free_path;
-
-       memset(&stats, 0, sizeof(stats));
+               goto out;
 
        ret = winnt_build_dentry_tree_recursive(root_ret, NULL,
                                                path, ntpath_nchars,
-                                               L"", 0, params, &stats, 0);
-out_free_path:
+                                               L"", 0, params, &ctx);
+out:
+       vss_put_snapshot(ctx.snapshot);
        FREE(path);
        if (ret == 0)
-               winnt_do_scan_warnings(root_disk_path, &stats);
+               winnt_do_scan_warnings(root_disk_path, &ctx);
        return ret;
 }