+/*
+ * Compute a sort key for the file open as @h.
+ *
+ * The key is the logical cluster number (LCN) of the file's first extent, as
+ * reported by FSCTL_GET_RETRIEVAL_POINTERS starting at VCN 0.  Returns 0 if
+ * the extent information cannot be retrieved or the file has no extents.
+ */
+static noinline_for_stack u64
+get_sort_key(HANDLE h)
+{
+	STARTING_VCN_INPUT_BUFFER in = { .StartingVcn.QuadPart = 0 };
+	/* Zero-initialized so that ExtentCount reads as 0 if the ioctl leaves
+	 * the buffer untouched. */
+	RETRIEVAL_POINTERS_BUFFER out = { .ExtentCount = 0 };
+	DWORD bytesReturned;
+
+	/* 'out' has room for exactly one extent.  For a file with more than
+	 * one extent the ioctl fails with ERROR_MORE_DATA, but the buffer has
+	 * still been filled with as many extents as fit.  Only the first
+	 * extent is needed here, so that "failure" is acceptable. */
+	if (!DeviceIoControl(h, FSCTL_GET_RETRIEVAL_POINTERS,
+			     &in, sizeof(in),
+			     &out, sizeof(out),
+			     &bytesReturned, NULL) &&
+	    GetLastError() != ERROR_MORE_DATA)
+		return 0;
+
+	if (out.ExtentCount < 1)
+		return 0;
+
+	return out.Extents[0].Lcn.QuadPart;
+}
+
+/*
+ * Store @sort_key in every blob referenced by @inode's streams whose location
+ * is BLOB_IN_WINNT_FILE_ON_DISK or BLOB_WIN32_ENCRYPTED.  Streams without a
+ * blob, and blobs in any other location, are left untouched.
+ */
+static void
+set_sort_key(struct wim_inode *inode, u64 sort_key)
+{
+	unsigned idx = 0;
+
+	while (idx < inode->i_num_streams) {
+		struct blob_descriptor *cur =
+			stream_blob_resolved(&inode->i_streams[idx]);
+
+		idx++;
+
+		if (!cur)
+			continue;
+		if (cur->blob_location != BLOB_IN_WINNT_FILE_ON_DISK &&
+		    cur->blob_location != BLOB_WIN32_ENCRYPTED)
+			continue;
+
+		cur->sort_key = sort_key;
+	}
+}
+
+/*
+ * Decide whether try_to_use_wimboot_hash() is worth calling for @inode.
+ *
+ *  - Directories and encrypted files: never (they can't be externally backed).
+ *  - Reparse points: only for a WOF reparse point while capturing in WIMBOOT
+ *    mode.
+ *  - Anything else: only while WOF has not been found to be detached.
+ */
+static inline bool
+should_try_to_use_wimboot_hash(const struct wim_inode *inode,
+			       const struct winnt_scan_ctx *ctx,
+			       const struct capture_params *params)
+{
+	if ((inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) ||
+	    (inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED))
+		return false;
+
+	/* Ordinary file: the fixup is possible as long as WOF may be
+	 * attached. */
+	if (!(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT))
+		return !ctx->wof_not_attached;
+
+	/* Reparse point: must carry the WOF tag, and WIMBOOT mode must have
+	 * been requested. */
+	if (inode->i_reparse_tag != WIM_IO_REPARSE_TAG_WOF)
+		return false;
+
+	return (params->add_flags & WIMLIB_ADD_FLAG_WIMBOOT) != 0;
+}
+
+/*
+ * This function implements an optimization for capturing files from a
+ * filesystem with one or more backing WIMs. If a file is WIM-backed, then we
+ * can retrieve the SHA-1 message digest of its original contents from its
+ * reparse point. This may eliminate the need to read the file's data and/or
+ * allow the file's data to be immediately deduplicated with existing data in
+ * the WIM.
+ *
+ * If WOF is attached, then this function is merely an optimization, but
+ * potentially a very effective one. If WOF is detached, then this function
+ * really causes WIM-backed files to be, effectively, automatically
+ * "dereferenced" when possible; the unnamed data stream is updated to reference
+ * the original contents and the reparse point is removed.
+ *
+ * @h:          open handle to the file; used for FSCTL_GET_EXTERNAL_BACKING
+ *              when the inode is not a reparse point.
+ * @inode:      inode being captured; its unnamed data stream's blob, its
+ *              reparse point stream and its attributes may be modified.
+ * @blob_table: blob table used for hash lookup and deduplication.
+ * @ctx:        scan context; ctx->wof_not_attached is set to true when WOF is
+ *              found not to be attached.
+ * @full_path:  path of the file, used only in the error message.
+ *
+ * This function returns 0 if the fixup succeeded or was intentionally not
+ * executed. Otherwise it returns an error code: the error returned by
+ * read_blob_into_buf(), or WIMLIB_ERR_STAT if FSCTL_GET_EXTERNAL_BACKING
+ * failed unexpectedly.
+ */
+static noinline_for_stack int
+try_to_use_wimboot_hash(HANDLE h, struct wim_inode *inode,
+ struct blob_table *blob_table,
+ struct winnt_scan_ctx *ctx, const wchar_t *full_path)
+{
+ struct wim_inode_stream *reparse_strm = NULL;
+ struct wim_inode_stream *strm;
+ struct blob_descriptor *blob;
+ u8 hash[SHA1_HASH_SIZE];
+ int ret;
+
+ if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
+ struct reparse_buffer_disk rpbuf;
+ /* View of the reparse data area of 'rpbuf' as WOF external info
+ * followed by the WIM provider's data. */
+ struct {
+ struct wof_external_info wof_info;
+ struct wim_provider_rpdata wim_info;
+ } *rpdata = (void *)rpbuf.rpdata;
+ struct blob_descriptor *reparse_blob;
+
+ /* The file has a WOF reparse point, so WOF must be detached.
+ * We can read the reparse point directly. */
+ ctx->wof_not_attached = true;
+ reparse_strm = inode_get_unnamed_stream(inode, STREAM_TYPE_REPARSE_POINT);
+ /* NOTE(review): 'reparse_strm' is passed on without a NULL
+ * check -- confirm that an inode with
+ * FILE_ATTRIBUTE_REPARSE_POINT always has a reparse point
+ * stream here, or that stream_blob_resolved() tolerates NULL. */
+ reparse_blob = stream_blob_resolved(reparse_strm);
+
+ if (!reparse_blob || reparse_blob->size < sizeof(*rpdata))
+ return 0; /* Not a WIM-backed file */
+
+ /* NOTE(review): only a lower bound on the blob size is checked
+ * above; this assumes the reparse blob never exceeds the
+ * capacity of rpbuf.rpdata -- TODO confirm. */
+ ret = read_blob_into_buf(reparse_blob, rpdata);
+ if (ret)
+ return ret;
+
+ /* NOTE(review): the literal 2 differs from the
+ * WIM_PROVIDER_CURRENT_VERSION used in the other branch;
+ * presumably the on-disk reparse data has its own version
+ * number -- confirm. */
+ if (rpdata->wof_info.version != WOF_CURRENT_VERSION ||
+ rpdata->wof_info.provider != WOF_PROVIDER_WIM ||
+ rpdata->wim_info.version != 2)
+ return 0; /* Not a WIM-backed file */
+
+ /* Okay, this is a WIM backed file. Get its SHA-1 hash. */
+ copy_hash(hash, rpdata->wim_info.unnamed_data_stream_hash);
+ } else {
+ struct {
+ struct wof_external_info wof_info;
+ struct wim_provider_external_info wim_info;
+ } out;
+ IO_STATUS_BLOCK iosb;
+ NTSTATUS status;
+
+ /* WOF may be attached. Try reading this file's external
+ * backing info. */
+ status = (*func_NtFsControlFile)(h, NULL, NULL, NULL, &iosb,
+ FSCTL_GET_EXTERNAL_BACKING,
+ NULL, 0, &out, sizeof(out));
+
+ /* Is WOF not attached? If so, remember that in the scan
+ * context. */
+ if (status == STATUS_INVALID_DEVICE_REQUEST) {
+ ctx->wof_not_attached = true;
+ return 0;
+ }
+
+ /* Is this file not externally backed? */
+ if (status == STATUS_OBJECT_NOT_EXTERNALLY_BACKED)
+ return 0;
+
+ /* Does this file have an unknown type of external backing that
+ * needed a larger information buffer? */
+ if (status == STATUS_BUFFER_TOO_SMALL)
+ return 0;
+
+ /* Was there some other failure? */
+ if (status != STATUS_SUCCESS) {
+ winnt_error(status,
+ L"\"%ls\": FSCTL_GET_EXTERNAL_BACKING failed",
+ full_path);
+ return WIMLIB_ERR_STAT;
+ }
+
+ /* Is this file backed by a WIM? */
+ if (out.wof_info.version != WOF_CURRENT_VERSION ||
+ out.wof_info.provider != WOF_PROVIDER_WIM ||
+ out.wim_info.version != WIM_PROVIDER_CURRENT_VERSION)
+ return 0;
+
+ /* Okay, this is a WIM backed file. Get its SHA-1 hash. */
+ copy_hash(hash, out.wim_info.unnamed_data_stream_hash);
+ }
+
+ /* If the file's unnamed data stream is nonempty, then fill in its hash
+ * and deduplicate it if possible.
+ *
+ * With WOF detached, we require that the blob *must* be de-duplicable
+ * before any action can be taken, since without WOF we can't fall back
+ * to getting the "dereferenced" data by reading the stream (the real
+ * stream is sparse and contains all zeroes). */
+ strm = inode_get_unnamed_data_stream(inode);
+ if (strm && (blob = stream_blob_resolved(strm))) {
+ struct blob_descriptor **back_ptr;
+
+ /* 'reparse_strm' is non-NULL only on the WOF-detached path. */
+ if (reparse_strm && !lookup_blob(blob_table, hash))
+ return 0;
+ back_ptr = retrieve_pointer_to_unhashed_blob(blob);
+ copy_hash(blob->hash, hash);
+ /* If a different descriptor is returned, ours is no longer
+ * needed, so free it. */
+ if (after_blob_hashed(blob, back_ptr, blob_table) != blob)
+ free_blob_descriptor(blob);
+ }
+
+ /* Remove the reparse point, if present, and clear the attributes that
+ * went with it. An inode left with no attributes at all is marked
+ * FILE_ATTRIBUTE_NORMAL. */
+ if (reparse_strm) {
+ inode_remove_stream(inode, reparse_strm, blob_table);
+ inode->i_attributes &= ~(FILE_ATTRIBUTE_REPARSE_POINT |
+ FILE_ATTRIBUTE_SPARSE_FILE);
+ if (inode->i_attributes == 0)
+ inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Query information about the volume containing the file open as @h.
+ *
+ * Returns the volume's FileSystemAttributes flags, or 0 if they could not be
+ * queried.  Also stores the volume serial number in @params->capture_root_dev
+ * (0 if it could not be queried).  Failures are reported as warnings that
+ * mention @full_path; they never abort the capture.
+ */
+static noinline_for_stack u32
+get_volume_information(HANDLE h, const wchar_t *full_path,
+		       struct capture_params *params)
+{
+	FILE_FS_ATTRIBUTE_INFORMATION fs_attrs;
+	FILE_FS_VOLUME_INFORMATION fs_volume;
+	IO_STATUS_BLOCK iosb;
+	NTSTATUS status;
+	u32 vol_flags;
+	bool have_flags;
+	bool have_serial;
+
+	/* Volume flags.  The query result is usable, even on
+	 * STATUS_BUFFER_OVERFLOW, as long as enough bytes were returned to
+	 * cover the FileSystemAttributes field. */
+	status = (*func_NtQueryVolumeInformationFile)(h, &iosb,
+						      &fs_attrs,
+						      sizeof(fs_attrs),
+						      FileFsAttributeInformation);
+	have_flags = (NT_SUCCESS(status) || status == STATUS_BUFFER_OVERFLOW) &&
+		     (iosb.Information >=
+		      offsetof(FILE_FS_ATTRIBUTE_INFORMATION,
+			       FileSystemAttributes) +
+		      sizeof(fs_attrs.FileSystemAttributes));
+	if (likely(have_flags)) {
+		vol_flags = fs_attrs.FileSystemAttributes;
+	} else {
+		winnt_warning(status, L"\"%ls\": Can't get volume attributes",
+			      printable_path(full_path));
+		vol_flags = 0;
+	}
+
+	/* Volume ID.  Same validity rule, applied to the VolumeSerialNumber
+	 * field. */
+	status = (*func_NtQueryVolumeInformationFile)(h, &iosb,
+						      &fs_volume,
+						      sizeof(fs_volume),
+						      FileFsVolumeInformation);
+	have_serial = (NT_SUCCESS(status) || status == STATUS_BUFFER_OVERFLOW) &&
+		      (iosb.Information >=
+		       offsetof(FILE_FS_VOLUME_INFORMATION,
+				VolumeSerialNumber) +
+		       sizeof(fs_volume.VolumeSerialNumber));
+	if (likely(have_serial)) {
+		params->capture_root_dev = fs_volume.VolumeSerialNumber;
+	} else {
+		winnt_warning(status, L"\"%ls\": Can't get volume ID",
+			      printable_path(full_path));
+		params->capture_root_dev = 0;
+	}
+
+	return vol_flags;
+}
+
+/*
+ * Per-file metadata filled in by get_file_info() from the
+ * FILE_ALL_INFORMATION returned by NtQueryInformationFile().  Each field
+ * holds the raw value of the member named in its comment.
+ */
+struct file_info {
+ u32 attributes; /* BasicInformation.FileAttributes */
+ u32 num_links; /* StandardInformation.NumberOfLinks */
+ u64 creation_time; /* BasicInformation.CreationTime.QuadPart */
+ u64 last_write_time; /* BasicInformation.LastWriteTime.QuadPart */
+ u64 last_access_time; /* BasicInformation.LastAccessTime.QuadPart */
+ u64 ino; /* InternalInformation.IndexNumber.QuadPart */
+ u64 end_of_file; /* StandardInformation.EndOfFile.QuadPart */
+};
+
+/*
+ * Query basic metadata about the file open as @h and store it in @info.
+ *
+ * Returns STATUS_SUCCESS on success, in which case every field of @info has
+ * been set.  Otherwise returns the failing NTSTATUS and leaves @info
+ * untouched.
+ */
+static noinline_for_stack NTSTATUS
+get_file_info(HANDLE h, struct file_info *info)
+{
+	FILE_ALL_INFORMATION fai;
+	IO_STATUS_BLOCK iosb;
+	NTSTATUS status;
+
+	status = (*func_NtQueryInformationFile)(h, &iosb, &fai, sizeof(fai),
+						FileAllInformation);
+
+	/* STATUS_BUFFER_OVERFLOW is not treated as a failure: the members
+	 * read below are used regardless. */
+	if (unlikely(!(NT_SUCCESS(status) ||
+		       status == STATUS_BUFFER_OVERFLOW)))
+		return status;
+
+	*info = (struct file_info) {
+		.attributes	  = fai.BasicInformation.FileAttributes,
+		.num_links	  = fai.StandardInformation.NumberOfLinks,
+		.creation_time	  = fai.BasicInformation.CreationTime.QuadPart,
+		.last_write_time  = fai.BasicInformation.LastWriteTime.QuadPart,
+		.last_access_time = fai.BasicInformation.LastAccessTime.QuadPart,
+		.ino		  = fai.InternalInformation.IndexNumber.QuadPart,
+		.end_of_file	  = fai.StandardInformation.EndOfFile.QuadPart,
+	};
+	return STATUS_SUCCESS;
+}
+