+ CloseEncryptedFileRaw(file_ctx);
+ return ret;
+}
+
+/*
+ * Add the raw encrypted (EFSRPC) data of the file at the current scan path to
+ * @inode as an unnamed STREAM_TYPE_EFSRPC_RAW_DATA stream.
+ *
+ * The current path must begin with the NT namespace prefix (backslash, two
+ * question marks, backslash).  Its second character is temporarily changed to
+ * a backslash to form the Win32 spelling expected by OpenEncryptedFileRaw(),
+ * and is changed back to a question mark before returning, whether or not the
+ * call succeeded.
+ *
+ * Returns 0 on success; otherwise the nonzero code returned by
+ * win32_get_encrypted_file_size() or add_stream().
+ */
+static int
+winnt_scan_efsrpc_raw_data(struct wim_inode *inode,
+			   struct winnt_scan_ctx *ctx)
+{
+	wchar_t *cur_path = ctx->params->cur_path;
+	const size_t cur_path_nchars = ctx->params->cur_path_nchars;
+	const bool is_dir = (inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY);
+	u64 raw_size;
+	int ret;
+
+	/* Switch the prefix to the Win32 form for the EFS raw-data APIs. */
+	wimlib_assert(!wmemcmp(cur_path, L"\\??\\", 4));
+	cur_path[1] = L'\\';
+
+	ret = win32_get_encrypted_file_size(cur_path, is_dir, &raw_size);
+	if (ret == 0) {
+		struct windows_file *file;
+
+		/* A zero-length EFSRPC blob would be meaningless. */
+		wimlib_assert(raw_size != 0);
+
+		file = alloc_windows_file(cur_path, cur_path_nchars, NULL, 0,
+					  ctx->snapshot, true);
+		ret = add_stream(inode, file, raw_size,
+				 STREAM_TYPE_EFSRPC_RAW_DATA, NO_STREAM_NAME,
+				 ctx->params->unhashed_blobs);
+	}
+
+	/* Always put the NT namespace prefix back. */
+	cur_path[1] = L'?';
+	return ret;
+}
+
+/*
+ * Split a raw stream name of the form ":NAME:TYPE" and accept it only if TYPE
+ * is exactly "$DATA".
+ *
+ * @raw_stream_name / @raw_stream_name_nchars: the raw name and its length in
+ *	characters; it does not need to be null-terminated.
+ * @stream_name_ret / @stream_name_nchars_ret: on success, receive a pointer
+ *	into @raw_stream_name at the start of NAME and the length of NAME in
+ *	characters (0 for the unnamed stream, "::$DATA").
+ *
+ * Returns true if the name was a well-formed $DATA stream name, else false (in
+ * which case the output parameters are not written).
+ */
+static bool
+get_data_stream_name(const wchar_t *raw_stream_name, size_t raw_stream_name_nchars,
+		     const wchar_t **stream_name_ret, size_t *stream_name_nchars_ret)
+{
+	const wchar_t *name;
+	const wchar_t *colon;
+	size_t nchars_after_first_colon;
+	size_t type_nchars;
+
+	/* The raw name must start with the leading colon. */
+	if (raw_stream_name_nchars == 0 || raw_stream_name[0] != L':')
+		return false;
+
+	name = &raw_stream_name[1];
+	nchars_after_first_colon = raw_stream_name_nchars - 1;
+
+	/* Locate the colon that separates NAME from TYPE. */
+	colon = wmemchr(name, L':', nchars_after_first_colon);
+	if (colon == NULL)
+		return false;
+
+	/* Everything after that colon must be exactly "$DATA". */
+	type_nchars = nchars_after_first_colon - (size_t)(colon - name) - 1;
+	if (type_nchars != 5 || wmemcmp(colon + 1, L"$DATA", 5) != 0)
+		return false;
+
+	*stream_name_ret = name;
+	*stream_name_nchars_ret = (size_t)(colon - name);
+	return true;
+}
+
+/*
+ * Add one data stream, described by its raw name (something like
+ * ":streamname:$DATA") and its size, to @inode.
+ *
+ * Raw names that are not well-formed $DATA stream names are silently ignored
+ * (0 is returned).  For accepted names, the colon that follows the stream
+ * name inside @raw_stream_name is overwritten with a null terminator, so the
+ * caller's buffer must be writable.
+ *
+ * Returns 0 if the stream was ignored, otherwise the result of add_stream().
+ */
+static int
+winnt_scan_data_stream(wchar_t *raw_stream_name, size_t raw_stream_name_nchars,
+		       u64 stream_size, struct wim_inode *inode,
+		       struct winnt_scan_ctx *ctx)
+{
+	wchar_t *name;
+	size_t name_nchars;
+	struct windows_file *file;
+	bool is_data_stream;
+
+	is_data_stream = get_data_stream_name(raw_stream_name,
+					      raw_stream_name_nchars,
+					      (const wchar_t **)&name,
+					      &name_nchars);
+	if (!is_data_stream)
+		return 0;
+
+	/* Terminate the extracted name in place. */
+	name[name_nchars] = L'\0';
+
+	file = alloc_windows_file(ctx->params->cur_path,
+				  ctx->params->cur_path_nchars,
+				  name, name_nchars,
+				  ctx->snapshot, false);
+	return add_stream(inode, file, stream_size, STREAM_TYPE_DATA,
+			  name, ctx->params->unhashed_blobs);
+}
+
+/*
+ * Load information about the data streams of an open file into a WIM inode.
+ *
+ * We use the NtQueryInformationFile() system call instead of FindFirstStream()
+ * and FindNextStream(). This is done for two reasons:
+ *
+ * - FindFirstStream() opens its own handle to the file or directory and
+ * apparently does so without specifying FILE_FLAG_BACKUP_SEMANTICS, thereby
+ * causing access denied errors on certain files (even when running as the
+ * Administrator).
+ * - FindFirstStream() and FindNextStream() are only available on Windows Vista
+ * and later, whereas the stream support in NtQueryInformationFile() was
+ * already present in Windows XP.
+ *
+ * @h: open handle to the file being scanned.
+ * @inode: WIM inode that receives the streams.
+ * @file_size: size used for the unnamed data stream; only consulted on the
+ * "unnamed_only" fallback path.
+ * @ctx: scan context (volume flags, current path, unhashed blob list).
+ *
+ * The query starts with a 4096-byte stack buffer; each STATUS_BUFFER_OVERFLOW
+ * doubles the size and moves to (or grows) a heap buffer. If the volume does
+ * not advertise FILE_NAMED_STREAMS, or the query fails with
+ * STATUS_NOT_IMPLEMENTED, STATUS_NOT_SUPPORTED or STATUS_INVALID_INFO_CLASS,
+ * only the unnamed data stream is captured.
+ *
+ * Returns 0 on success, WIMLIB_ERR_NOMEM if the buffer could not be grown,
+ * WIMLIB_ERR_READ if the query failed for any other reason, or the error
+ * returned by winnt_scan_data_stream().
+ */
+static noinline_for_stack int
+winnt_scan_data_streams(HANDLE h, struct wim_inode *inode, u64 file_size,
+ struct winnt_scan_ctx *ctx)
+{
+ int ret;
+ u8 _buf[4096] _aligned_attribute(8);
+ u8 *buf;
+ size_t bufsize;
+ IO_STATUS_BLOCK iosb;
+ NTSTATUS status;
+ FILE_STREAM_INFORMATION *info;
+
+ buf = _buf;
+ bufsize = sizeof(_buf);
+
+ if (!(ctx->vol_flags & FILE_NAMED_STREAMS))
+ goto unnamed_only;
+
+ /* Get a buffer containing the stream information. */
+ while (!NT_SUCCESS(status = NtQueryInformationFile(h,
+ &iosb,
+ buf,
+ bufsize,
+ FileStreamInformation)))
+ {
+
+ switch (status) {
+ case STATUS_BUFFER_OVERFLOW:
+ {
+ u8 *newbuf;
+
+ /* Buffer too small: double it and retry. The stack
+ * buffer cannot be passed to REALLOC(), so the first
+ * growth uses MALLOC(). */
+ bufsize *= 2;
+ if (buf == _buf)
+ newbuf = MALLOC(bufsize);
+ else
+ newbuf = REALLOC(buf, bufsize);
+ if (!newbuf) {
+ ret = WIMLIB_ERR_NOMEM;
+ goto out_free_buf;
+ }
+ buf = newbuf;
+ }
+ break;
+ case STATUS_NOT_IMPLEMENTED:
+ case STATUS_NOT_SUPPORTED:
+ case STATUS_INVALID_INFO_CLASS:
+ goto unnamed_only;
+ default:
+ winnt_error(status,
+ L"\"%ls\": Failed to query stream information",
+ printable_path(ctx));
+ ret = WIMLIB_ERR_READ;
+ goto out_free_buf;
+ }
+ }
+
+ if (iosb.Information == 0) {
+ /* No stream information. */
+ ret = 0;
+ goto out_free_buf;
+ }
+
+ /* Parse one or more stream information structures. */
+ info = (FILE_STREAM_INFORMATION *)buf;
+ for (;;) {
+ /* Load the stream information. StreamNameLength is divided by
+ * 2 to convert it to a count of characters. */
+ ret = winnt_scan_data_stream(info->StreamName,
+ info->StreamNameLength / 2,
+ info->StreamSize.QuadPart,
+ inode, ctx);
+ if (ret)
+ goto out_free_buf;
+
+ if (info->NextEntryOffset == 0) {
+ /* No more stream information. */
+ break;
+ }
+ /* Advance to next stream information. */
+ info = (FILE_STREAM_INFORMATION *)
+ ((u8 *)info + info->NextEntryOffset);
+ }
+ ret = 0;
+ goto out_free_buf;
+
+unnamed_only:
+ /* The volume does not support named streams. Only capture the unnamed
+ * data stream. Directories and reparse points get no data stream at
+ * all on this path. */
+ if (inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY |
+ FILE_ATTRIBUTE_REPARSE_POINT))
+ {
+ ret = 0;
+ goto out_free_buf;
+ }
+
+ {
+ /* A writable array is needed because winnt_scan_data_stream()
+ * null-terminates the stream name in place. */
+ wchar_t stream_name[] = L"::$DATA";
+ ret = winnt_scan_data_stream(stream_name, 7, file_size,
+ inode, ctx);
+ }
+out_free_buf:
+ /* Free buffer if allocated on heap. */
+ if (unlikely(buf != _buf))
+ FREE(buf);
+ return ret;
+}
+
+/*
+ * Return the logical cluster number of the first extent described by
+ * @extents, or 0 if the buffer contains no extents.
+ */
+static u64
+extract_starting_lcn(const RETRIEVAL_POINTERS_BUFFER *extents)
+{
+	const bool has_extent = (extents->ExtentCount >= 1);
+
+	return has_extent ? extents->Extents[0].Lcn.QuadPart : 0;
+}
+
+/*
+ * Compute the sort key of the file open on @h: the starting logical cluster
+ * number (LCN) of its first extent, or 0 if that cannot be determined.
+ *
+ * The output buffer has room for a single extent only.  For a file with more
+ * than one extent, FSCTL_GET_RETRIEVAL_POINTERS is documented to report
+ * "more data" (STATUS_BUFFER_OVERFLOW) while still filling in as many extents
+ * as fit.  NT_SUCCESS() is false for that status, so it must be accepted
+ * explicitly; otherwise every fragmented file would get a sort key of 0.
+ * The output buffer is zero-initialized so that ExtentCount reads as 0 if the
+ * filesystem did not write it.
+ */
+static noinline_for_stack u64
+get_sort_key(HANDLE h)
+{
+	STARTING_VCN_INPUT_BUFFER in = { .StartingVcn.QuadPart = 0 };
+	RETRIEVAL_POINTERS_BUFFER out = { .ExtentCount = 0 };
+	NTSTATUS status;
+
+	status = winnt_fsctl(h, FSCTL_GET_RETRIEVAL_POINTERS,
+			     &in, sizeof(in), &out, sizeof(out), NULL);
+	if (!NT_SUCCESS(status) && status != STATUS_BUFFER_OVERFLOW)
+		return 0;
+
+	return extract_starting_lcn(&out);
+}
+
+/*
+ * Store @sort_key in the windows_file of every stream of @inode whose blob
+ * resolves and is located in a Windows file (BLOB_IN_WINDOWS_FILE).
+ */
+static void
+set_sort_key(struct wim_inode *inode, u64 sort_key)
+{
+	unsigned idx;
+
+	for (idx = 0; idx < inode->i_num_streams; idx++) {
+		struct blob_descriptor *blob;
+
+		blob = stream_blob_resolved(&inode->i_streams[idx]);
+		if (!blob)
+			continue;
+		if (blob->blob_location != BLOB_IN_WINDOWS_FILE)
+			continue;
+		blob->windows_file->sort_key = sort_key;
+	}
+}
+
+/*
+ * Decide whether try_to_use_wimboot_hash() is worth calling for @inode.
+ *
+ * - Never for directories or encrypted files, which aren't valid for external
+ *   backing.
+ * - For reparse points: only if the reparse tag is the WOF tag and the capture
+ *   is running with WIMLIB_ADD_FLAG_WIMBOOT.
+ * - For everything else: only while WOF has not been found to be detached.
+ */
+static inline bool
+should_try_to_use_wimboot_hash(const struct wim_inode *inode,
+			       const struct winnt_scan_ctx *ctx)
+{
+	if (inode->i_attributes & (FILE_ATTRIBUTE_DIRECTORY |
+				   FILE_ATTRIBUTE_ENCRYPTED))
+		return false;
+
+	if (!(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT)) {
+		/* Ordinary file: WOF may be attached, so try unless we
+		 * already know that it isn't. */
+		return !ctx->wof_not_attached;
+	}
+
+	/* Reparse point: only a WOF reparse point in WIMBOOT mode counts. */
+	return (inode->i_reparse_tag == WIM_IO_REPARSE_TAG_WOF) &&
+	       (ctx->params->add_flags & WIMLIB_ADD_FLAG_WIMBOOT);
+}
+
+/*
+ * This function implements an optimization for capturing files from a
+ * filesystem with a backing WIM(s). If a file is WIM-backed, then we can
+ * retrieve the SHA-1 message digest of its original contents from its reparse
+ * point. This may eliminate the need to read the file's data and/or allow the
+ * file's data to be immediately deduplicated with existing data in the WIM.
+ *
+ * If WOF is attached, then this function is merely an optimization, but
+ * potentially a very effective one. If WOF is detached, then this function
+ * really causes WIM-backed files to be, effectively, automatically
+ * "dereferenced" when possible; the unnamed data stream is updated to reference
+ * the original contents and the reparse point is removed.
+ *
+ * @h: open handle to the file; only used on the non-reparse-point path, for
+ * FSCTL_GET_EXTERNAL_BACKING.
+ * @inode: inode whose streams have already been scanned.
+ * @ctx: scan context; ctx->wof_not_attached is set to true here when a WOF
+ * reparse point is seen or when the FSCTL reports that WOF is not present.
+ *
+ * This function returns 0 if the fixup succeeded or was intentionally not
+ * executed. Otherwise it returns an error code: the error from
+ * read_blob_into_buf(), or WIMLIB_ERR_STAT if FSCTL_GET_EXTERNAL_BACKING fails
+ * unexpectedly.
+ */
+static noinline_for_stack int
+try_to_use_wimboot_hash(HANDLE h, struct wim_inode *inode,
+ struct winnt_scan_ctx *ctx)
+{
+ struct blob_table *blob_table = ctx->params->blob_table;
+ struct wim_inode_stream *reparse_strm = NULL;
+ struct wim_inode_stream *strm;
+ struct blob_descriptor *blob;
+ u8 hash[SHA1_HASH_SIZE];
+ int ret;
+
+ if (inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT) {
+ struct reparse_buffer_disk rpbuf;
+ /* View of the reparse data as WOF info followed by the WIM
+ * provider's data. */
+ struct {
+ WOF_EXTERNAL_INFO wof_info;
+ struct wim_provider_rpdata wim_info;
+ } *rpdata = (void *)rpbuf.rpdata;
+ struct blob_descriptor *reparse_blob;
+
+ /* The file has a WOF reparse point, so WOF must be detached.
+ * We can read the reparse point directly. */
+ ctx->wof_not_attached = true;
+ reparse_strm = inode_get_unnamed_stream(inode, STREAM_TYPE_REPARSE_POINT);
+ reparse_blob = stream_blob_resolved(reparse_strm);
+
+ if (!reparse_blob || reparse_blob->size < sizeof(*rpdata))
+ return 0; /* Not a WIM-backed file */
+
+ ret = read_blob_into_buf(reparse_blob, rpdata);
+ if (ret)
+ return ret;
+
+ if (rpdata->wof_info.Version != WOF_CURRENT_VERSION ||
+ rpdata->wof_info.Provider != WOF_PROVIDER_WIM ||
+ rpdata->wim_info.version != 2)
+ return 0; /* Not a WIM-backed file */
+
+ /* Okay, this is a WIM backed file. Get its SHA-1 hash. */
+ copy_hash(hash, rpdata->wim_info.unnamed_data_stream_hash);
+ } else {
+ struct {
+ WOF_EXTERNAL_INFO wof_info;
+ WIM_PROVIDER_EXTERNAL_INFO wim_info;
+ } out;
+ NTSTATUS status;
+
+ /* WOF may be attached. Try reading this file's external
+ * backing info. */
+ status = winnt_fsctl(h, FSCTL_GET_EXTERNAL_BACKING,
+ NULL, 0, &out, sizeof(out), NULL);
+
+ /* Is WOF not attached? */
+ if (status == STATUS_INVALID_DEVICE_REQUEST ||
+ status == STATUS_NOT_SUPPORTED) {
+ ctx->wof_not_attached = true;
+ return 0;
+ }
+
+ /* Is this file not externally backed? */
+ if (status == STATUS_OBJECT_NOT_EXTERNALLY_BACKED)
+ return 0;
+
+ /* Does this file have an unknown type of external backing that
+ * needed a larger information buffer? */
+ if (status == STATUS_BUFFER_TOO_SMALL)
+ return 0;
+
+ /* Was there some other failure? */
+ if (status != STATUS_SUCCESS) {
+ winnt_error(status,
+ L"\"%ls\": FSCTL_GET_EXTERNAL_BACKING failed",
+ printable_path(ctx));
+ return WIMLIB_ERR_STAT;
+ }
+
+ /* Is this file backed by a WIM? */
+ if (out.wof_info.Version != WOF_CURRENT_VERSION ||
+ out.wof_info.Provider != WOF_PROVIDER_WIM ||
+ out.wim_info.Version != WIM_PROVIDER_CURRENT_VERSION)
+ return 0;
+
+ /* Okay, this is a WIM backed file. Get its SHA-1 hash. */
+ copy_hash(hash, out.wim_info.ResourceHash);
+ }
+
+ /* If the file's unnamed data stream is nonempty, then fill in its hash
+ * and deduplicate it if possible.
+ *
+ * With WOF detached, we require that the blob *must* be de-duplicable
+ * for any action to be taken, since without WOF we can't fall back to
+ * getting the "dereferenced" data by reading the stream (the real
+ * stream is sparse and contains all zeroes). */
+ strm = inode_get_unnamed_data_stream(inode);
+ if (strm && (blob = stream_blob_resolved(strm))) {
+ struct blob_descriptor **back_ptr;
+
+ /* reparse_strm is non-NULL only on the WOF-detached path. */
+ if (reparse_strm && !lookup_blob(blob_table, hash))
+ return 0;
+ back_ptr = retrieve_pointer_to_unhashed_blob(blob);
+ copy_hash(blob->hash, hash);
+ /* If after_blob_hashed() returns a different descriptor, the
+ * one we hashed is no longer needed and is freed here. */
+ if (after_blob_hashed(blob, back_ptr, blob_table,
+ inode) != blob)
+ free_blob_descriptor(blob);
+ }
+
+ /* Remove the reparse point, if present. */
+ if (reparse_strm) {
+ inode_remove_stream(inode, reparse_strm, blob_table);
+ inode->i_attributes &= ~(FILE_ATTRIBUTE_REPARSE_POINT |
+ FILE_ATTRIBUTE_SPARSE_FILE);
+ /* Don't leave the inode with no attribute bits at all. */
+ if (inode->i_attributes == 0)
+ inode->i_attributes = FILE_ATTRIBUTE_NORMAL;
+ }
+
+ return 0;
+}
+
+/* File metadata copied out of FILE_ALL_INFORMATION by get_file_info(). */
+struct file_info {
+ u32 attributes; /* BasicInformation.FileAttributes */
+ u32 num_links; /* StandardInformation.NumberOfLinks */
+ u64 creation_time; /* BasicInformation.CreationTime */
+ u64 last_write_time; /* BasicInformation.LastWriteTime */
+ u64 last_access_time; /* BasicInformation.LastAccessTime */
+ u64 ino; /* InternalInformation.IndexNumber */
+ u64 end_of_file; /* StandardInformation.EndOfFile */
+ u32 ea_size; /* EaInformation.EaSize */
+};
+
+/*
+ * Query FileAllInformation for the open file @h and copy the fields of
+ * interest into @info.
+ *
+ * STATUS_BUFFER_OVERFLOW is treated like success: the fixed-size
+ * FILE_ALL_INFORMATION buffer is still used in that case.
+ *
+ * Returns STATUS_SUCCESS if @info was filled in, otherwise the failing status
+ * from NtQueryInformationFile() (in which case @info is not written).
+ */
+static noinline_for_stack NTSTATUS
+get_file_info(HANDLE h, struct file_info *info)
+{
+	FILE_ALL_INFORMATION all;
+	IO_STATUS_BLOCK io_status;
+	NTSTATUS status;
+
+	status = NtQueryInformationFile(h, &io_status, &all, sizeof(all),
+					FileAllInformation);
+	if (unlikely(!NT_SUCCESS(status) && status != STATUS_BUFFER_OVERFLOW))
+		return status;
+
+	*info = (struct file_info) {
+		.attributes	  = all.BasicInformation.FileAttributes,
+		.num_links	  = all.StandardInformation.NumberOfLinks,
+		.creation_time	  = all.BasicInformation.CreationTime.QuadPart,
+		.last_write_time  = all.BasicInformation.LastWriteTime.QuadPart,
+		.last_access_time = all.BasicInformation.LastAccessTime.QuadPart,
+		.ino		  = all.InternalInformation.IndexNumber.QuadPart,
+		.end_of_file	  = all.StandardInformation.EndOfFile.QuadPart,
+		.ea_size	  = all.EaInformation.EaSize,
+	};
+	return STATUS_SUCCESS;
+}
+
+/*
+ * Query volume-level information through the open handle @h and record it in
+ * the scan context:
+ *
+ * - ctx->vol_flags and ctx->is_ntfs, from FileFsAttributeInformation;
+ * - ctx->params->capture_root_dev, from the volume serial number;
+ * - ctx->params->capture_root_ino, from the inode number of @h itself.
+ *
+ * Each query is best-effort: on failure a warning is issued and the
+ * corresponding field(s) are left unmodified.
+ */
+static void
+get_volume_information(HANDLE h, struct winnt_scan_ctx *ctx)
+{
+	/* Fixed-size header plus 128 extra bytes for the variable-length
+	 * filesystem name. */
+	u8 attr_buf[sizeof(FILE_FS_ATTRIBUTE_INFORMATION) + 128] _aligned_attribute(8);
+	FILE_FS_ATTRIBUTE_INFORMATION *fs_attrs = (void *)attr_buf;
+	FILE_FS_VOLUME_INFORMATION vol;
+	struct file_info root_info;
+	IO_STATUS_BLOCK iosb;
+	NTSTATUS status;
+	bool have_serial;
+
+	/* Volume flags and filesystem type */
+	status = NtQueryVolumeInformationFile(h, &iosb, fs_attrs,
+					      sizeof(attr_buf),
+					      FileFsAttributeInformation);
+	if (!NT_SUCCESS(status)) {
+		winnt_warning(status, L"\"%ls\": Can't get volume attributes",
+			      printable_path(ctx));
+	} else {
+		ctx->vol_flags = fs_attrs->FileSystemAttributes;
+		ctx->is_ntfs = (fs_attrs->FileSystemNameLength == 4 * sizeof(wchar_t)) &&
+			       !wmemcmp(fs_attrs->FileSystemName, L"NTFS", 4);
+	}
+
+	/* Volume ID.  An overflowing volume label is fine as long as the
+	 * returned data extends through the serial number. */
+	status = NtQueryVolumeInformationFile(h, &iosb, &vol, sizeof(vol),
+					      FileFsVolumeInformation);
+	have_serial = (NT_SUCCESS(status) || status == STATUS_BUFFER_OVERFLOW) &&
+		      (iosb.Information >= offsetof(FILE_FS_VOLUME_INFORMATION,
+						    VolumeSerialNumber) +
+					   sizeof(vol.VolumeSerialNumber));
+	if (!have_serial) {
+		winnt_warning(status, L"\"%ls\": Can't get volume ID",
+			      printable_path(ctx));
+	} else {
+		ctx->params->capture_root_dev = vol.VolumeSerialNumber;
+	}
+
+	/* Inode number of the capture root */
+	status = get_file_info(h, &root_info);
+	if (!NT_SUCCESS(status)) {
+		winnt_warning(status, L"\"%ls\": Can't get file information",
+			      printable_path(ctx));
+	} else {
+		ctx->params->capture_root_ino = root_info.ino;
+	}
+}
+
+/*
+ * Scan one file or directory and build the WIM dentry (sub)tree for it.
+ *
+ * @root_ret: receives the new dentry, or NULL if the file was excluded, was
+ * skipped because its deletion is pending, or an error occurred.
+ * @cur_dir, @relative_path, @relative_path_nchars: passed to winnt_openat() to
+ * open the file.
+ * @filename: name given to the new dentry.
+ * @ctx: scan context.
+ * @recursive: if true, directories are descended into and scan progress is
+ * reported from here; if false, the caller handles progress.
+ *
+ * The file is first opened with metadata-only access. Its basic information,
+ * short name, security descriptor, object ID, extended attributes, reparse
+ * data and stream information are loaded in that order. Inodes that turn out
+ * to be additional hard links (i_nlink > 1) skip the per-inode steps.
+ *
+ * Returns 0 on success (including the excluded and skipped cases); on failure
+ * the dentry tree is freed and the value of report_scan_error() is returned.
+ */
+static int
+winnt_build_dentry_tree(struct wim_dentry **root_ret,
+ HANDLE cur_dir,
+ const wchar_t *relative_path,
+ size_t relative_path_nchars,
+ const wchar_t *filename,
+ struct winnt_scan_ctx *ctx,
+ bool recursive)
+{
+ struct wim_dentry *root = NULL;
+ struct wim_inode *inode = NULL;
+ HANDLE h = NULL;
+ int ret;
+ NTSTATUS status;
+ struct file_info file_info;
+ u64 sort_key;
+
+ ret = try_exclude(ctx->params);
+ if (unlikely(ret < 0)) /* Excluded? */
+ goto out_progress;
+ if (unlikely(ret > 0)) /* Error? */
+ goto out;
+
+ /* Open the file with permission to read metadata. Although we will
+ * later need a handle with FILE_LIST_DIRECTORY permission (or,
+ * equivalently, FILE_READ_DATA; they're the same numeric value) if the
+ * file is a directory, it can significantly slow things down to request
+ * this permission on all nondirectories. Perhaps it causes Windows to
+ * start prefetching the file contents... */
+ status = winnt_openat(cur_dir, relative_path, relative_path_nchars,
+ FILE_READ_ATTRIBUTES | FILE_READ_EA |
+ READ_CONTROL | ACCESS_SYSTEM_SECURITY,
+ &h);
+ if (unlikely(!NT_SUCCESS(status))) {
+ if (status == STATUS_DELETE_PENDING) {
+ /* Not an error: return success with a NULL root. */
+ WARNING("\"%ls\": Deletion pending; skipping file",
+ printable_path(ctx));
+ ret = 0;
+ goto out;
+ }
+ if (status == STATUS_SHARING_VIOLATION) {
+ ERROR("Can't open \"%ls\":\n"
+ " File is in use by another process! "
+ "Consider using snapshot (VSS) mode.",
+ printable_path(ctx));
+ ret = WIMLIB_ERR_OPEN;
+ goto out;
+ }
+ winnt_error(status, L"\"%ls\": Can't open file",
+ printable_path(ctx));
+ if (status == STATUS_FVE_LOCKED_VOLUME)
+ ret = WIMLIB_ERR_FVE_LOCKED_VOLUME;
+ else
+ ret = WIMLIB_ERR_OPEN;
+ goto out;
+ }
+
+ /* Get information about the file. */
+ status = get_file_info(h, &file_info);
+ if (!NT_SUCCESS(status)) {
+ winnt_error(status, L"\"%ls\": Can't get file information",
+ printable_path(ctx));
+ ret = WIMLIB_ERR_STAT;
+ goto out;
+ }
+
+ /* Create a WIM dentry with an associated inode, which may be shared.
+ *
+ * However, we need to explicitly check for directories and files with
+ * only 1 link and refuse to hard link them. This is because Windows
+ * has a bug where it can return duplicate File IDs for files and
+ * directories on the FAT filesystem.
+ *
+ * Since we don't follow mount points on Windows, we don't need to query
+ * the volume ID per-file. Just once, for the root, is enough. But we
+ * can't simply pass 0, because then there could be inode collisions
+ * among multiple calls to win32_build_dentry_tree() that are scanning
+ * files on different volumes. */
+ ret = inode_table_new_dentry(ctx->params->inode_table,
+ filename,
+ file_info.ino,
+ ctx->params->capture_root_dev,
+ (file_info.num_links <= 1),
+ &root);
+ if (ret)
+ goto out;
+
+ /* Get the short (DOS) name of the file. */
+ status = winnt_get_short_name(h, root);
+
+ /* If we can't read the short filename for any reason other than
+ * out-of-memory, just ignore the error and assume the file has no short
+ * name. This shouldn't be an issue, since the short names are
+ * essentially obsolete anyway. */
+ if (unlikely(status == STATUS_NO_MEMORY)) {
+ ret = WIMLIB_ERR_NOMEM;
+ goto out;
+ }
+
+ inode = root->d_inode;
+
+ if (inode->i_nlink > 1) {
+ /* Shared inode (hard link); skip reading per-inode information.
+ */
+ goto out_progress;
+ }
+
+ inode->i_attributes = file_info.attributes;
+ inode->i_creation_time = file_info.creation_time;
+ inode->i_last_write_time = file_info.last_write_time;
+ inode->i_last_access_time = file_info.last_access_time;
+
+ /* Get the file's security descriptor, unless we are capturing in
+ * NO_ACLS mode or the volume does not support security descriptors. */
+ if (!(ctx->params->add_flags & WIMLIB_ADD_FLAG_NO_ACLS)
+ && (ctx->vol_flags & FILE_PERSISTENT_ACLS))
+ {
+ ret = winnt_load_security_descriptor(h, inode, ctx);
+ if (ret)
+ goto out;
+ }
+
+ /* Get the file's object ID. */
+ ret = winnt_load_object_id(h, inode, ctx);
+ if (ret)
+ goto out;
+
+ /* Get the file's extended attributes. */
+ if (unlikely(file_info.ea_size != 0)) {
+ ret = winnt_load_xattrs(h, inode, ctx, file_info.ea_size);
+ if (ret)
+ goto out;
+ }
+
+ /* If this is a reparse point, load the reparse data. */
+ if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_REPARSE_POINT)) {
+ ret = winnt_load_reparse_data(h, inode, ctx);
+ if (ret)
+ goto out;
+ }
+
+ /* Compute the sort key now, while the handle is still open; it is
+ * applied to the streams further below via set_sort_key(). */
+ sort_key = get_sort_key(h);
+
+ if (unlikely(inode->i_attributes & FILE_ATTRIBUTE_ENCRYPTED)) {
+ /* Load information about the raw encrypted data. This is
+ * needed for any directory or non-directory that has
+ * FILE_ATTRIBUTE_ENCRYPTED set.
+ *
+ * Note: since OpenEncryptedFileRaw() fails with
+ * ERROR_SHARING_VIOLATION if there are any open handles to the
+ * file, we have to close the file and re-open it later if
+ * needed. */
+ NtClose(h);
+ h = NULL;
+ ret = winnt_scan_efsrpc_raw_data(inode, ctx);
+ if (ret)
+ goto out;
+ } else {
+ /*
+ * Load information about data streams (unnamed and named).
+ *
+ * Skip this step for encrypted files, since the data from
+ * ReadEncryptedFileRaw() already contains all data streams (and
+ * they do in fact all get restored by WriteEncryptedFileRaw().)
+ *
+ * Note: WIMGAPI (as of Windows 8.1) gets this wrong and stores
+ * both the EFSRPC data and the named data stream(s)...!
+ */
+ ret = winnt_scan_data_streams(h,
+ inode,
+ file_info.end_of_file,
+ ctx);
+ if (ret)
+ goto out;
+ }
+
+ /* Note: h is NULL here for encrypted files, but
+ * should_try_to_use_wimboot_hash() rejects encrypted files. */
+ if (unlikely(should_try_to_use_wimboot_hash(inode, ctx))) {
+ ret = try_to_use_wimboot_hash(h, inode, ctx);
+ if (ret)
+ goto out;
+ }
+
+ set_sort_key(inode, sort_key);
+
+ if (inode_is_directory(inode) && recursive) {
+
+ /* Directory: recurse to children. */
+
+ /* Re-open the directory with FILE_LIST_DIRECTORY access. */
+ if (h) {
+ NtClose(h);
+ h = NULL;
+ }
+ status = winnt_openat(cur_dir, relative_path,
+ relative_path_nchars, FILE_LIST_DIRECTORY,
+ &h);
+ if (!NT_SUCCESS(status)) {
+ winnt_error(status, L"\"%ls\": Can't open directory",
+ printable_path(ctx));
+ ret = WIMLIB_ERR_OPEN;
+ goto out;
+ }
+ ret = winnt_recurse_directory(h, root, ctx);
+ if (ret)
+ goto out;
+ }
+
+out_progress:
+ /* Reached on success and for excluded files (root == NULL). */
+ ret = 0;
+ if (recursive) { /* if !recursive, caller handles progress */
+ if (likely(root))
+ ret = do_scan_progress(ctx->params,
+ WIMLIB_SCAN_DENTRY_OK, inode);
+ else
+ ret = do_scan_progress(ctx->params,
+ WIMLIB_SCAN_DENTRY_EXCLUDED,
+ NULL);
+ }
+out:
+ if (likely(h))
+ NtClose(h);
+ if (unlikely(ret)) {
+ /* On error, discard anything built so far and report the error;
+ * the value of report_scan_error() becomes the return value. */
+ free_dentry_tree(root, ctx->params->blob_table);
+ root = NULL;
+ ret = report_scan_error(ctx->params, ret);
+ }
+ *root_ret = root;
+ return ret;
+}
+
+/*
+ * After a scan of @path has finished, print a summary of the security
+ * descriptor problems counted in @ctx.  Nothing is printed if both counters
+ * (SACL privilege not held, security descriptor access denied) are zero.
+ */
+static void
+winnt_do_scan_warnings(const wchar_t *path, const struct winnt_scan_ctx *ctx)
+{
+	const bool sacl_missed = (ctx->num_get_sacl_priv_notheld != 0);
+	const bool sd_missed = (ctx->num_get_sd_access_denied != 0);
+
+	if (likely(!sacl_missed && !sd_missed))
+		return;
+
+	WARNING("Scan of \"%ls\" complete, but with one or more warnings:", path);
+
+	if (sacl_missed) {
+		WARNING("- Could not capture SACL (System Access Control List)\n"
+			" on %lu files or directories.",
+			ctx->num_get_sacl_priv_notheld);
+	}
+
+	if (sd_missed) {
+		WARNING("- Could not capture security descriptor at all\n"
+			" on %lu files or directories.",
+			ctx->num_get_sd_access_denied);
+	}
+
+	WARNING("To fully capture all security descriptors, run the program\n"
+		" with Administrator rights.");
+}
+
+/*----------------------------------------------------------------------------*
+ * Fast MFT scan implementation *
+ *----------------------------------------------------------------------------*/
+
+#define ENABLE_FAST_MFT_SCAN 1
+
+#ifdef ENABLE_FAST_MFT_SCAN
+
+/* A range of clusters; element type of the 'ClusterRanges' filter array in
+ * QUERY_FILE_LAYOUT_INPUT. */
+typedef struct {
+ u64 StartingCluster;
+ u64 ClusterCount;
+} CLUSTER_RANGE;
+
+/* A range of file reference numbers; element type of the
+ * 'FileReferenceRanges' filter array in QUERY_FILE_LAYOUT_INPUT. */
+typedef struct {
+ u64 StartingFileReferenceNumber;
+ u64 EndingFileReferenceNumber;
+} FILE_REFERENCE_RANGE;
+
+/* The FSCTL_QUERY_FILE_LAYOUT ioctl. This ioctl can be used on Windows 8 and
+ * later to scan the MFT of an NTFS volume. */
+#define FSCTL_QUERY_FILE_LAYOUT CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 157, METHOD_NEITHER, FILE_ANY_ACCESS)
+
+/* The input to FSCTL_QUERY_FILE_LAYOUT. The flag and filter-type constants
+ * are defined immediately before the field they apply to. */
+typedef struct {
+ u32 NumberOfPairs; /* presumably the number of ranges in 'Filter' -- TODO confirm */
+#define QUERY_FILE_LAYOUT_RESTART 0x00000001
+#define QUERY_FILE_LAYOUT_INCLUDE_NAMES 0x00000002
+#define QUERY_FILE_LAYOUT_INCLUDE_STREAMS 0x00000004
+#define QUERY_FILE_LAYOUT_INCLUDE_EXTENTS 0x00000008
+#define QUERY_FILE_LAYOUT_INCLUDE_EXTRA_INFO 0x00000010
+#define QUERY_FILE_LAYOUT_INCLUDE_STREAMS_WITH_NO_CLUSTERS_ALLOCATED 0x00000020
+ u32 Flags; /* QUERY_FILE_LAYOUT_* flag bits listed above */
+#define QUERY_FILE_LAYOUT_FILTER_TYPE_NONE 0
+#define QUERY_FILE_LAYOUT_FILTER_TYPE_CLUSTERS 1
+#define QUERY_FILE_LAYOUT_FILTER_TYPE_FILEID 2
+#define QUERY_FILE_LAYOUT_NUM_FILTER_TYPES 3
+ u32 FilterType; /* one of QUERY_FILE_LAYOUT_FILTER_TYPE_* listed above */
+ u32 Reserved;
+ /* Declared with one element each; presumably a variable-length array
+ * in practice -- TODO confirm against the users of this struct. */
+ union {
+ CLUSTER_RANGE ClusterRanges[1];
+ FILE_REFERENCE_RANGE FileReferenceRanges[1];
+ } Filter;
+} QUERY_FILE_LAYOUT_INPUT;
+
+/* The header of the buffer returned by FSCTL_QUERY_FILE_LAYOUT */
+typedef struct {
+ u32 FileEntryCount;
+ u32 FirstFileOffset; /* presumably the byte offset of the first FILE_LAYOUT_ENTRY -- TODO confirm */
+#define QUERY_FILE_LAYOUT_SINGLE_INSTANCED 0x00000001
+ u32 Flags; /* QUERY_FILE_LAYOUT_SINGLE_INSTANCED is the only flag defined here */
+ u32 Reserved;
+} QUERY_FILE_LAYOUT_OUTPUT;
+
+/* Inode information returned by FSCTL_QUERY_FILE_LAYOUT */
+typedef struct {
+ u32 Version;
+ u32 NextFileOffset;
+ u32 Flags;
+ u32 FileAttributes; /* FILE_ATTRIBUTE_* bits; see file_has_streams() */
+ u64 FileReferenceNumber; /* reported as the inode number in error messages */
+ u32 FirstNameOffset; /* byte offset from this entry to its first FILE_LAYOUT_NAME_ENTRY */
+ u32 FirstStreamOffset; /* 0 means the entry has no streams; see file_has_streams() */
+ u32 ExtraInfoOffset;
+ u32 Reserved;
+} FILE_LAYOUT_ENTRY;
+
+/* Extra inode information returned by FSCTL_QUERY_FILE_LAYOUT: timestamps and
+ * attributes, plus owner, security and USN identifiers. */
+typedef struct {
+ struct {
+ u64 CreationTime;
+ u64 LastAccessTime;
+ u64 LastWriteTime;
+ u64 ChangeTime;
+ u32 FileAttributes;
+ } BasicInformation;
+ u32 OwnerId;
+ u32 SecurityId;
+ s64 Usn;
+} FILE_LAYOUT_INFO_ENTRY;
+
+/* Filename (or dentry) information returned by FSCTL_QUERY_FILE_LAYOUT */
+typedef struct {
+ u32 NextNameOffset; /* byte offset from this entry to the next one; 0 ends the list */
+#define FILE_LAYOUT_NAME_ENTRY_PRIMARY 0x00000001
+#define FILE_LAYOUT_NAME_ENTRY_DOS 0x00000002
+ u32 Flags; /* FILE_LAYOUT_NAME_ENTRY_* bits listed above */
+ u64 ParentFileReferenceNumber;
+ u32 FileNameLength; /* length of FileName in bytes (validated to be nonzero and even) */
+ u32 Reserved;
+ wchar_t FileName[1]; /* FileNameLength / 2 characters; validated to contain no embedded nulls */
+} FILE_LAYOUT_NAME_ENTRY;
+
+/* Stream information returned by FSCTL_QUERY_FILE_LAYOUT */
+typedef struct {
+ u32 Version;
+ u32 NextStreamOffset; /* presumably relative to this entry, like NextNameOffset -- TODO confirm */
+#define STREAM_LAYOUT_ENTRY_IMMOVABLE 0x00000001
+#define STREAM_LAYOUT_ENTRY_PINNED 0x00000002
+#define STREAM_LAYOUT_ENTRY_RESIDENT 0x00000004
+#define STREAM_LAYOUT_ENTRY_NO_CLUSTERS_ALLOCATED 0x00000008
+ u32 Flags; /* STREAM_LAYOUT_ENTRY_* bits listed above */
+ u32 ExtentInformationOffset;
+ u64 AllocationSize;
+ u64 EndOfFile;
+ u64 Reserved;
+ u32 AttributeFlags;
+ u32 StreamIdentifierLength; /* presumably in bytes, like FileNameLength -- TODO confirm */
+ wchar_t StreamIdentifier[1];
+} STREAM_LAYOUT_ENTRY;
+
+
+/* Extent information for a stream: a flags word followed by a
+ * RETRIEVAL_POINTERS_BUFFER. NOTE(review): presumably located through
+ * STREAM_LAYOUT_ENTRY.ExtentInformationOffset -- confirm against the users. */
+typedef struct {
+#define STREAM_EXTENT_ENTRY_AS_RETRIEVAL_POINTERS 0x00000001
+#define STREAM_EXTENT_ENTRY_ALL_EXTENTS 0x00000002
+ u32 Flags; /* STREAM_EXTENT_ENTRY_* bits listed above */
+ union {
+ RETRIEVAL_POINTERS_BUFFER RetrievalPointers;
+ } ExtentInformation;
+} STREAM_EXTENT_ENTRY;
+
+/* Extract the MFT number part of the full inode number */
+#define NTFS_MFT_NO(ref) ((ref) & (((u64)1 << 48) - 1))
+
+/* Is the file the root directory of the NTFS volume? The root directory always
+ * occupies MFT record 5. */
+#define NTFS_IS_ROOT_FILE(ino) (NTFS_MFT_NO(ino) == 5)
+
+/* Is the file a special NTFS file, other than the root directory? The special
+ * files are the first 16 records in the MFT. */
+#define NTFS_IS_SPECIAL_FILE(ino) \
+ (NTFS_MFT_NO(ino) <= 15 && !NTFS_IS_ROOT_FILE(ino))
+
+#define NTFS_SPECIAL_STREAM_OBJECT_ID 0x00000001
+#define NTFS_SPECIAL_STREAM_EA 0x00000002
+#define NTFS_SPECIAL_STREAM_EA_INFORMATION 0x00000004
+
+/* Intermediate inode structure. This is used to temporarily save information
+ * from FSCTL_QUERY_FILE_LAYOUT before creating the full 'struct wim_inode'.
+ *
+ * The FIRST_DENTRY() and FIRST_STREAM() macros locate the inode's dentries and
+ * streams at byte offsets from the start of this structure, so each ntfs_inode
+ * is expected to be allocated together with that trailing data. */
+struct ntfs_inode {
+ struct avl_tree_node index_node; /* links this inode into struct ntfs_inode_map */
+ u64 ino; /* key used by the inode map */
+ u64 creation_time;
+ u64 last_access_time;
+ u64 last_write_time;
+ u64 starting_lcn;
+ u32 attributes;
+ u32 security_id;
+ u32 num_aliases;
+ u32 num_streams;
+ u32 special_streams; /* presumably a mask of NTFS_SPECIAL_STREAM_* -- TODO confirm */
+ u32 first_stream_offset; /* byte offset from this struct to its first ntfs_stream */
+ struct ntfs_dentry *first_child;
+ wchar_t short_name[13]; /* DOS names are validated to be at most 24 bytes (12 characters) */
+};
+
+/* Intermediate dentry structure. This is used to temporarily save information
+ * from FSCTL_QUERY_FILE_LAYOUT before creating the full 'struct wim_dentry'.
+ *
+ * Dentries are variable-length: NEXT_DENTRY() steps over the fixed part plus
+ * the null-terminated name, rounded up to a multiple of 8 bytes. */
+struct ntfs_dentry {
+ u32 offset_from_inode : 31;
+ u32 is_primary : 1;
+ union {
+ /* Note: build_children_lists() replaces 'parent_ino' with
+ * 'next_child'. */
+ u64 parent_ino;
+ struct ntfs_dentry *next_child;
+ };
+ wchar_t name[0]; /* null-terminated (NEXT_DENTRY() measures it with wcslen()) */
+};
+
+/* Intermediate stream structure. This is used to temporarily save information
+ * from FSCTL_QUERY_FILE_LAYOUT before creating the full 'struct
+ * wim_inode_stream'.
+ *
+ * Streams are variable-length: NEXT_STREAM() steps over the fixed part plus
+ * the null-terminated name, rounded up to a multiple of 8 bytes. */
+struct ntfs_stream {
+ u64 size;
+ wchar_t name[0]; /* null-terminated (NEXT_STREAM() measures it with wcslen()) */
+};
+
+/* Map of all known NTFS inodes, keyed by inode number. Implemented as an AVL
+ * tree of 'struct ntfs_inode' linked through their 'index_node' members. */
+struct ntfs_inode_map {
+ struct avl_tree_node *root; /* root of the AVL tree */
+};
+
+#define NTFS_INODE(node) \
+ avl_tree_entry((node), struct ntfs_inode, index_node)
+
+#define SKIP_ALIGNED(p, size) ((void *)(p) + ALIGN((size), 8))
+
+/* Get a pointer to the first dentry of the inode. */
+#define FIRST_DENTRY(ni) SKIP_ALIGNED((ni), sizeof(struct ntfs_inode))
+
+/* Get a pointer to the first stream of the inode, which is located
+ * 'first_stream_offset' bytes past the start of the ntfs_inode. The argument
+ * is parenthesized so that any pointer-valued expression (e.g. 'p + 1') expands
+ * correctly. */
+#define FIRST_STREAM(ni) ((const void *)(ni) + (ni)->first_stream_offset)
+
+/* Advance to the next dentry of the inode. */
+#define NEXT_DENTRY(nd) SKIP_ALIGNED((nd), sizeof(struct ntfs_dentry) + \
+ (wcslen((nd)->name) + 1) * sizeof(wchar_t))
+
+/* Advance to the next stream of the inode. */
+#define NEXT_STREAM(ns) SKIP_ALIGNED((ns), sizeof(struct ntfs_stream) + \
+ (wcslen((ns)->name) + 1) * sizeof(wchar_t))
+
+/* AVL tree comparison callback: orders two ntfs_inodes by inode number. */
+static int
+_avl_cmp_ntfs_inodes(const struct avl_tree_node *node1,
+		     const struct avl_tree_node *node2)
+{
+	const u64 ino1 = NTFS_INODE(node1)->ino;
+	const u64 ino2 = NTFS_INODE(node2)->ino;
+
+	return cmp_u64(ino1, ino2);
+}
+
+/* Insert @ni into @map.  If avl_tree_insert() reports that an inode with the
+ * same number is already present, a warning is printed and @ni is freed
+ * instead of being inserted. */
+static void
+ntfs_inode_map_add_inode(struct ntfs_inode_map *map, struct ntfs_inode *ni)
+{
+	if (!avl_tree_insert(&map->root, &ni->index_node, _avl_cmp_ntfs_inodes))
+		return;
+
+	WARNING("Inode 0x%016"PRIx64" is a duplicate!", ni->ino);
+	FREE(ni);
+}
+
+/* Look up the ntfs_inode with inode number @ino in @map.  Returns the inode,
+ * or NULL if there is none with that number. */
+static struct ntfs_inode *
+ntfs_inode_map_lookup(struct ntfs_inode_map *map, u64 ino)
+{
+	struct ntfs_inode key;
+	struct avl_tree_node *found;
+
+	/* Only the 'ino' member of the search key is examined by the
+	 * comparison callback. */
+	key.ino = ino;
+	found = avl_tree_lookup_node(map->root, &key.index_node,
+				     _avl_cmp_ntfs_inodes);
+	return found ? NTFS_INODE(found) : NULL;
+}
+
+/* Unlink @ni from @map, then release its memory.  @ni must not be used
+ * afterwards. */
+static void
+ntfs_inode_map_remove(struct ntfs_inode_map *map, struct ntfs_inode *ni)
+{
+	struct avl_tree_node *node = &ni->index_node;
+
+	avl_tree_remove(&map->root, node);
+	FREE(ni);
+}
+
+/* Release every ntfs_inode stored in @map.  The tree is walked in postorder
+ * and each inode is freed as it is visited; map->root itself is not reset. */
+static void
+ntfs_inode_map_destroy(struct ntfs_inode_map *map)
+{
+	struct ntfs_inode *cur;
+
+	avl_tree_for_each_in_postorder(cur, map->root, struct ntfs_inode, index_node) {
+		FREE(cur);
+	}
+}
+
+/* Return true if @file has stream entries that should be processed: its
+ * FirstStreamOffset is nonzero and it is not an encrypted file. */
+static bool
+file_has_streams(const FILE_LAYOUT_ENTRY *file)
+{
+	if (file->FirstStreamOffset == 0)
+		return false;
+	if (file->FileAttributes & FILE_ATTRIBUTE_ENCRYPTED)
+		return false;
+	return true;
+}
+
+/*
+ * Sanity-check a FILE_LAYOUT_NAME_ENTRY.  A name is valid if:
+ *
+ * - its length in bytes is nonzero and even;
+ * - it contains no embedded null characters; and
+ * - if it is a DOS name, it is at most 24 bytes (12 characters) long.
+ */
+static bool
+is_valid_name_entry(const FILE_LAYOUT_NAME_ENTRY *name)
+{
+	if (name->FileNameLength == 0)
+		return false;
+	if (name->FileNameLength % 2 != 0)
+		return false;
+	if (wmemchr(name->FileName, L'\0', name->FileNameLength / 2))
+		return false;
+	if ((name->Flags & FILE_LAYOUT_NAME_ENTRY_DOS) &&
+	    name->FileNameLength > 24)
+		return false;
+	return true;
+}
+
+/* Validate the FILE_LAYOUT_NAME_ENTRYs of the specified file and compute the
+ * total length in bytes of the ntfs_dentry structures needed to hold the name
+ * information. */
+static int
+validate_names_and_compute_total_length(const FILE_LAYOUT_ENTRY *file,
+ size_t *total_length_ret)
+{
+ const FILE_LAYOUT_NAME_ENTRY *name =
+ (const void *)file + file->FirstNameOffset;
+ size_t total = 0;
+ size_t num_long_names = 0;
+
+ for (;;) {
+ if (unlikely(!is_valid_name_entry(name))) {
+ ERROR("Invalid FILE_LAYOUT_NAME_ENTRY! "
+ "FileReferenceNumber=0x%016"PRIx64", "
+ "FileNameLength=%"PRIu32", "
+ "FileName=%.*ls, Flags=0x%08"PRIx32,
+ file->FileReferenceNumber,
+ name->FileNameLength,
+ (int)(name->FileNameLength / 2),
+ name->FileName, name->Flags);
+ return WIMLIB_ERR_UNSUPPORTED;
+ }
+ if (name->Flags != FILE_LAYOUT_NAME_ENTRY_DOS) {
+ num_long_names++;
+ total += ALIGN(sizeof(struct ntfs_dentry) +
+ name->FileNameLength + sizeof(wchar_t),
+ 8);
+ }
+ if (name->NextNameOffset == 0)
+ break;
+ name = (const void *)name + name->NextNameOffset;
+ }
+
+ if (unlikely(num_long_names == 0)) {
+ ERROR("Inode 0x%016"PRIx64" has no long names!",
+ file->FileReferenceNumber);
+ return WIMLIB_ERR_UNSUPPORTED;
+ }
+
+ *total_length_ret = total;
+ return 0;