Updated 'mkwinpeimg' to work correctly on images that have a "windows"
(lower case) directory rather than a "Windows" (upper case) directory.
+ Sparse files are now extracted as sparse.
+
On Windows, improved the way in which files deduplicated with Windows'
Data Deduplication feature are captured.
.IP \[bu]
Reparse points other than symbolic links and junction points.
.IP \[bu]
-Certain file attributes such as compression, encryption, and sparseness.
+Certain file attributes such as compression and encryption.
.IP \[bu]
Short (DOS) names for files.
.IP \[bu]
.IP \[bu]
Object IDs.
.PP
-However, there are also several known limitations of the NTFS volume extraction
-mode:
-.IP \[bu] 4
-Encrypted files will not be extracted.
-.IP \[bu]
-Sparse file attributes will not be extracted.
+However, a limitation of the NTFS volume extraction mode is that encrypted files
+will not be extracted.
.PP
Regardless, since almost all information from the WIM image is restored in this
mode, it is possible (and fully supported) to restore an image of an actual
exact metadata and data of the WIM image, for example due to features mentioned
above not being supported by the target filesystem.
.IP \[bu]
-Sparse file attributes will not be extracted.
-.IP \[bu]
Since encrypted files (with FILE_ATTRIBUTE_ENCRYPTED) are not stored in
plaintext in the WIM image, \fBwimlib-imagex\fR cannot restore encrypted
files to filesystems not supporting encryption. Therefore, on such filesystems,
return report_error(ctx->progfunc, ctx->progctx, error_code, path);
}
+extern bool
+detect_sparse_region(const void *data, size_t size, size_t *len_ret);
+/*
+ * Optionally run sparse-region detection on a buffer.  If @enabled is false,
+ * @data is not examined: *len_ret is set to @size and false is returned, so the
+ * whole buffer is reported as a single nonzero region.  Otherwise this returns
+ * whatever detect_sparse_region(data, size, len_ret) returns.
+ */
+static inline bool
+maybe_detect_sparse_region(const void *data, size_t size, size_t *len_ret,
+ bool enabled)
+{
+ if (!enabled) {
+ /* Force non-sparse without checking */
+ *len_ret = size;
+ return false;
+ }
+ return detect_sparse_region(data, size, len_ret);
+}
+
#define inode_first_extraction_dentry(inode) \
((inode)->i_first_extraction_alias)
return end_file_phase(ctx, WIMLIB_PROGRESS_MSG_EXTRACT_METADATA);
}
+/*
+ * Are all bytes in the specified buffer zero?
+ *
+ * Examines the @size bytes starting at @p in three passes: byte by byte until
+ * @p is a multiple of WORDBYTES (or the buffer ends), then one machine_word_t
+ * at a time while at least WORDBYTES bytes remain, then byte by byte for the
+ * remaining tail.  Returns false at the first nonzero byte or word found and
+ * true otherwise, which includes the case of an empty buffer (@size == 0).
+ *
+ * NOTE(review): the word-sized loads presumably rely on WORDBYTES being
+ * sizeof(machine_word_t) -- confirm against their definitions.
+ */
+static bool
+is_all_zeroes(const u8 *p, const size_t size)
+{
+ const u8 * const end = p + size;
+
+ for (; (uintptr_t)p % WORDBYTES && p != end; p++)
+ if (*p)
+ return false;
+
+ for (; end - p >= WORDBYTES; p += WORDBYTES)
+ if (*(const machine_word_t *)p)
+ return false;
+
+ for (; p != end; p++)
+ if (*p)
+ return false;
+
+ return true;
+}
+
+/*
+ * Sparse regions should be detected at the granularity of the filesystem block
+ * size. For now just assume 4096 bytes, which is the default block size on
+ * NTFS and most Linux filesystems.
+ */
+#define SPARSE_UNIT 4096
+
+/*
+ * Detect whether the specified buffer begins with a region of all zero bytes.
+ * Return %true if a zero region was found or %false if a nonzero region was
+ * found, and set *len_ret to the length of the region.  This operates at a
+ * granularity of SPARSE_UNIT bytes, meaning that to extend a zero region, there
+ * must be SPARSE_UNIT zero bytes with no interruption, but to extend a nonzero
+ * region, just one nonzero byte in the next SPARSE_UNIT bytes is sufficient.
+ *
+ * @data and @size describe the buffer to scan.  The first unit is always
+ * consumed, so *len_ret is nonzero whenever @size is nonzero; the last unit of
+ * the buffer may be shorter than SPARSE_UNIT.  If @size is 0, the loop does not
+ * run: *len_ret is set to 0 and %false is returned.
+ *
+ * Note: besides compression, the WIM format doesn't yet have a way to
+ * efficiently represent zero regions, so that's why we need to detect them
+ * ourselves. Things will still fall apart badly on extremely large sparse
+ * files, but this is a start...
+ */
+bool
+detect_sparse_region(const void *data, size_t size, size_t *len_ret)
+{
+ const void *p = data;
+ const void * const end = data + size;
+ size_t len = 0;
+ bool zeroes = false;
+
+ while (p != end) {
+ size_t n = min(end - p, SPARSE_UNIT);
+ bool z = is_all_zeroes(p, n);
+
+ if (len != 0 && z != zeroes)
+ break;
+ zeroes = z;
+ len += n;
+ p += n;
+ }
+
+ *len_ret = len;
+ return zeroes;
+}
+
#define PWM_FOUND_WIM_HDR (-1)
/* Read the header for a blob in a pipable WIM. If @pwm_hdr_ret is not NULL,
supported_features->archive_files = 1;
supported_features->compressed_files = 1;
supported_features->not_context_indexed_files = 1;
+ supported_features->sparse_files = 1;
supported_features->named_data_streams = 1;
supported_features->hard_links = 1;
supported_features->reparse_points = 1;
ntfs_inode *open_inodes[MAX_OPEN_FILES];
unsigned num_open_inodes;
+ /* For each currently open attribute, whether we're writing to it in
+ * "sparse" mode or not. */
+ bool is_sparse_attr[MAX_OPEN_FILES];
+
+ /* Whether is_sparse_attr[] is true for any currently open attribute */
+ bool any_sparse_attrs;
+
struct reparse_buffer_disk rpbuf;
u8 *reparse_ptr;
if (!(extract_flags & WIMLIB_EXTRACT_FLAG_NO_ATTRIBUTES)) {
u32 attrib = inode->i_attributes;
- attrib &= ~(FILE_ATTRIBUTE_SPARSE_FILE |
- FILE_ATTRIBUTE_ENCRYPTED);
-
if (ntfs_set_ntfs_attrib(ni, (const char *)&attrib,
sizeof(attrib), 0))
{
struct wim_dentry *one_dentry = inode_first_extraction_dentry(inode);
ntfschar *stream_name;
size_t stream_name_nchars;
- ntfs_attr *attr;
+ ntfs_attr *na;
if (unlikely(strm->stream_type == STREAM_TYPE_REPARSE_POINT)) {
/* This should be ensured by extract_blob_list() */
wimlib_assert(ctx->num_open_attrs < MAX_OPEN_FILES);
- attr = ntfs_attr_open(ni, AT_DATA, stream_name, stream_name_nchars);
- if (!attr) {
+ na = ntfs_attr_open(ni, AT_DATA, stream_name, stream_name_nchars);
+ if (!na) {
ERROR_WITH_ERRNO("Failed to open data stream of \"%s\"",
dentry_full_path(one_dentry));
return WIMLIB_ERR_NTFS_3G;
}
- ctx->open_attrs[ctx->num_open_attrs++] = attr;
- ntfs_attr_truncate_solid(attr, blob->size);
+
+ /*
+ * Note: there are problems with trying to combine compression with
+ * sparseness when extracting. For example, doing ntfs_attr_truncate()
+ * at the end to extend the attribute to its final size actually extends
+ * to a compression block size boundary rather than to the requested
+ * size. Until these problems are solved, we always write the full data
+ * to compressed attributes. We also don't attempt to preallocate space
+ * for compressed attributes, since we don't know how much space they
+ * are going to actually need.
+ */
+ ctx->is_sparse_attr[ctx->num_open_attrs] = false;
+ if (!(na->data_flags & ATTR_COMPRESSION_MASK)) {
+ if (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE) {
+ ctx->is_sparse_attr[ctx->num_open_attrs] = true;
+ ctx->any_sparse_attrs = true;
+ } else {
+ ntfs_attr_truncate_solid(na, blob->size);
+ }
+ }
+ ctx->open_attrs[ctx->num_open_attrs++] = na;
return 0;
}
}
ctx->num_open_inodes = 0;
+ ctx->any_sparse_attrs = false;
ctx->reparse_ptr = NULL;
ctx->num_reparse_inodes = 0;
return ret;
const void *chunk, size_t size, void *_ctx)
{
struct ntfs_3g_apply_ctx *ctx = _ctx;
-
- for (unsigned i = 0; i < ctx->num_open_attrs; i++) {
- if (!ntfs_3g_full_pwrite(ctx->open_attrs[i], offset,
- size, chunk))
- {
- ERROR_WITH_ERRNO("Error writing data to NTFS volume");
- return WIMLIB_ERR_NTFS_3G;
+ const void * const end = chunk + size;
+ const void *p;
+ bool zeroes;
+ size_t len;
+ unsigned i;
+
+ /*
+ * For sparse attributes, only write nonzero regions. This lets the
+ * filesystem use holes to represent zero regions.
+ */
+ for (p = chunk; p != end; p += len, offset += len) {
+ zeroes = maybe_detect_sparse_region(p, end - p, &len,
+ ctx->any_sparse_attrs);
+ for (i = 0; i < ctx->num_open_attrs; i++) {
+ if (!zeroes || !ctx->is_sparse_attr[i]) {
+ if (!ntfs_3g_full_pwrite(ctx->open_attrs[i],
+ offset, len, p))
+ goto err;
+ }
}
}
+
if (ctx->reparse_ptr)
ctx->reparse_ptr = mempcpy(ctx->reparse_ptr, chunk, size);
return 0;
+
+err:
+ ERROR_WITH_ERRNO("Error writing data to NTFS volume");
+ return WIMLIB_ERR_NTFS_3G;
}
static int
goto out;
}
+ /* Extend sparse attributes to their final size. */
+ if (ctx->any_sparse_attrs) {
+ for (unsigned i = 0; i < ctx->num_open_attrs; i++) {
+ if (!ctx->is_sparse_attr[i])
+ continue;
+ if (ntfs_attr_truncate(ctx->open_attrs[i], blob->size))
+ {
+ ERROR_WITH_ERRNO("Error extending attribute to "
+ "final size");
+ ret = WIMLIB_ERR_WRITE;
+ goto out;
+ }
+ }
+ }
+
for (u32 i = 0; i < ctx->num_reparse_inodes; i++) {
ret = ntfs_3g_restore_reparse_point(ctx->ntfs_reparse_inodes[i],
ctx->wim_reparse_inodes[i],
unix_get_supported_features(const char *target,
struct wim_features *supported_features)
{
+ supported_features->sparse_files = 1;
supported_features->hard_links = 1;
supported_features->symlink_reparse_points = 1;
supported_features->unix_data = 1;
* the beginning of the array. */
unsigned num_open_fds;
+ /* For each currently open file, whether we're writing to it in "sparse"
+ * mode or not. */
+ bool is_sparse_file[MAX_OPEN_FILES];
+
+ /* Whether is_sparse_file[] is true for any currently open file */
+ bool any_sparse_files;
+
/* Buffer for reading reparse point data into memory */
u8 reparse_data[REPARSE_DATA_MAX_SIZE];
for (unsigned i = offset; i < ctx->num_open_fds; i++)
filedes_close(&ctx->open_fds[i]);
ctx->num_open_fds = 0;
+ ctx->any_sparse_files = false;
}
static int
ERROR_WITH_ERRNO("Can't create regular file \"%s\"", first_path);
return WIMLIB_ERR_OPEN;
}
- filedes_init(&ctx->open_fds[ctx->num_open_fds++], fd);
+ if (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE) {
+ ctx->is_sparse_file[ctx->num_open_fds] = true;
+ ctx->any_sparse_files = true;
+ } else {
+ ctx->is_sparse_file[ctx->num_open_fds] = false;
#ifdef HAVE_POSIX_FALLOCATE
- posix_fallocate(fd, 0, blob->size);
+ posix_fallocate(fd, 0, blob->size);
#endif
+ }
+ filedes_init(&ctx->open_fds[ctx->num_open_fds++], fd);
return unix_create_hardlinks(inode, first_dentry, first_path, ctx);
}
const void *chunk, size_t size, void *_ctx)
{
struct unix_apply_ctx *ctx = _ctx;
+ const void * const end = chunk + size;
+ const void *p;
+ bool zeroes;
+ size_t len;
+ unsigned i;
int ret;
- for (unsigned i = 0; i < ctx->num_open_fds; i++) {
- ret = full_write(&ctx->open_fds[i], chunk, size);
- if (ret) {
- ERROR_WITH_ERRNO("Error writing data to filesystem");
- return ret;
+ /*
+ * For sparse files, only write nonzero regions. This lets the
+ * filesystem use holes to represent zero regions.
+ */
+ for (p = chunk; p != end; p += len, offset += len) {
+ zeroes = maybe_detect_sparse_region(p, end - p, &len,
+ ctx->any_sparse_files);
+ for (i = 0; i < ctx->num_open_fds; i++) {
+ if (!zeroes || !ctx->is_sparse_file[i]) {
+ ret = full_pwrite(&ctx->open_fds[i],
+ p, len, offset);
+ if (ret)
+ goto err;
+ }
}
}
+
if (ctx->reparse_ptr)
ctx->reparse_ptr = mempcpy(ctx->reparse_ptr, chunk, size);
return 0;
+
+err:
+ ERROR_WITH_ERRNO("Error writing data to filesystem");
+ return ret;
}
/* Called when a blob has been fully read for extraction */
if (ret)
break;
} else {
- /* Set metadata on regular file just before closing it.
- */
struct filedes *fd = &ctx->open_fds[j];
+ /* If the file is sparse, extend it to its final size. */
+ if (ctx->is_sparse_file[j] && ftruncate(fd->fd, blob->size)) {
+ ERROR_WITH_ERRNO("Error extending \"%s\" to final size",
+ unix_build_inode_extraction_path(inode, ctx));
+ ret = WIMLIB_ERR_WRITE;
+ break;
+ }
+
+ /* Set metadata on regular file just before closing. */
ret = unix_set_metadata(fd->fd, inode, NULL, ctx);
if (ret)
break;
* beginning of the array) */
unsigned num_open_handles;
+ /* For each currently open stream, whether we're writing to it in
+ * "sparse" mode or not. */
+ bool is_sparse_stream[MAX_OPEN_FILES];
+
+ /* Whether is_sparse_stream[] is true for any currently open stream */
+ bool any_sparse_streams;
+
/* List of dentries, joined by @d_tmp_list, that need to have reparse
* data extracted as soon as the whole blob has been read into
* @data_buffer. */
supported_features->not_context_indexed_files = 1;
- /* Don't do anything with FILE_SUPPORTS_SPARSE_FILES. */
+ if (vol_flags & FILE_SUPPORTS_SPARSE_FILES)
+ supported_features->sparse_files = 1;
if (vol_flags & FILE_NAMED_STREAMS)
supported_features->named_data_streams = 1;
return WIMLIB_ERR_SET_ATTRIBUTES;
}
+static bool
+need_sparse_flag(const struct wim_inode *inode,
+ const struct win32_apply_ctx *ctx)
+{ /* True only when the inode has FILE_ATTRIBUTE_SPARSE_FILE set and the
+ * apply context's supported_features report sparse_files. */
+ return (inode->i_attributes & FILE_ATTRIBUTE_SPARSE_FILE) &&
+ ctx->common.supported_features.sparse_files;
+}
+/*
+ * Set the sparse flag on the file or stream open as @h by issuing
+ * FSCTL_SET_SPARSE through winnt_fsctl().  Returns 0 if the call reports
+ * NT_SUCCESS; otherwise reports the failure through winnt_error(), naming
+ * current_path(ctx), and returns WIMLIB_ERR_SET_ATTRIBUTES.
+ */
+static int
+set_sparse_flag(HANDLE h, struct win32_apply_ctx *ctx)
+{
+ NTSTATUS status;
+
+ status = winnt_fsctl(h, FSCTL_SET_SPARSE, NULL, 0, NULL, 0, NULL);
+ if (NT_SUCCESS(status))
+ return 0;
+
+ winnt_error(status, L"Can't set sparse flag on \"%ls\"",
+ current_path(ctx));
+ return WIMLIB_ERR_SET_ATTRIBUTES;
+}
+
/* Try to enable short name support on the target volume. If successful, return
* true. If unsuccessful, issue a warning and return false. */
static bool
if (ret)
goto out_close;
+ if (need_sparse_flag(dentry->d_inode, ctx)) {
+ ret = set_sparse_flag(h, ctx);
+ if (ret)
+ goto out_close;
+ }
+
ret = create_empty_streams(dentry, ctx);
if (ret)
goto out_close;
const struct wim_inode_stream *strm,
struct win32_apply_ctx *ctx)
{
- FILE_ALLOCATION_INFORMATION alloc_info;
HANDLE h;
NTSTATUS status;
return WIMLIB_ERR_OPEN;
}
+ ctx->is_sparse_stream[ctx->num_open_handles] = false;
+ if (need_sparse_flag(dentry->d_inode, ctx)) {
+ /* If the stream is unnamed, then the sparse flag was already
+ * set when the file was created. But if the stream is named,
+ * then we need to set the sparse flag here. */
+ if (unlikely(stream_is_named(strm))) {
+ int ret = set_sparse_flag(h, ctx);
+ if (ret) {
+ NtClose(h);
+ return ret;
+ }
+ }
+ ctx->is_sparse_stream[ctx->num_open_handles] = true;
+ ctx->any_sparse_streams = true;
+ } else {
+ /* Allocate space for the data. */
+ FILE_ALLOCATION_INFORMATION info =
+ { .AllocationSize = { .QuadPart = blob->size }};
+ NtSetInformationFile(h, &ctx->iosb, &info, sizeof(info),
+ FileAllocationInformation);
+ }
ctx->open_handles[ctx->num_open_handles++] = h;
-
- /* Allocate space for the data. */
- alloc_info.AllocationSize.QuadPart = blob->size;
- NtSetInformationFile(h, &ctx->iosb, &alloc_info, sizeof(alloc_info),
- FileAllocationInformation);
return 0;
}
ctx->num_open_handles = 0;
ctx->data_buffer_ptr = NULL;
+ ctx->any_sparse_streams = false;
INIT_LIST_HEAD(&ctx->reparse_dentries);
INIT_LIST_HEAD(&ctx->encrypted_dentries);
return ret;
}
+static int
+pwrite_to_handle(HANDLE h, const void *data, size_t size, u64 offset)
+{
+ const void * const end = data + size;
+ const void *p;
+ IO_STATUS_BLOCK iosb;
+ NTSTATUS status;
+ /*
+ * Write the @size bytes at @data to handle @h, starting at byte @offset
+ * of the target.  Each NtWriteFile() call is passed an explicit offset
+ * and at most INT32_MAX bytes; afterwards both the source pointer and
+ * the target offset advance by iosb.Information, and the loop repeats
+ * until the whole buffer has been consumed.  Returns 0 on success, or
+ * WIMLIB_ERR_WRITE (after reporting through winnt_error()) as soon as a
+ * call does not report NT_SUCCESS.
+ *
+ * NOTE(review): a call that succeeds with iosb.Information == 0 would
+ * make no progress here -- confirm that cannot happen for these handles.
+ */
+ for (p = data; p != end; p += iosb.Information,
+ offset += iosb.Information)
+ {
+ LARGE_INTEGER offs = { .QuadPart = offset };
+
+ status = NtWriteFile(h, NULL, NULL, NULL, &iosb,
+ (void *)p, min(INT32_MAX, end - p),
+ &offs, NULL);
+ if (!NT_SUCCESS(status)) {
+ winnt_error(status,
+ L"Error writing data to target volume");
+ return WIMLIB_ERR_WRITE;
+ }
+ }
+ return 0;
+}
+
/* Called when the next chunk of a blob has been read for extraction */
static int
win32_extract_chunk(const struct blob_descriptor *blob, u64 offset,
const void *chunk, size_t size, void *_ctx)
{
struct win32_apply_ctx *ctx = _ctx;
+ const void * const end = chunk + size;
+ const void *p;
+ bool zeroes;
+ size_t len;
+ unsigned i;
+ int ret;
- /* Write the data chunk to each open handle */
- for (unsigned i = 0; i < ctx->num_open_handles; i++) {
- u8 *bufptr = (u8 *)chunk;
- size_t bytes_remaining = size;
- NTSTATUS status;
- while (bytes_remaining) {
- ULONG count = min(0xFFFFFFFF, bytes_remaining);
-
- status = NtWriteFile(ctx->open_handles[i],
- NULL, NULL, NULL,
- &ctx->iosb, bufptr, count,
- NULL, NULL);
- if (!NT_SUCCESS(status)) {
- winnt_error(status, L"Error writing data to target volume");
- return WIMLIB_ERR_WRITE;
+ /*
+ * For sparse streams, only write nonzero regions. This lets the
+ * filesystem use holes to represent zero regions.
+ */
+ for (p = chunk; p != end; p += len, offset += len) {
+ zeroes = maybe_detect_sparse_region(p, end - p, &len,
+ ctx->any_sparse_streams);
+ for (i = 0; i < ctx->num_open_handles; i++) {
+ if (!zeroes || !ctx->is_sparse_stream[i]) {
+ ret = pwrite_to_handle(ctx->open_handles[i],
+ p, len, offset);
+ if (ret)
+ return ret;
}
- bufptr += ctx->iosb.Information;
- bytes_remaining -= ctx->iosb.Information;
}
}
int ret;
const struct wim_dentry *dentry;
+ /* Extend sparse streams to their final size. */
+ if (ctx->any_sparse_streams && !status) {
+ for (unsigned i = 0; i < ctx->num_open_handles; i++) {
+ FILE_END_OF_FILE_INFORMATION info =
+ { .EndOfFile = { .QuadPart = blob->size } };
+ NTSTATUS ntstatus;
+
+ if (!ctx->is_sparse_stream[i])
+ continue;
+
+ ntstatus = NtSetInformationFile(ctx->open_handles[i],
+ &ctx->iosb,
+ &info, sizeof(info),
+ FileEndOfFileInformation);
+ if (!NT_SUCCESS(ntstatus)) {
+ winnt_error(ntstatus, L"Error writing data to "
+ "target volume (while extending)");
+ status = WIMLIB_ERR_WRITE;
+ break;
+ }
+ }
+ }
+
close_handles(ctx);
if (status)