*/
/*
- * Copyright (C) 2012-2018 Eric Biggers
+ * Copyright (C) 2012-2021 Eric Biggers
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
IMAGEX_PRESERVE_DIR_STRUCTURE_OPTION,
IMAGEX_REBUILD_OPTION,
IMAGEX_RECOMPRESS_OPTION,
+ IMAGEX_RECOVER_DATA_OPTION,
IMAGEX_RECURSIVE_OPTION,
IMAGEX_REF_OPTION,
IMAGEX_RPFIX_OPTION,
{T("include-invalid-names"), no_argument, NULL, IMAGEX_INCLUDE_INVALID_NAMES_OPTION},
{T("wimboot"), no_argument, NULL, IMAGEX_WIMBOOT_OPTION},
{T("compact"), required_argument, NULL, IMAGEX_COMPACT_OPTION},
+ {T("recover-data"), no_argument, NULL, IMAGEX_RECOVER_DATA_OPTION},
{NULL, 0, NULL, 0},
};
{T("preserve-dir-structure"), no_argument, NULL, IMAGEX_PRESERVE_DIR_STRUCTURE_OPTION},
{T("wimboot"), no_argument, NULL, IMAGEX_WIMBOOT_OPTION},
{T("compact"), required_argument, NULL, IMAGEX_COMPACT_OPTION},
+ {T("recover-data"), no_argument, NULL, IMAGEX_RECOVER_DATA_OPTION},
{NULL, 0, NULL, 0},
};
if (ret)
goto out_free_refglobs;
break;
+ case IMAGEX_RECOVER_DATA_OPTION:
+ extract_flags |= WIMLIB_EXTRACT_FLAG_RECOVER_DATA;
+ break;
default:
goto out_usage;
}
if (ret)
goto out_free_refglobs;
break;
+ case IMAGEX_RECOVER_DATA_OPTION:
+ extract_flags |= WIMLIB_EXTRACT_FLAG_RECOVER_DATA;
+ break;
default:
goto out_usage;
}
" [--check] [--ref=\"GLOB\"] [--no-acls] [--strict-acls]\n"
" [--no-attributes] [--rpfix] [--norpfix]\n"
" [--include-invalid-names] [--wimboot] [--unix-data]\n"
-" [--compact=FORMAT]\n"
+" [--compact=FORMAT] [--recover-data]\n"
),
[CMD_CAPTURE] =
T(
" %"TS" WIMFILE IMAGE [(PATH | @LISTFILE)...]\n"
" [--check] [--ref=\"GLOB\"] [--dest-dir=CMD_DIR]\n"
" [--to-stdout] [--no-acls] [--strict-acls]\n"
-" [--no-attributes] [--include-invalid-names]\n"
-" [--no-globs] [--nullglob] [--preserve-dir-structure]\n"
+" [--no-attributes] [--include-invalid-names] [--no-globs]\n"
+" [--nullglob] [--preserve-dir-structure] [--recover-data]\n"
),
[CMD_INFO] =
T(
static const tchar * const fmt =
T(
"wimlib-imagex " PACKAGE_VERSION " (using wimlib %"TS")\n"
-"Copyright (C) 2012-2018 Eric Biggers\n"
+"Copyright (C) 2012-2021 Eric Biggers\n"
"License GPLv3+; GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>.\n"
"This is free software: you are free to change and redistribute it.\n"
"There is NO WARRANTY, to the extent permitted by law.\n"
/* Keep in sync with wimlib.h */
#define WIMLIB_EXTRACT_MASK_PUBLIC \
(WIMLIB_EXTRACT_FLAG_NTFS | \
+ WIMLIB_EXTRACT_FLAG_RECOVER_DATA | \
WIMLIB_EXTRACT_FLAG_UNIX_DATA | \
WIMLIB_EXTRACT_FLAG_NO_ACLS | \
WIMLIB_EXTRACT_FLAG_STRICT_ACLS | \
&& (blob->out_refcnt))
{
wim_reshdr_to_desc_and_blob(&reshdr, ctx->wim, &rdesc, blob);
- ret = read_blob_with_sha1(blob, cbs);
+ ret = read_blob_with_sha1(blob, cbs,
+ ctx->extract_flags &
+ WIMLIB_EXTRACT_FLAG_RECOVER_DATA);
blob_unset_is_located_in_wim_resource(blob);
if (ret)
return ret;
for (u32 i = 0; i < orig_blob->out_refcnt; i++) {
tmpfile_blob.inline_blob_extraction_targets[0] = targets[i];
- ret = read_blob_with_cbs(&tmpfile_blob, cbs);
+ ret = read_blob_with_cbs(&tmpfile_blob, cbs, false);
if (ret)
return ret;
}
return 0;
}
+/* Warn that corruption was found in the file named by @dentry's full path and
+ * that it is being extracted anyway.  When @stream is not the unnamed data
+ * stream, the message says the corruption is in an alternate stream of that
+ * file. */
+static void
+warn_about_corrupted_file(struct wim_dentry *dentry,
+			  const struct wim_inode_stream *stream)
+{
+	const char *what = "alternate stream of ";
+
+	if (stream_is_unnamed_data_stream(stream))
+		what = "";
+
+	WARNING("Corruption in %s\"%"TS"\"! Extracting anyway since data recovery mode is enabled.",
+		what, dentry_full_path(dentry));
+}
+
static int
end_extract_blob(struct blob_descriptor *blob, int status, void *_ctx)
{
struct apply_ctx *ctx = _ctx;
+ if ((ctx->extract_flags & WIMLIB_EXTRACT_FLAG_RECOVER_DATA) &&
+ !status && blob->corrupted) {
+ const struct blob_extraction_target *targets =
+ blob_extraction_targets(blob);
+ for (u32 i = 0; i < blob->out_refcnt; i++) {
+ struct wim_dentry *dentry =
+ inode_first_extraction_dentry(targets[i].inode);
+
+ warn_about_corrupted_file(dentry, targets[i].stream);
+ }
+ }
+
if (unlikely(filedes_valid(&ctx->tmpfile_fd))) {
filedes_close(&ctx->tmpfile_fd);
if (!status)
if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) {
return read_blobs_from_pipe(ctx, &wrapper_cbs);
} else {
+ int flags = VERIFY_BLOB_HASHES;
+
+ if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_RECOVER_DATA)
+ flags |= RECOVER_DATA;
+
return read_blob_list(&ctx->blob_list,
offsetof(struct blob_descriptor,
extraction_list),
- &wrapper_cbs, VERIFY_BLOB_HASHES);
+ &wrapper_cbs, flags);
}
}
* unnamed data stream only. */
static int
extract_dentry_to_stdout(struct wim_dentry *dentry,
- const struct blob_table *blob_table)
+ const struct blob_table *blob_table, int extract_flags)
{
struct wim_inode *inode = dentry->d_inode;
struct blob_descriptor *blob;
struct filedes _stdout;
+ bool recover = (extract_flags & WIMLIB_EXTRACT_FLAG_RECOVER_DATA);
+ int ret;
if (inode->i_attributes & (FILE_ATTRIBUTE_REPARSE_POINT |
FILE_ATTRIBUTE_DIRECTORY |
}
filedes_init(&_stdout, STDOUT_FILENO);
- return extract_blob_to_fd(blob, &_stdout);
+ ret = extract_blob_to_fd(blob, &_stdout, recover);
+ if (ret)
+ return ret;
+ if (recover && blob->corrupted)
+ warn_about_corrupted_file(dentry,
+ inode_get_unnamed_data_stream(inode));
+ return 0;
}
static int
extract_dentries_to_stdout(struct wim_dentry **dentries, size_t num_dentries,
- const struct blob_table *blob_table)
+ const struct blob_table *blob_table,
+ int extract_flags)
{
for (size_t i = 0; i < num_dentries; i++) {
- int ret = extract_dentry_to_stdout(dentries[i], blob_table);
+ int ret = extract_dentry_to_stdout(dentries[i], blob_table,
+ extract_flags);
if (ret)
return ret;
}
if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) {
ret = extract_dentries_to_stdout(trees, num_trees,
- wim->blob_table);
+ wim->blob_table,
+ extract_flags);
goto out;
}
u64 size;
};
+/*
+ * Decompress the @chunk_csize-byte compressed chunk at @cbuf into the
+ * @chunk_usize-byte buffer @ubuf using @decompressor.
+ *
+ * Returns 0 if wimlib_decompress() succeeds.  If it fails and @recover_data is
+ * false, logs an error, sets errno to EINVAL, and returns
+ * WIMLIB_ERR_DECOMPRESSION.  If it fails and @recover_data is true, logs a
+ * warning, refills @ubuf on a best-effort basis, and returns 0.
+ */
+static int
+decompress_chunk(const void *cbuf, u32 chunk_csize, u8 *ubuf, u32 chunk_usize,
+		 struct wimlib_decompressor *decompressor, bool recover_data)
+{
+	if (likely(wimlib_decompress(cbuf, chunk_csize, ubuf, chunk_usize,
+				     decompressor) == 0))
+		return 0;
+
+	if (!recover_data) {
+		ERROR("Failed to decompress data!");
+		errno = EINVAL;
+		return WIMLIB_ERR_DECOMPRESSION;
+	}
+
+	WARNING("Failed to decompress data! Continuing anyway since data recovery mode is enabled.");
+
+	/* Hand *something* back to the caller.  Zero the output buffer first so
+	 * that the worst case is an all-zero chunk, then run the decompressor
+	 * again to fill in as much as possible.  This works because if the
+	 * corruption isn't located right at the beginning of the compressed
+	 * chunk, wimlib_decompress() may write some correct output at the
+	 * beginning even if it fails later.  The second result is deliberately
+	 * ignored. */
+	memset(ubuf, 0, chunk_usize);
+	(void)wimlib_decompress(cbuf, chunk_csize, ubuf, chunk_usize,
+				decompressor);
+	return 0;
+}
+
/*
* Read data from a compressed WIM resource.
*
* the data being read. Each call provides the next chunk of the requested
* data, uncompressed. Each chunk will be nonempty and will not cross
* range boundaries but otherwise will be of unspecified size.
+ * @recover_data
+ * If a chunk can't be fully decompressed due to being corrupted, continue
+ * with whatever data can be recovered rather than return an error.
*
* Possible return values:
*
read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc,
const struct data_range * const ranges,
const size_t num_ranges,
- const struct consume_chunk_callback *cb)
+ const struct consume_chunk_callback *cb,
+ bool recover_data)
{
int ret;
u64 *chunk_offsets = NULL;
goto read_error;
if (read_buf == cbuf) {
- ret = wimlib_decompress(cbuf,
- chunk_csize,
- ubuf,
- chunk_usize,
- decompressor);
- if (unlikely(ret)) {
- ERROR("Failed to decompress data!");
- ret = WIMLIB_ERR_DECOMPRESSION;
- errno = EINVAL;
+ ret = decompress_chunk(cbuf, chunk_csize,
+ ubuf, chunk_usize,
+ decompressor,
+ recover_data);
+ if (unlikely(ret))
goto out_cleanup;
- }
}
cur_read_offset += chunk_csize;
static int
read_partial_wim_resource(const struct wim_resource_descriptor *rdesc,
const u64 offset, const u64 size,
- const struct consume_chunk_callback *cb)
+ const struct consume_chunk_callback *cb,
+ bool recover_data)
{
if (rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED |
WIM_RESHDR_FLAG_SOLID))
.offset = offset,
.size = size,
};
- return read_compressed_wim_resource(rdesc, &range, 1, cb);
+ return read_compressed_wim_resource(rdesc, &range, 1, cb,
+ recover_data);
}
/* Uncompressed resource */
return read_partial_wim_resource(blob->rdesc,
blob->offset_in_res + offset,
size,
- &cb);
+ &cb, false);
}
static int
.func = noop_cb,
};
return read_partial_wim_resource(rdesc, 0,
- rdesc->uncompressed_size, &cb);
+ rdesc->uncompressed_size, &cb, false);
}
static int
read_wim_blob_prefix(const struct blob_descriptor *blob, u64 size,
- const struct consume_chunk_callback *cb)
+ const struct consume_chunk_callback *cb, bool recover_data)
{
return read_partial_wim_resource(blob->rdesc, blob->offset_in_res,
- size, cb);
+ size, cb, recover_data); /* Requests @size bytes starting at the
+ * blob's offset within its resource;
+ * @recover_data is handed to
+ * read_partial_wim_resource() as-is. */
}
/* This function handles reading blob data that is located in an external file,
* encrypted), so Windows uses its own code for its equivalent case. */
static int
read_file_on_disk_prefix(const struct blob_descriptor *blob, u64 size,
- const struct consume_chunk_callback *cb)
+ const struct consume_chunk_callback *cb,
+ bool recover_data)
{
int ret;
int raw_fd;
#ifdef WITH_FUSE
static int
read_staging_file_prefix(const struct blob_descriptor *blob, u64 size,
- const struct consume_chunk_callback *cb)
+ const struct consume_chunk_callback *cb,
+ bool recover_data)
{
int raw_fd;
struct filedes fd;
* already located in an in-memory buffer. */
static int
read_buffer_prefix(const struct blob_descriptor *blob,
- u64 size, const struct consume_chunk_callback *cb)
+ u64 size, const struct consume_chunk_callback *cb,
+ bool recover_data)
{
if (unlikely(!size))
return 0;
typedef int (*read_blob_prefix_handler_t)(const struct blob_descriptor *blob,
u64 size,
- const struct consume_chunk_callback *cb);
+ const struct consume_chunk_callback *cb,
+ bool recover_data); /* Common signature of the per-location handlers
+ * that read_blob_prefix() selects by
+ * blob->blob_location; @recover_data is the
+ * same value read_blob_prefix() received. */
/*
* Read the first @size bytes from a generic "blob", which may be located in any
* Returns 0 on success; nonzero on error. A nonzero value will be returned if
* the blob data cannot be successfully read (for a number of different reasons,
* depending on the blob location), or if @cb returned nonzero in which case
- * that error code will be returned.
+ * that error code will be returned. If @recover_data is true, then errors
+ * decompressing chunks in WIM resources will be ignored.
*/
static int
read_blob_prefix(const struct blob_descriptor *blob, u64 size,
- const struct consume_chunk_callback *cb)
+ const struct consume_chunk_callback *cb, bool recover_data)
{
static const read_blob_prefix_handler_t handlers[] = {
[BLOB_IN_WIM] = read_wim_blob_prefix,
wimlib_assert(blob->blob_location < ARRAY_LEN(handlers)
&& handlers[blob->blob_location] != NULL);
wimlib_assert(size <= blob->size);
- return handlers[blob->blob_location](blob, size, cb);
+ return handlers[blob->blob_location](blob, size, cb, recover_data);
}
struct blob_chunk_ctx {
* callbacks (all of which are optional). */
int
read_blob_with_cbs(struct blob_descriptor *blob,
- const struct read_blob_callbacks *cbs)
+ const struct read_blob_callbacks *cbs, bool recover_data)
{
int ret;
struct blob_chunk_ctx ctx = {
if (unlikely(ret))
return ret;
- ret = read_blob_prefix(blob, blob->size, &cb);
+ ret = read_blob_prefix(blob, blob->size, &cb, recover_data);
return call_end_blob(blob, ret, cbs);
}
.func = bufferer_cb,
.ctx = &buf,
};
- return read_blob_prefix(blob, blob->size, &cb);
+ return read_blob_prefix(blob, blob->size, &cb, false);
}
/* Retrieve the full uncompressed data of the specified blob. A buffer large
struct hasher_context *ctx = _ctx;
sha1_init(&ctx->sha_ctx);
+ blob->corrupted = 0;
return call_begin_blob(blob, &ctx->cbs);
}
}
static int
-report_sha1_mismatch_error(const struct blob_descriptor *blob,
- const u8 actual_hash[SHA1_HASH_SIZE])
+report_sha1_mismatch(struct blob_descriptor *blob,
+ const u8 actual_hash[SHA1_HASH_SIZE], bool recover_data)
{
tchar expected_hashstr[SHA1_HASH_SIZE * 2 + 1];
tchar actual_hashstr[SHA1_HASH_SIZE * 2 + 1];
sprint_hash(blob->hash, expected_hashstr);
sprint_hash(actual_hash, actual_hashstr);
+ blob->corrupted = 1;
+
if (blob_is_in_file(blob)) {
ERROR("A file was concurrently modified!\n"
" Path: \"%"TS"\"\n"
blob_file_path(blob), expected_hashstr, actual_hashstr);
return WIMLIB_ERR_CONCURRENT_MODIFICATION_DETECTED;
} else if (blob->blob_location == BLOB_IN_WIM) {
+ #ifdef ENABLE_ERROR_MESSAGES
const struct wim_resource_descriptor *rdesc = blob->rdesc;
- ERROR("A WIM resource is corrupted!\n"
- " WIM file: \"%"TS"\"\n"
- " Blob uncompressed size: %"PRIu64"\n"
- " Resource offset in WIM: %"PRIu64"\n"
- " Resource uncompressed size: %"PRIu64"\n"
- " Resource size in WIM: %"PRIu64"\n"
- " Resource flags: 0x%x%"TS"\n"
- " Resource compression type: %"TS"\n"
- " Resource compression chunk size: %"PRIu32"\n"
- " Expected SHA-1: %"TS"\n"
- " Actual SHA-1: %"TS"\n",
+
+ (recover_data ? wimlib_warning : wimlib_error)(
+ T("A WIM resource is corrupted!\n"
+ " WIM file: \"%"TS"\"\n"
+ " Blob uncompressed size: %"PRIu64"\n"
+ " Resource offset in WIM: %"PRIu64"\n"
+ " Resource uncompressed size: %"PRIu64"\n"
+ " Resource size in WIM: %"PRIu64"\n"
+ " Resource flags: 0x%x%"TS"\n"
+ " Resource compression type: %"TS"\n"
+ " Resource compression chunk size: %"PRIu32"\n"
+ " Expected SHA-1: %"TS"\n"
+ " Actual SHA-1: %"TS"\n"),
rdesc->wim->filename,
blob->size,
rdesc->offset_in_wim,
rdesc->compression_type),
rdesc->chunk_size,
expected_hashstr, actual_hashstr);
+ #endif /* ENABLE_ERROR_MESSAGES */
+ if (recover_data)
+ return 0;
return WIMLIB_ERR_INVALID_RESOURCE_HASH;
} else {
ERROR("File data was concurrently modified!\n"
} else if ((ctx->flags & VERIFY_BLOB_HASHES) &&
unlikely(!hashes_equal(hash, blob->hash)))
{
- ret = report_sha1_mismatch_error(blob, hash);
+ ret = report_sha1_mismatch(blob, hash,
+ ctx->flags & RECOVER_DATA);
goto out_next_cb;
}
ret = 0;
* SHA-1 message digest of the blob. */
int
read_blob_with_sha1(struct blob_descriptor *blob,
- const struct read_blob_callbacks *cbs)
+ const struct read_blob_callbacks *cbs, bool recover_data)
{
struct hasher_context hasher_ctx = {
- .flags = VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES,
+ .flags = VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES |
+ (recover_data ? RECOVER_DATA : 0),
.cbs = *cbs,
};
struct read_blob_callbacks hasher_cbs = {
.end_blob = hasher_end_blob,
.ctx = &hasher_ctx,
};
- return read_blob_with_cbs(blob, &hasher_cbs);
+ return read_blob_with_cbs(blob, &hasher_cbs, recover_data);
}
static int
struct blob_descriptor *last_blob,
size_t blob_count,
size_t list_head_offset,
- const struct read_blob_callbacks *sink_cbs)
+ const struct read_blob_callbacks *sink_cbs,
+ bool recover_data)
{
struct data_range *ranges;
bool ranges_malloced;
};
ret = read_compressed_wim_resource(first_blob->rdesc, ranges,
- blob_count, &cb);
+ blob_count, &cb, recover_data);
if (ranges_malloced)
FREE(ranges);
* For all blobs being read that have already had SHA-1 message
* digests computed, calculate the SHA-1 message digest of the read
* data and compare it with the previously computed value. If they
- * do not match, return WIMLIB_ERR_INVALID_RESOURCE_HASH.
+ * do not match, return WIMLIB_ERR_INVALID_RESOURCE_HASH (unless
+ * RECOVER_DATA is also set, in which case just issue a warning).
*
* COMPUTE_MISSING_BLOB_HASHES
* For all blobs being read that have not yet had their SHA-1
* BLOB_LIST_ALREADY_SORTED
* @blob_list is already sorted in sequential order for reading.
*
+ * RECOVER_DATA
+ * Don't consider corrupted blob data to be an error.
+ *
* The callback functions are allowed to delete the current blob from the list
* if necessary.
*
ret = read_blobs_in_solid_resource(blob, blob_last,
blob_count,
list_head_offset,
- sink_cbs);
+ sink_cbs,
+ flags & RECOVER_DATA);
if (ret)
return ret;
continue;
}
}
- ret = read_blob_with_cbs(blob, sink_cbs);
+ ret = read_blob_with_cbs(blob, sink_cbs, flags & RECOVER_DATA);
if (unlikely(ret && ret != BEGIN_BLOB_STATUS_SKIP_BLOB))
return ret;
}
.func = extract_chunk_to_fd,
.ctx = fd,
};
- return read_blob_prefix(blob, size, &cb);
+ return read_blob_prefix(blob, size, &cb, false);
}
/* Extract the full uncompressed contents of the specified blob to the specified
* file descriptor. This checks the SHA-1 message digest. */
int
-extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd)
+extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd,
+ bool recover_data) /* @recover_data is passed through unchanged to
+ * read_blob_with_sha1(). */
{
struct read_blob_callbacks cbs = {
.continue_blob = extract_blob_chunk_to_fd,
.ctx = fd,
};
- return read_blob_with_sha1(blob, &cbs);
+ return read_blob_with_sha1(blob, &cbs, recover_data);
}
/* Calculate the SHA-1 message digest of a blob and store it in @blob->hash. */
{
static const struct read_blob_callbacks cbs = {
};
- return read_blob_with_sha1(blob, &cbs);
+ return read_blob_with_sha1(blob, &cbs, false);
}
/*