From 571e0b066fe7526690d68a5223d1025604c6e48b Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 6 Apr 2021 21:31:48 -0700 Subject: [PATCH] Add support for a data recovery mode Add support for extracting file data even if it is corrupted (i.e. if its hash doesn't match or some of its chunks can't be decompressed). This isn't recommended for general use, but it could be useful for recovering data from a corrupted WIM archive. --- Makefile.am | 3 +- NEWS | 6 ++ doc/man1/wimapply.1 | 17 ++- doc/man1/wimextract.1 | 3 + include/wimlib.h | 4 + include/wimlib/blob_table.h | 3 + include/wimlib/ntfs_3g.h | 3 +- include/wimlib/resource.h | 8 +- include/wimlib/win32.h | 3 +- programs/imagex.c | 19 +++- src/extract.c | 56 ++++++++-- src/ntfs-3g_capture.c | 3 +- src/resource.c | 158 ++++++++++++++++++---------- src/win32_capture.c | 3 +- src/write.c | 2 +- tests/test-imagex-capture_and_apply | 14 +++ tests/wims/README | 4 + tests/wims/corrupted_file_1.wim | Bin 0 -> 1324 bytes tests/wims/corrupted_file_2.wim | Bin 0 -> 1280 bytes 19 files changed, 229 insertions(+), 80 deletions(-) create mode 100644 tests/wims/corrupted_file_1.wim create mode 100644 tests/wims/corrupted_file_2.wim diff --git a/Makefile.am b/Makefile.am index 85b038b5..391b09ca 100644 --- a/Makefile.am +++ b/Makefile.am @@ -340,7 +340,8 @@ EXTRA_DIST += \ tests/security_descriptor_1.base64 \ tests/security_descriptor_1.bin \ tests/security_descriptor_2.base64 \ - tests/security_descriptor_2.bin + tests/security_descriptor_2.bin \ + tests/wims if WINDOWS_NATIVE_BUILD # Tests are run manually for Windows builds. diff --git a/NEWS b/NEWS index 3ac38c16..69f8ce03 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,12 @@ Version 1.13.4-BETA1: wimsplit now prints progress messages regularly rather than just once per WIM part. + Added support for a data recovery mode which causes files to be + extracted even if they are corrupted. The option is --recover-data for + wimapply and wimextract, and WIMLIB_EXTRACT_FLAG_RECOVER_DATA for the + library. Note that this option won't help with all types of corruption; + some types of corruption will still cause a fatal error. + Version 1.13.3: On Windows, improved performance of capturing an entire drive in some cases. diff --git a/doc/man1/wimapply.1 b/doc/man1/wimapply.1 index 4ae339f0..ebd8f7eb 100644 --- a/doc/man1/wimapply.1 +++ b/doc/man1/wimapply.1 @@ -355,15 +355,22 @@ files matching any of the patterns in this section will not be compressed. In addition, wimlib has a hardcoded list of files for which it knows, for compatibility with the Windows bootloader, to override the requested compression format. +.TP +\fB--recover-data\fR +If a file is corrupted (its stored hash doesn't match its actual hash, or some +parts of it can't be decompressed), extract the corrupted file anyway with a +warning, rather than aborting with an error. This may be useful to recover data +if a WIM archive was corrupted. Note that recovering data is not guaranteed to +succeed, as it depends on the type of corruption that occurred. .SH NOTES \fIData integrity\fR: WIM files include checksums of file data. To detect accidental (non-malicious) data corruption, wimlib calculates the checksum of every file it extracts and issues an error if it does not have the expected -value. (This default behavior seems equivalent to the \fB/verify\fR option of -ImageX.) In addition, a WIM file can include an integrity table (extra -checksums) over the raw data of the entire WIM file. For performance reasons -wimlib does not check the integrity table by default, but the \fB--check\fR -option can be passed to make it do so. +value, unless the \fB--recover-data\fR option is given. (This default behavior +seems equivalent to the \fB/verify\fR option of ImageX.) In addition, a WIM +file can include an integrity table (extra checksums) over the raw data of the +entire WIM file. For performance reasons wimlib does not check the integrity +table by default, but the \fB--check\fR option can be passed to make it do so. .PP \fIESD files\fR: wimlib can extract files from solid-compressed WIMs, or "ESD" (.esd) files, just like from normal WIM (.wim) files. However, Microsoft diff --git a/doc/man1/wimextract.1 b/doc/man1/wimextract.1 index c402d644..01e87d2d 100644 --- a/doc/man1/wimextract.1 +++ b/doc/man1/wimextract.1 @@ -152,6 +152,9 @@ See the documentation for this option to \fBwimapply\fR(1). .TP \fB--compact\fR=\fIFORMAT\fR See the documentation for this option to \fBwimapply\fR(1). +.TP +\fB--recover-data\fR +See the documentation for this option to \fBwimapply\fR(1). .SH NOTES See \fBwimapply\fR(1) for information about what data and metadata are extracted on UNIX-like systems versus on Windows. diff --git a/include/wimlib.h b/include/wimlib.h index 47c357d0..b1aff629 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -1928,6 +1928,10 @@ typedef int (*wimlib_iterate_lookup_table_callback_t)(const struct wimlib_resour * wimlib_extract_paths() when passed multiple paths. */ #define WIMLIB_EXTRACT_FLAG_NTFS 0x00000001 +/** Since wimlib v1.13.4: Don't consider corrupted files to be an error. Just + * extract them in whatever form we can. */ +#define WIMLIB_EXTRACT_FLAG_RECOVER_DATA 0x00000002 + /** UNIX-like systems only: Extract UNIX-specific metadata captured with * ::WIMLIB_ADD_FLAG_UNIX_DATA. */ #define WIMLIB_EXTRACT_FLAG_UNIX_DATA 0x00000020 diff --git a/include/wimlib/blob_table.h b/include/wimlib/blob_table.h index 610db241..f15c6117 100644 --- a/include/wimlib/blob_table.h +++ b/include/wimlib/blob_table.h @@ -141,6 +141,9 @@ struct blob_descriptor { /* 1 iff the SHA-1 message digest of this blob is unknown. */ u16 unhashed : 1; + /* 1 iff this blob has failed its checksum. */ + u16 corrupted : 1; + /* Temporary fields used when writing blobs; set as documented for * prepare_blob_list_for_write(). */ u16 unique_size : 1; diff --git a/include/wimlib/ntfs_3g.h b/include/wimlib/ntfs_3g.h index 7909e43e..e2ed6d6b 100644 --- a/include/wimlib/ntfs_3g.h +++ b/include/wimlib/ntfs_3g.h @@ -11,7 +11,8 @@ struct ntfs_location; extern int read_ntfs_attribute_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb); + const struct consume_chunk_callback *cb, + bool recover_data); extern struct ntfs_location * clone_ntfs_location(const struct ntfs_location *loc); diff --git a/include/wimlib/resource.h b/include/wimlib/resource.h index 05ceed70..94e43e5f 100644 --- a/include/wimlib/resource.h +++ b/include/wimlib/resource.h @@ -273,6 +273,7 @@ call_end_blob(struct blob_descriptor *blob, int status, #define VERIFY_BLOB_HASHES 0x1 #define COMPUTE_MISSING_BLOB_HASHES 0x2 #define BLOB_LIST_ALREADY_SORTED 0x4 +#define RECOVER_DATA 0x8 extern int read_blob_list(struct list_head *blob_list, size_t list_head_offset, @@ -280,18 +281,19 @@ read_blob_list(struct list_head *blob_list, size_t list_head_offset, extern int read_blob_with_cbs(struct blob_descriptor *blob, - const struct read_blob_callbacks *cbs); + const struct read_blob_callbacks *cbs, bool recover_data); extern int read_blob_with_sha1(struct blob_descriptor *blob, - const struct read_blob_callbacks *cbs); + const struct read_blob_callbacks *cbs, bool recover_data); extern int extract_blob_prefix_to_fd(struct blob_descriptor *blob, u64 size, struct filedes *fd); extern int -extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd); +extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd, + bool recover_data); /* Miscellaneous blob functions. */ diff --git a/include/wimlib/win32.h b/include/wimlib/win32.h index 94511a87..71a77107 100644 --- a/include/wimlib/win32.h +++ b/include/wimlib/win32.h @@ -25,7 +25,8 @@ cmp_windows_files(const struct windows_file *file1, extern int read_windows_file_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb); + const struct consume_chunk_callback *cb, + bool recover_data); extern int win32_global_init(int init_flags); diff --git a/programs/imagex.c b/programs/imagex.c index dd43d75c..59181eb9 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -6,7 +6,7 @@ */ /* - * Copyright (C) 2012-2018 Eric Biggers + * Copyright (C) 2012-2021 Eric Biggers * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -202,6 +202,7 @@ enum { IMAGEX_PRESERVE_DIR_STRUCTURE_OPTION, IMAGEX_REBUILD_OPTION, IMAGEX_RECOMPRESS_OPTION, + IMAGEX_RECOVER_DATA_OPTION, IMAGEX_RECURSIVE_OPTION, IMAGEX_REF_OPTION, IMAGEX_RPFIX_OPTION, @@ -239,6 +240,7 @@ static const struct option apply_options[] = { {T("include-invalid-names"), no_argument, NULL, IMAGEX_INCLUDE_INVALID_NAMES_OPTION}, {T("wimboot"), no_argument, NULL, IMAGEX_WIMBOOT_OPTION}, {T("compact"), required_argument, NULL, IMAGEX_COMPACT_OPTION}, + {T("recover-data"), no_argument, NULL, IMAGEX_RECOVER_DATA_OPTION}, {NULL, 0, NULL, 0}, }; @@ -336,6 +338,7 @@ static const struct option extract_options[] = { {T("preserve-dir-structure"), no_argument, NULL, IMAGEX_PRESERVE_DIR_STRUCTURE_OPTION}, {T("wimboot"), no_argument, NULL, IMAGEX_WIMBOOT_OPTION}, {T("compact"), required_argument, NULL, IMAGEX_COMPACT_OPTION}, + {T("recover-data"), no_argument, NULL, IMAGEX_RECOVER_DATA_OPTION}, {NULL, 0, NULL, 0}, }; @@ -1747,6 +1750,9 @@ imagex_apply(int argc, tchar **argv, int cmd) if (ret) goto out_free_refglobs; break; + case IMAGEX_RECOVER_DATA_OPTION: + extract_flags |= WIMLIB_EXTRACT_FLAG_RECOVER_DATA; + break; default: goto out_usage; } @@ -3294,6 +3300,9 @@ imagex_extract(int argc, tchar **argv, int cmd) if (ret) goto out_free_refglobs; break; + case IMAGEX_RECOVER_DATA_OPTION: + extract_flags |= WIMLIB_EXTRACT_FLAG_RECOVER_DATA; + break; default: goto out_usage; } @@ -4484,7 +4493,7 @@ T( " [--check] [--ref=\"GLOB\"] [--no-acls] [--strict-acls]\n" " [--no-attributes] [--rpfix] [--norpfix]\n" " [--include-invalid-names] [--wimboot] [--unix-data]\n" -" [--compact=FORMAT]\n" +" [--compact=FORMAT] [--recover-data]\n" ), [CMD_CAPTURE] = T( @@ -4517,8 +4526,8 @@ T( " %"TS" WIMFILE IMAGE [(PATH | @LISTFILE)...]\n" " [--check] [--ref=\"GLOB\"] [--dest-dir=CMD_DIR]\n" " [--to-stdout] [--no-acls] [--strict-acls]\n" -" [--no-attributes] [--include-invalid-names]\n" -" [--no-globs] [--nullglob] [--preserve-dir-structure]\n" +" [--no-attributes] [--include-invalid-names] [--no-globs]\n" +" [--nullglob] [--preserve-dir-structure] [--recover-data]\n" ), [CMD_INFO] = T( @@ -4602,7 +4611,7 @@ version(void) static const tchar * const fmt = T( "wimlib-imagex " PACKAGE_VERSION " (using wimlib %"TS")\n" -"Copyright (C) 2012-2018 Eric Biggers\n" +"Copyright (C) 2012-2021 Eric Biggers\n" "License GPLv3+; GNU GPL version 3 or later .\n" "This is free software: you are free to change and redistribute it.\n" "There is NO WARRANTY, to the extent permitted by law.\n" diff --git a/src/extract.c b/src/extract.c index 49564f4f..0e5d4ec4 100644 --- a/src/extract.c +++ b/src/extract.c @@ -72,6 +72,7 @@ /* Keep in sync with wimlib.h */ #define WIMLIB_EXTRACT_MASK_PUBLIC \ (WIMLIB_EXTRACT_FLAG_NTFS | \ + WIMLIB_EXTRACT_FLAG_RECOVER_DATA | \ WIMLIB_EXTRACT_FLAG_UNIX_DATA | \ WIMLIB_EXTRACT_FLAG_NO_ACLS | \ WIMLIB_EXTRACT_FLAG_STRICT_ACLS | \ @@ -310,7 +311,9 @@ read_blobs_from_pipe(struct apply_ctx *ctx, const struct read_blob_callbacks *cb && (blob->out_refcnt)) { wim_reshdr_to_desc_and_blob(&reshdr, ctx->wim, &rdesc, blob); - ret = read_blob_with_sha1(blob, cbs); + ret = read_blob_with_sha1(blob, cbs, + ctx->extract_flags & + WIMLIB_EXTRACT_FLAG_RECOVER_DATA); blob_unset_is_located_in_wim_resource(blob); if (ret) return ret; @@ -504,18 +507,39 @@ extract_from_tmpfile(const tchar *tmpfile_name, for (u32 i = 0; i < orig_blob->out_refcnt; i++) { tmpfile_blob.inline_blob_extraction_targets[0] = targets[i]; - ret = read_blob_with_cbs(&tmpfile_blob, cbs); + ret = read_blob_with_cbs(&tmpfile_blob, cbs, false); if (ret) return ret; } return 0; } +static void +warn_about_corrupted_file(struct wim_dentry *dentry, + const struct wim_inode_stream *stream) +{ + WARNING("Corruption in %s\"%"TS"\"! Extracting anyway since data recovery mode is enabled.", + stream_is_unnamed_data_stream(stream) ? "" : "alternate stream of ", + dentry_full_path(dentry)); +} + static int end_extract_blob(struct blob_descriptor *blob, int status, void *_ctx) { struct apply_ctx *ctx = _ctx; + if ((ctx->extract_flags & WIMLIB_EXTRACT_FLAG_RECOVER_DATA) && + !status && blob->corrupted) { + const struct blob_extraction_target *targets = + blob_extraction_targets(blob); + for (u32 i = 0; i < blob->out_refcnt; i++) { + struct wim_dentry *dentry = + inode_first_extraction_dentry(targets[i].inode); + + warn_about_corrupted_file(dentry, targets[i].stream); + } + } + if (unlikely(filedes_valid(&ctx->tmpfile_fd))) { filedes_close(&ctx->tmpfile_fd); if (!status) @@ -560,10 +584,15 @@ extract_blob_list(struct apply_ctx *ctx, const struct read_blob_callbacks *cbs) if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_FROM_PIPE) { return read_blobs_from_pipe(ctx, &wrapper_cbs); } else { + int flags = VERIFY_BLOB_HASHES; + + if (ctx->extract_flags & WIMLIB_EXTRACT_FLAG_RECOVER_DATA) + flags |= RECOVER_DATA; + return read_blob_list(&ctx->blob_list, offsetof(struct blob_descriptor, extraction_list), - &wrapper_cbs, VERIFY_BLOB_HASHES); + &wrapper_cbs, flags); } } @@ -574,11 +603,13 @@ extract_blob_list(struct apply_ctx *ctx, const struct read_blob_callbacks *cbs) * unnamed data stream only. */ static int extract_dentry_to_stdout(struct wim_dentry *dentry, - const struct blob_table *blob_table) + const struct blob_table *blob_table, int extract_flags) { struct wim_inode *inode = dentry->d_inode; struct blob_descriptor *blob; struct filedes _stdout; + bool recover = (extract_flags & WIMLIB_EXTRACT_FLAG_RECOVER_DATA); + int ret; if (inode->i_attributes & (FILE_ATTRIBUTE_REPARSE_POINT | FILE_ATTRIBUTE_DIRECTORY | @@ -598,15 +629,23 @@ extract_dentry_to_stdout(struct wim_dentry *dentry, } filedes_init(&_stdout, STDOUT_FILENO); - return extract_blob_to_fd(blob, &_stdout); + ret = extract_blob_to_fd(blob, &_stdout, recover); + if (ret) + return ret; + if (recover && blob->corrupted) + warn_about_corrupted_file(dentry, + inode_get_unnamed_data_stream(inode)); + return 0; } static int extract_dentries_to_stdout(struct wim_dentry **dentries, size_t num_dentries, - const struct blob_table *blob_table) + const struct blob_table *blob_table, + int extract_flags) { for (size_t i = 0; i < num_dentries; i++) { - int ret = extract_dentry_to_stdout(dentries[i], blob_table); + int ret = extract_dentry_to_stdout(dentries[i], blob_table, + extract_flags); if (ret) return ret; } @@ -1446,7 +1485,8 @@ extract_trees(WIMStruct *wim, struct wim_dentry **trees, size_t num_trees, if (extract_flags & WIMLIB_EXTRACT_FLAG_TO_STDOUT) { ret = extract_dentries_to_stdout(trees, num_trees, - wim->blob_table); + wim->blob_table, + extract_flags); goto out; } diff --git a/src/ntfs-3g_capture.c b/src/ntfs-3g_capture.c index bc8fed71..1745cbd1 100644 --- a/src/ntfs-3g_capture.c +++ b/src/ntfs-3g_capture.c @@ -117,7 +117,8 @@ open_ntfs_attr(ntfs_inode *ni, const struct ntfs_location *loc) int read_ntfs_attribute_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, + bool recover_data) { const struct ntfs_location *loc = blob->ntfs_loc; ntfs_volume *vol = loc->volume->vol; diff --git a/src/resource.c b/src/resource.c index 9054b55e..8b6139ad 100644 --- a/src/resource.c +++ b/src/resource.c @@ -83,6 +83,34 @@ struct data_range { u64 size; }; +static int +decompress_chunk(const void *cbuf, u32 chunk_csize, u8 *ubuf, u32 chunk_usize, + struct wimlib_decompressor *decompressor, bool recover_data) +{ + int res = wimlib_decompress(cbuf, chunk_csize, ubuf, chunk_usize, + decompressor); + if (likely(res == 0)) + return 0; + + if (recover_data) { + WARNING("Failed to decompress data! Continuing anyway since data recovery mode is enabled."); + + /* Continue on with *something*. In the worst case just use a + * zeroed buffer. But, try to fill as much of it with + * decompressed data as we can. This works because if the + * corruption isn't located right at the beginning of the + * compressed chunk, wimlib_decompress() may write some correct + * output at the beginning even if it fails later. */ + memset(ubuf, 0, chunk_usize); + (void)wimlib_decompress(cbuf, chunk_csize, ubuf, + chunk_usize, decompressor); + return 0; + } + ERROR("Failed to decompress data!"); + errno = EINVAL; + return WIMLIB_ERR_DECOMPRESSION; +} + /* * Read data from a compressed WIM resource. * @@ -98,6 +126,9 @@ struct data_range { * the data being read. Each call provides the next chunk of the requested * data, uncompressed. Each chunk will be nonempty and will not cross * range boundaries but otherwise will be of unspecified size. + * @recover_data + * If a chunk can't be fully decompressed due to being corrupted, continue + * with whatever data can be recovered rather than return an error. * * Possible return values: * @@ -114,7 +145,8 @@ static int read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc, const struct data_range * const ranges, const size_t num_ranges, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, + bool recover_data) { int ret; u64 *chunk_offsets = NULL; @@ -446,17 +478,12 @@ read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc, goto read_error; if (read_buf == cbuf) { - ret = wimlib_decompress(cbuf, - chunk_csize, - ubuf, - chunk_usize, - decompressor); - if (unlikely(ret)) { - ERROR("Failed to decompress data!"); - ret = WIMLIB_ERR_DECOMPRESSION; - errno = EINVAL; + ret = decompress_chunk(cbuf, chunk_csize, + ubuf, chunk_usize, + decompressor, + recover_data); + if (unlikely(ret)) goto out_cleanup; - } } cur_read_offset += chunk_csize; @@ -592,7 +619,8 @@ bufferer_cb(const void *chunk, size_t size, void *_ctx) static int read_partial_wim_resource(const struct wim_resource_descriptor *rdesc, const u64 offset, const u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, + bool recover_data) { if (rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_SOLID)) @@ -604,7 +632,8 @@ read_partial_wim_resource(const struct wim_resource_descriptor *rdesc, .offset = offset, .size = size, }; - return read_compressed_wim_resource(rdesc, &range, 1, cb); + return read_compressed_wim_resource(rdesc, &range, 1, cb, + recover_data); } /* Uncompressed resource */ @@ -626,7 +655,7 @@ read_partial_wim_blob_into_buf(const struct blob_descriptor *blob, return read_partial_wim_resource(blob->rdesc, blob->offset_in_res + offset, size, - &cb); + &cb, false); } static int @@ -643,15 +672,15 @@ skip_wim_resource(const struct wim_resource_descriptor *rdesc) .func = noop_cb, }; return read_partial_wim_resource(rdesc, 0, - rdesc->uncompressed_size, &cb); + rdesc->uncompressed_size, &cb, false); } static int read_wim_blob_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, bool recover_data) { return read_partial_wim_resource(blob->rdesc, blob->offset_in_res, - size, cb); + size, cb, recover_data); } /* This function handles reading blob data that is located in an external file, @@ -664,7 +693,8 @@ read_wim_blob_prefix(const struct blob_descriptor *blob, u64 size, * encrypted), so Windows uses its own code for its equivalent case. */ static int read_file_on_disk_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, + bool recover_data) { int ret; int raw_fd; @@ -684,7 +714,8 @@ read_file_on_disk_prefix(const struct blob_descriptor *blob, u64 size, #ifdef WITH_FUSE static int read_staging_file_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, + bool recover_data) { int raw_fd; struct filedes fd; @@ -708,7 +739,8 @@ read_staging_file_prefix(const struct blob_descriptor *blob, u64 size, * already located in an in-memory buffer. */ static int read_buffer_prefix(const struct blob_descriptor *blob, - u64 size, const struct consume_chunk_callback *cb) + u64 size, const struct consume_chunk_callback *cb, + bool recover_data) { if (unlikely(!size)) return 0; @@ -717,7 +749,8 @@ read_buffer_prefix(const struct blob_descriptor *blob, typedef int (*read_blob_prefix_handler_t)(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb); + const struct consume_chunk_callback *cb, + bool recover_data); /* * Read the first @size bytes from a generic "blob", which may be located in any @@ -728,11 +761,12 @@ typedef int (*read_blob_prefix_handler_t)(const struct blob_descriptor *blob, * Returns 0 on success; nonzero on error. A nonzero value will be returned if * the blob data cannot be successfully read (for a number of different reasons, * depending on the blob location), or if @cb returned nonzero in which case - * that error code will be returned. + * that error code will be returned. If @recover_data is true, then errors + * decompressing chunks in WIM resources will be ignored. */ static int read_blob_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, bool recover_data) { static const read_blob_prefix_handler_t handlers[] = { [BLOB_IN_WIM] = read_wim_blob_prefix, @@ -751,7 +785,7 @@ read_blob_prefix(const struct blob_descriptor *blob, u64 size, wimlib_assert(blob->blob_location < ARRAY_LEN(handlers) && handlers[blob->blob_location] != NULL); wimlib_assert(size <= blob->size); - return handlers[blob->blob_location](blob, size, cb); + return handlers[blob->blob_location](blob, size, cb, recover_data); } struct blob_chunk_ctx { @@ -775,7 +809,7 @@ consume_blob_chunk(const void *chunk, size_t size, void *_ctx) * callbacks (all of which are optional). */ int read_blob_with_cbs(struct blob_descriptor *blob, - const struct read_blob_callbacks *cbs) + const struct read_blob_callbacks *cbs, bool recover_data) { int ret; struct blob_chunk_ctx ctx = { @@ -792,7 +826,7 @@ read_blob_with_cbs(struct blob_descriptor *blob, if (unlikely(ret)) return ret; - ret = read_blob_prefix(blob, blob->size, &cb); + ret = read_blob_prefix(blob, blob->size, &cb, recover_data); return call_end_blob(blob, ret, cbs); } @@ -807,7 +841,7 @@ read_blob_into_buf(const struct blob_descriptor *blob, void *buf) .func = bufferer_cb, .ctx = &buf, }; - return read_blob_prefix(blob, blob->size, &cb); + return read_blob_prefix(blob, blob->size, &cb, false); } /* Retrieve the full uncompressed data of the specified blob. A buffer large @@ -955,6 +989,7 @@ hasher_begin_blob(struct blob_descriptor *blob, void *_ctx) struct hasher_context *ctx = _ctx; sha1_init(&ctx->sha_ctx); + blob->corrupted = 0; return call_begin_blob(blob, &ctx->cbs); } @@ -977,8 +1012,8 @@ hasher_continue_blob(const struct blob_descriptor *blob, u64 offset, } static int -report_sha1_mismatch_error(const struct blob_descriptor *blob, - const u8 actual_hash[SHA1_HASH_SIZE]) +report_sha1_mismatch(struct blob_descriptor *blob, + const u8 actual_hash[SHA1_HASH_SIZE], bool recover_data) { tchar expected_hashstr[SHA1_HASH_SIZE * 2 + 1]; tchar actual_hashstr[SHA1_HASH_SIZE * 2 + 1]; @@ -989,6 +1024,8 @@ report_sha1_mismatch_error(const struct blob_descriptor *blob, sprint_hash(blob->hash, expected_hashstr); sprint_hash(actual_hash, actual_hashstr); + blob->corrupted = 1; + if (blob_is_in_file(blob)) { ERROR("A file was concurrently modified!\n" " Path: \"%"TS"\"\n" @@ -997,18 +1034,21 @@ report_sha1_mismatch_error(const struct blob_descriptor *blob, blob_file_path(blob), expected_hashstr, actual_hashstr); return WIMLIB_ERR_CONCURRENT_MODIFICATION_DETECTED; } else if (blob->blob_location == BLOB_IN_WIM) { + #ifdef ENABLE_ERROR_MESSAGES const struct wim_resource_descriptor *rdesc = blob->rdesc; - ERROR("A WIM resource is corrupted!\n" - " WIM file: \"%"TS"\"\n" - " Blob uncompressed size: %"PRIu64"\n" - " Resource offset in WIM: %"PRIu64"\n" - " Resource uncompressed size: %"PRIu64"\n" - " Resource size in WIM: %"PRIu64"\n" - " Resource flags: 0x%x%"TS"\n" - " Resource compression type: %"TS"\n" - " Resource compression chunk size: %"PRIu32"\n" - " Expected SHA-1: %"TS"\n" - " Actual SHA-1: %"TS"\n", + + (recover_data ? wimlib_warning : wimlib_error)( + T("A WIM resource is corrupted!\n" + " WIM file: \"%"TS"\"\n" + " Blob uncompressed size: %"PRIu64"\n" + " Resource offset in WIM: %"PRIu64"\n" + " Resource uncompressed size: %"PRIu64"\n" + " Resource size in WIM: %"PRIu64"\n" + " Resource flags: 0x%x%"TS"\n" + " Resource compression type: %"TS"\n" + " Resource compression chunk size: %"PRIu32"\n" + " Expected SHA-1: %"TS"\n" + " Actual SHA-1: %"TS"\n"), rdesc->wim->filename, blob->size, rdesc->offset_in_wim, @@ -1020,6 +1060,9 @@ report_sha1_mismatch_error(const struct blob_descriptor *blob, rdesc->compression_type), rdesc->chunk_size, expected_hashstr, actual_hashstr); + #endif /* ENABLE_ERROR_MESSAGES */ + if (recover_data) + return 0; return WIMLIB_ERR_INVALID_RESOURCE_HASH; } else { ERROR("File data was concurrently modified!\n" @@ -1058,7 +1101,8 @@ hasher_end_blob(struct blob_descriptor *blob, int status, void *_ctx) } else if ((ctx->flags & VERIFY_BLOB_HASHES) && unlikely(!hashes_equal(hash, blob->hash))) { - ret = report_sha1_mismatch_error(blob, hash); + ret = report_sha1_mismatch(blob, hash, + ctx->flags & RECOVER_DATA); goto out_next_cb; } ret = 0; @@ -1071,10 +1115,11 @@ out_next_cb: * SHA-1 message digest of the blob. */ int read_blob_with_sha1(struct blob_descriptor *blob, - const struct read_blob_callbacks *cbs) + const struct read_blob_callbacks *cbs, bool recover_data) { struct hasher_context hasher_ctx = { - .flags = VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES, + .flags = VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES | + (recover_data ? RECOVER_DATA : 0), .cbs = *cbs, }; struct read_blob_callbacks hasher_cbs = { @@ -1083,7 +1128,7 @@ read_blob_with_sha1(struct blob_descriptor *blob, .end_blob = hasher_end_blob, .ctx = &hasher_ctx, }; - return read_blob_with_cbs(blob, &hasher_cbs); + return read_blob_with_cbs(blob, &hasher_cbs, recover_data); } static int @@ -1091,7 +1136,8 @@ read_blobs_in_solid_resource(struct blob_descriptor *first_blob, struct blob_descriptor *last_blob, size_t blob_count, size_t list_head_offset, - const struct read_blob_callbacks *sink_cbs) + const struct read_blob_callbacks *sink_cbs, + bool recover_data) { struct data_range *ranges; bool ranges_malloced; @@ -1141,7 +1187,7 @@ read_blobs_in_solid_resource(struct blob_descriptor *first_blob, }; ret = read_compressed_wim_resource(first_blob->rdesc, ranges, - blob_count, &cb); + blob_count, &cb, recover_data); if (ranges_malloced) FREE(ranges); @@ -1178,7 +1224,8 @@ oom: * For all blobs being read that have already had SHA-1 message * digests computed, calculate the SHA-1 message digest of the read * data and compare it with the previously computed value. If they - * do not match, return WIMLIB_ERR_INVALID_RESOURCE_HASH. + * do not match, return WIMLIB_ERR_INVALID_RESOURCE_HASH (unless + * RECOVER_DATA is also set, in which case just issue a warning). * * COMPUTE_MISSING_BLOB_HASHES * For all blobs being read that have not yet had their SHA-1 @@ -1188,6 +1235,9 @@ oom: * BLOB_LIST_ALREADY_SORTED * @blob_list is already sorted in sequential order for reading. * + * RECOVER_DATA + * Don't consider corrupted blob data to be an error. + * * The callback functions are allowed to delete the current blob from the list * if necessary. * @@ -1273,14 +1323,15 @@ read_blob_list(struct list_head *blob_list, size_t list_head_offset, ret = read_blobs_in_solid_resource(blob, blob_last, blob_count, list_head_offset, - sink_cbs); + sink_cbs, + flags & RECOVER_DATA); if (ret) return ret; continue; } } - ret = read_blob_with_cbs(blob, sink_cbs); + ret = read_blob_with_cbs(blob, sink_cbs, flags & RECOVER_DATA); if (unlikely(ret && ret != BEGIN_BLOB_STATUS_SKIP_BLOB)) return ret; } @@ -1314,19 +1365,20 @@ extract_blob_prefix_to_fd(struct blob_descriptor *blob, u64 size, .func = extract_chunk_to_fd, .ctx = fd, }; - return read_blob_prefix(blob, size, &cb); + return read_blob_prefix(blob, size, &cb, false); } /* Extract the full uncompressed contents of the specified blob to the specified * file descriptor. This checks the SHA-1 message digest. */ int -extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd) +extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd, + bool recover_data) { struct read_blob_callbacks cbs = { .continue_blob = extract_blob_chunk_to_fd, .ctx = fd, }; - return read_blob_with_sha1(blob, &cbs); + return read_blob_with_sha1(blob, &cbs, recover_data); } /* Calculate the SHA-1 message digest of a blob and store it in @blob->hash. */ @@ -1335,7 +1387,7 @@ sha1_blob(struct blob_descriptor *blob) { static const struct read_blob_callbacks cbs = { }; - return read_blob_with_sha1(blob, &cbs); + return read_blob_with_sha1(blob, &cbs, false); } /* diff --git a/src/win32_capture.c b/src/win32_capture.c index bc05d707..1b088de9 100644 --- a/src/win32_capture.c +++ b/src/win32_capture.c @@ -488,7 +488,8 @@ read_win32_encrypted_file_prefix(const wchar_t *path, bool is_dir, u64 size, * described by @blob. */ int read_windows_file_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, + bool recover_data) { const struct windows_file *file = blob->windows_file; diff --git a/src/write.c b/src/write.c index bca5e862..e28069eb 100644 --- a/src/write.c +++ b/src/write.c @@ -767,7 +767,7 @@ write_blob_uncompressed(struct blob_descriptor *blob, struct filedes *out_fd) if (filedes_seek(out_fd, begin_offset) == -1) return 0; - ret = extract_blob_to_fd(blob, out_fd); + ret = extract_blob_to_fd(blob, out_fd, false); if (ret) { /* Error reading the uncompressed data. */ if (out_fd->offset == begin_offset && diff --git a/tests/test-imagex-capture_and_apply b/tests/test-imagex-capture_and_apply index fe8cdca0..24503729 100755 --- a/tests/test-imagex-capture_and_apply +++ b/tests/test-imagex-capture_and_apply @@ -142,6 +142,20 @@ mkdir in.dir out.dir . $srcdir/tests/common_tests.sh +# Test the data recovery mode +__msg "Testing data recovery mode" +for file in corrupted_file_1.wim corrupted_file_2.wim; do + rm -rf out.dir + wimapply $srcdir/tests/wims/$file 1 out.dir 2>/dev/null && \ + error "Applying $file in default mode unexpectedly succeeded" + rm -rf out.dir + wimapply --recover-data $srcdir/tests/wims/$file 1 out.dir || \ + error "Applying $file in data recovery mode unexpectedly failed" + if [ ! -e out.dir/file ]; then + error "Recovered file not found" + fi +done + # Make sure exclusion list works __msg "Testing default capture configuration file" touch in.dir/hiberfil.sys diff --git a/tests/wims/README b/tests/wims/README index 07cfc6bb..e766f120 100644 --- a/tests/wims/README +++ b/tests/wims/README @@ -1,5 +1,9 @@ Some fun files: +corrupted_file_1.wim: This WIM contains a file whose SHA-1 digest doesn't match. + +corrupted_file_2.wim: This WIM contains a file that fails to decompress. + cyclic.wim: This WIM has an image with a cyclic directory structure and should be detected as invalid. diff --git a/tests/wims/corrupted_file_1.wim b/tests/wims/corrupted_file_1.wim new file mode 100644 index 0000000000000000000000000000000000000000..4bb087ef82c09bd4df9657f08f976e36a8a1af0a GIT binary patch literal 1324 zcmeYb4)^qBU|_fa#0Akmbq{3^v~gYy);#8wL-cjv%1Be87J4M$u~k)`?Gr4bXK#K${$a_IU#B2l)vU zUZ5}lne7P-FAs(QupJ=Z8ZcAVv}^k~Tr%jyFjO8V!sLAjrUw0#e7o zz_b9w0RkwW;SUp-%fteqVSKP4Jqb1i1|g7r$_z#fXZY^REc|w`;X&j6`wSKkSNul< z?2Pswe*9l@Zr|~QND)bX1qQJD14K<2HuA4jIW*_ku|wx3A9EC5Epp?=1Cb3zY8x2M z4GlyEw-|O9f=mnmhKdr8X3$NWbNU-eBXV=iE*pb-QG<* z)OPsU3M1Dbt}SeJJDT^$?ECXOWB<~uuCsdc*k1Yt>we_=(a>?tWr_QxlckPq1slFg zFeFTZ7{|s34et*SJ~TGY{8XJ`;``y0|Jz4TWpCeGG4E*2N;`-O7I2IL*@-ARYF+QG zx$FDt*7vE(s{)sD85d;O%l`jo!w}Bk$>7Ui$6y1d9U0siTp1LAY(EAUAS;5wmO+WZ z5Qsr4{1_a8YF&Zs5{6WUVjyY5ppPVK1JvWm5X9ij;Li{W6b}KaGz6=`6t`h;1FH1_ znue(wn~V+6G=Cs=1nO~Ohy=1+8G?c4TQZn~ZNM&T!{7nb5d?IX57-Xp~95#9#vSB`AzA zRoVbut++sSgfwNZJI2JNCFX1%|sB*gov?pm2kwb}j}0x(V!& literal 0 HcmV?d00001 -- 2.43.0