X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fresource.c;fp=src%2Fresource.c;h=8b6139ad4dbf65da2a0dc32a4c1043c59f90f667;hb=571e0b066fe7526690d68a5223d1025604c6e48b;hp=9054b55efaf46e058a145231cf79e4fd462c2e4d;hpb=11955ba7ab5c13a888e944c895d6051bab80156c;p=wimlib diff --git a/src/resource.c b/src/resource.c index 9054b55e..8b6139ad 100644 --- a/src/resource.c +++ b/src/resource.c @@ -83,6 +83,34 @@ struct data_range { u64 size; }; +static int +decompress_chunk(const void *cbuf, u32 chunk_csize, u8 *ubuf, u32 chunk_usize, + struct wimlib_decompressor *decompressor, bool recover_data) +{ + int res = wimlib_decompress(cbuf, chunk_csize, ubuf, chunk_usize, + decompressor); + if (likely(res == 0)) + return 0; + + if (recover_data) { + WARNING("Failed to decompress data! Continuing anyway since data recovery mode is enabled."); + + /* Continue on with *something*. In the worst case just use a + * zeroed buffer. But, try to fill as much of it with + * decompressed data as we can. This works because if the + * corruption isn't located right at the beginning of the + * compressed chunk, wimlib_decompress() may write some correct + * output at the beginning even if it fails later. */ + memset(ubuf, 0, chunk_usize); + (void)wimlib_decompress(cbuf, chunk_csize, ubuf, + chunk_usize, decompressor); + return 0; + } + ERROR("Failed to decompress data!"); + errno = EINVAL; + return WIMLIB_ERR_DECOMPRESSION; +} + /* * Read data from a compressed WIM resource. * @@ -98,6 +126,9 @@ struct data_range { * the data being read. Each call provides the next chunk of the requested * data, uncompressed. Each chunk will be nonempty and will not cross * range boundaries but otherwise will be of unspecified size. + * @recover_data + * If a chunk can't be fully decompressed due to being corrupted, continue + * with whatever data can be recovered rather than return an error. * * Possible return values: * @@ -114,7 +145,8 @@ static int read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc, const struct data_range * const ranges, const size_t num_ranges, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, + bool recover_data) { int ret; u64 *chunk_offsets = NULL; @@ -446,17 +478,12 @@ read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc, goto read_error; if (read_buf == cbuf) { - ret = wimlib_decompress(cbuf, - chunk_csize, - ubuf, - chunk_usize, - decompressor); - if (unlikely(ret)) { - ERROR("Failed to decompress data!"); - ret = WIMLIB_ERR_DECOMPRESSION; - errno = EINVAL; + ret = decompress_chunk(cbuf, chunk_csize, + ubuf, chunk_usize, + decompressor, + recover_data); + if (unlikely(ret)) goto out_cleanup; - } } cur_read_offset += chunk_csize; @@ -592,7 +619,8 @@ bufferer_cb(const void *chunk, size_t size, void *_ctx) static int read_partial_wim_resource(const struct wim_resource_descriptor *rdesc, const u64 offset, const u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, + bool recover_data) { if (rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_SOLID)) @@ -604,7 +632,8 @@ read_partial_wim_resource(const struct wim_resource_descriptor *rdesc, .offset = offset, .size = size, }; - return read_compressed_wim_resource(rdesc, &range, 1, cb); + return read_compressed_wim_resource(rdesc, &range, 1, cb, + recover_data); } /* Uncompressed resource */ @@ -626,7 +655,7 @@ read_partial_wim_blob_into_buf(const struct blob_descriptor *blob, return read_partial_wim_resource(blob->rdesc, blob->offset_in_res + offset, size, - &cb); + &cb, false); } static int @@ -643,15 +672,15 @@ skip_wim_resource(const struct wim_resource_descriptor *rdesc) .func = noop_cb, }; return read_partial_wim_resource(rdesc, 0, - rdesc->uncompressed_size, &cb); + rdesc->uncompressed_size, &cb, false); } static int read_wim_blob_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, bool recover_data) { return read_partial_wim_resource(blob->rdesc, blob->offset_in_res, - size, cb); + size, cb, recover_data); } /* This function handles reading blob data that is located in an external file, @@ -664,7 +693,8 @@ read_wim_blob_prefix(const struct blob_descriptor *blob, u64 size, * encrypted), so Windows uses its own code for its equivalent case. */ static int read_file_on_disk_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, + bool recover_data) { int ret; int raw_fd; @@ -684,7 +714,8 @@ read_file_on_disk_prefix(const struct blob_descriptor *blob, u64 size, #ifdef WITH_FUSE static int read_staging_file_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, + bool recover_data) { int raw_fd; struct filedes fd; @@ -708,7 +739,8 @@ read_staging_file_prefix(const struct blob_descriptor *blob, u64 size, * already located in an in-memory buffer. */ static int read_buffer_prefix(const struct blob_descriptor *blob, - u64 size, const struct consume_chunk_callback *cb) + u64 size, const struct consume_chunk_callback *cb, + bool recover_data) { if (unlikely(!size)) return 0; @@ -717,7 +749,8 @@ read_buffer_prefix(const struct blob_descriptor *blob, typedef int (*read_blob_prefix_handler_t)(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb); + const struct consume_chunk_callback *cb, + bool recover_data); /* * Read the first @size bytes from a generic "blob", which may be located in any @@ -728,11 +761,12 @@ typedef int (*read_blob_prefix_handler_t)(const struct blob_descriptor *blob, * Returns 0 on success; nonzero on error. A nonzero value will be returned if * the blob data cannot be successfully read (for a number of different reasons, * depending on the blob location), or if @cb returned nonzero in which case - * that error code will be returned. + * that error code will be returned. If @recover_data is true, then errors + * decompressing chunks in WIM resources will be ignored. */ static int read_blob_prefix(const struct blob_descriptor *blob, u64 size, - const struct consume_chunk_callback *cb) + const struct consume_chunk_callback *cb, bool recover_data) { static const read_blob_prefix_handler_t handlers[] = { [BLOB_IN_WIM] = read_wim_blob_prefix, @@ -751,7 +785,7 @@ read_blob_prefix(const struct blob_descriptor *blob, u64 size, wimlib_assert(blob->blob_location < ARRAY_LEN(handlers) && handlers[blob->blob_location] != NULL); wimlib_assert(size <= blob->size); - return handlers[blob->blob_location](blob, size, cb); + return handlers[blob->blob_location](blob, size, cb, recover_data); } struct blob_chunk_ctx { @@ -775,7 +809,7 @@ consume_blob_chunk(const void *chunk, size_t size, void *_ctx) * callbacks (all of which are optional). */ int read_blob_with_cbs(struct blob_descriptor *blob, - const struct read_blob_callbacks *cbs) + const struct read_blob_callbacks *cbs, bool recover_data) { int ret; struct blob_chunk_ctx ctx = { @@ -792,7 +826,7 @@ read_blob_with_cbs(struct blob_descriptor *blob, if (unlikely(ret)) return ret; - ret = read_blob_prefix(blob, blob->size, &cb); + ret = read_blob_prefix(blob, blob->size, &cb, recover_data); return call_end_blob(blob, ret, cbs); } @@ -807,7 +841,7 @@ read_blob_into_buf(const struct blob_descriptor *blob, void *buf) .func = bufferer_cb, .ctx = &buf, }; - return read_blob_prefix(blob, blob->size, &cb); + return read_blob_prefix(blob, blob->size, &cb, false); } /* Retrieve the full uncompressed data of the specified blob. A buffer large @@ -955,6 +989,7 @@ hasher_begin_blob(struct blob_descriptor *blob, void *_ctx) struct hasher_context *ctx = _ctx; sha1_init(&ctx->sha_ctx); + blob->corrupted = 0; return call_begin_blob(blob, &ctx->cbs); } @@ -977,8 +1012,8 @@ hasher_continue_blob(const struct blob_descriptor *blob, u64 offset, } static int -report_sha1_mismatch_error(const struct blob_descriptor *blob, - const u8 actual_hash[SHA1_HASH_SIZE]) +report_sha1_mismatch(struct blob_descriptor *blob, + const u8 actual_hash[SHA1_HASH_SIZE], bool recover_data) { tchar expected_hashstr[SHA1_HASH_SIZE * 2 + 1]; tchar actual_hashstr[SHA1_HASH_SIZE * 2 + 1]; @@ -989,6 +1024,8 @@ report_sha1_mismatch_error(const struct blob_descriptor *blob, sprint_hash(blob->hash, expected_hashstr); sprint_hash(actual_hash, actual_hashstr); + blob->corrupted = 1; + if (blob_is_in_file(blob)) { ERROR("A file was concurrently modified!\n" " Path: \"%"TS"\"\n" @@ -997,18 +1034,21 @@ report_sha1_mismatch_error(const struct blob_descriptor *blob, blob_file_path(blob), expected_hashstr, actual_hashstr); return WIMLIB_ERR_CONCURRENT_MODIFICATION_DETECTED; } else if (blob->blob_location == BLOB_IN_WIM) { + #ifdef ENABLE_ERROR_MESSAGES const struct wim_resource_descriptor *rdesc = blob->rdesc; - ERROR("A WIM resource is corrupted!\n" - " WIM file: \"%"TS"\"\n" - " Blob uncompressed size: %"PRIu64"\n" - " Resource offset in WIM: %"PRIu64"\n" - " Resource uncompressed size: %"PRIu64"\n" - " Resource size in WIM: %"PRIu64"\n" - " Resource flags: 0x%x%"TS"\n" - " Resource compression type: %"TS"\n" - " Resource compression chunk size: %"PRIu32"\n" - " Expected SHA-1: %"TS"\n" - " Actual SHA-1: %"TS"\n", + + (recover_data ? wimlib_warning : wimlib_error)( + T("A WIM resource is corrupted!\n" + " WIM file: \"%"TS"\"\n" + " Blob uncompressed size: %"PRIu64"\n" + " Resource offset in WIM: %"PRIu64"\n" + " Resource uncompressed size: %"PRIu64"\n" + " Resource size in WIM: %"PRIu64"\n" + " Resource flags: 0x%x%"TS"\n" + " Resource compression type: %"TS"\n" + " Resource compression chunk size: %"PRIu32"\n" + " Expected SHA-1: %"TS"\n" + " Actual SHA-1: %"TS"\n"), rdesc->wim->filename, blob->size, rdesc->offset_in_wim, @@ -1020,6 +1060,9 @@ report_sha1_mismatch_error(const struct blob_descriptor *blob, rdesc->compression_type), rdesc->chunk_size, expected_hashstr, actual_hashstr); + #endif /* ENABLE_ERROR_MESSAGES */ + if (recover_data) + return 0; return WIMLIB_ERR_INVALID_RESOURCE_HASH; } else { ERROR("File data was concurrently modified!\n" @@ -1058,7 +1101,8 @@ hasher_end_blob(struct blob_descriptor *blob, int status, void *_ctx) } else if ((ctx->flags & VERIFY_BLOB_HASHES) && unlikely(!hashes_equal(hash, blob->hash))) { - ret = report_sha1_mismatch_error(blob, hash); + ret = report_sha1_mismatch(blob, hash, + ctx->flags & RECOVER_DATA); goto out_next_cb; } ret = 0; @@ -1071,10 +1115,11 @@ out_next_cb: * SHA-1 message digest of the blob. */ int read_blob_with_sha1(struct blob_descriptor *blob, - const struct read_blob_callbacks *cbs) + const struct read_blob_callbacks *cbs, bool recover_data) { struct hasher_context hasher_ctx = { - .flags = VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES, + .flags = VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES | + (recover_data ? RECOVER_DATA : 0), .cbs = *cbs, }; struct read_blob_callbacks hasher_cbs = { @@ -1083,7 +1128,7 @@ read_blob_with_sha1(struct blob_descriptor *blob, .end_blob = hasher_end_blob, .ctx = &hasher_ctx, }; - return read_blob_with_cbs(blob, &hasher_cbs); + return read_blob_with_cbs(blob, &hasher_cbs, recover_data); } static int @@ -1091,7 +1136,8 @@ read_blobs_in_solid_resource(struct blob_descriptor *first_blob, struct blob_descriptor *last_blob, size_t blob_count, size_t list_head_offset, - const struct read_blob_callbacks *sink_cbs) + const struct read_blob_callbacks *sink_cbs, + bool recover_data) { struct data_range *ranges; bool ranges_malloced; @@ -1141,7 +1187,7 @@ read_blobs_in_solid_resource(struct blob_descriptor *first_blob, }; ret = read_compressed_wim_resource(first_blob->rdesc, ranges, - blob_count, &cb); + blob_count, &cb, recover_data); if (ranges_malloced) FREE(ranges); @@ -1178,7 +1224,8 @@ oom: * For all blobs being read that have already had SHA-1 message * digests computed, calculate the SHA-1 message digest of the read * data and compare it with the previously computed value. If they - * do not match, return WIMLIB_ERR_INVALID_RESOURCE_HASH. + * do not match, return WIMLIB_ERR_INVALID_RESOURCE_HASH (unless + * RECOVER_DATA is also set, in which case just issue a warning). * * COMPUTE_MISSING_BLOB_HASHES * For all blobs being read that have not yet had their SHA-1 @@ -1188,6 +1235,9 @@ oom: * BLOB_LIST_ALREADY_SORTED * @blob_list is already sorted in sequential order for reading. * + * RECOVER_DATA + * Don't consider corrupted blob data to be an error. + * * The callback functions are allowed to delete the current blob from the list * if necessary. * @@ -1273,14 +1323,15 @@ read_blob_list(struct list_head *blob_list, size_t list_head_offset, ret = read_blobs_in_solid_resource(blob, blob_last, blob_count, list_head_offset, - sink_cbs); + sink_cbs, + flags & RECOVER_DATA); if (ret) return ret; continue; } } - ret = read_blob_with_cbs(blob, sink_cbs); + ret = read_blob_with_cbs(blob, sink_cbs, flags & RECOVER_DATA); if (unlikely(ret && ret != BEGIN_BLOB_STATUS_SKIP_BLOB)) return ret; } @@ -1314,19 +1365,20 @@ extract_blob_prefix_to_fd(struct blob_descriptor *blob, u64 size, .func = extract_chunk_to_fd, .ctx = fd, }; - return read_blob_prefix(blob, size, &cb); + return read_blob_prefix(blob, size, &cb, false); } /* Extract the full uncompressed contents of the specified blob to the specified * file descriptor. This checks the SHA-1 message digest. */ int -extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd) +extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd, + bool recover_data) { struct read_blob_callbacks cbs = { .continue_blob = extract_blob_chunk_to_fd, .ctx = fd, }; - return read_blob_with_sha1(blob, &cbs); + return read_blob_with_sha1(blob, &cbs, recover_data); } /* Calculate the SHA-1 message digest of a blob and store it in @blob->hash. */ @@ -1335,7 +1387,7 @@ sha1_blob(struct blob_descriptor *blob) { static const struct read_blob_callbacks cbs = { }; - return read_blob_with_sha1(blob, &cbs); + return read_blob_with_sha1(blob, &cbs, false); } /*