Prevent huge memory allocations from fuzzed header fields

[wimlib] / src / resource.c
diff --git a/src/resource.c b/src/resource.c
index 74c9bb8b9a9570aa52110139e73cc9a0907982bb..0036c1b90377823ce4dee8502a70323289c4ba04 100644
--- a/src/resource.c
+++ b/src/resource.c
@@ -57,8 +57,8 @@
   *   "chunk table" provides the offset, in bytes relative to the end of the
   *   chunk table, of the start of each compressed chunk, except for the first
   *   chunk which is omitted as it always has an offset of 0.  Chunk table
- *   entries are 32-bit for resources <= 4 GiB uncompressed and 64-bit for
- *   resources > 4 GiB uncompressed.
+ *   entries are 32-bit for resources < 4 GiB uncompressed and 64-bit for
+ *   resources >= 4 GiB uncompressed.
   *
   * - Solid resource format (distinguished by the use of WIM_RESHDR_FLAG_SOLID
   *   instead of WIM_RESHDR_FLAG_COMPRESSED): similar to the original format, but
@@ -83,6 +83,34 @@ struct data_range {
         u64 size;
  };
  
+static int
+decompress_chunk(const void *cbuf, u32 chunk_csize, u8 *ubuf, u32 chunk_usize,
+                struct wimlib_decompressor *decompressor, bool recover_data)
+{
+       int res = wimlib_decompress(cbuf, chunk_csize, ubuf, chunk_usize,
+                                   decompressor);
+       if (likely(res == 0))
+               return 0;
+
+       if (recover_data) {
+               WARNING("Failed to decompress data!  Continuing anyway since data recovery mode is enabled.");
+
+               /* Continue on with *something*.  In the worst case just use a
+                * zeroed buffer.  But, try to fill as much of it with
+                * decompressed data as we can.  This works because if the
+                * corruption isn't located right at the beginning of the
+                * compressed chunk, wimlib_decompress() may write some correct
+                * output at the beginning even if it fails later.  */
+               memset(ubuf, 0, chunk_usize);
+               (void)wimlib_decompress(cbuf, chunk_csize, ubuf,
+                                       chunk_usize, decompressor);
+               return 0;
+       }
+       ERROR("Failed to decompress data!");
+       errno = EINVAL;
+       return WIMLIB_ERR_DECOMPRESSION;
+}
+
  /*
   * Read data from a compressed WIM resource.
   *
@@ -93,28 +121,32 @@ struct data_range {
   *     read, sorted by increasing offset.
   * @num_ranges
   *     Number of ranges in @ranges; must be at least 1.
- * @cbs
- *     Structure which provides the consume_chunk() callback to feed the data
- *     being read.  Each call provides the next chunk of the requested data,
- *     uncompressed.  Each chunk will be nonempty and will not cross range
- *     boundaries but otherwise will be of unspecified size.
+ * @cb
+ *     Structure which provides the consume_chunk callback into which to feed
+ *     the data being read.  Each call provides the next chunk of the requested
+ *     data, uncompressed.  Each chunk will be nonempty and will not cross
+ *     range boundaries but otherwise will be of unspecified size.
+ * @recover_data
+ *     If a chunk can't be fully decompressed due to being corrupted, continue
+ *     with whatever data can be recovered rather than return an error.
   *
   * Possible return values:
   *
   *     WIMLIB_ERR_SUCCESS (0)
   *     WIMLIB_ERR_READ                   (errno set)
- *     WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0)
+ *     WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to EINVAL)
   *     WIMLIB_ERR_NOMEM                  (errno set to ENOMEM)
   *     WIMLIB_ERR_DECOMPRESSION          (errno set to EINVAL)
   *     WIMLIB_ERR_INVALID_CHUNK_SIZE     (errno set to EINVAL)
   *
- *     or other error code returned by the cbs->consume_chunk() function.
+ *     or other error code returned by the callback function.
   */
  static int
  read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc,
                              const struct data_range * const ranges,
                              const size_t num_ranges,
-                            const struct read_blob_callbacks *cbs)
+                            const struct consume_chunk_callback *cb,
+                            bool recover_data)
  {
         int ret;
         u64 *chunk_offsets = NULL;
@@ -188,7 +220,7 @@ read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc,
                 }
         }
  
-       const u32 chunk_order = fls32(chunk_size);
+       const u32 chunk_order = bsr32(chunk_size);
  
         /* Calculate the total number of chunks the resource is divided into.  */
         const u64 num_chunks = (rdesc->uncompressed_size + chunk_size - 1) >> chunk_order;
@@ -234,35 +266,34 @@ read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc,
                  * to initialize the chunk_offsets array.  */
  
                 u64 first_chunk_entry_to_read;
-               u64 last_chunk_entry_to_read;
+               u64 num_chunk_entries_to_read;
  
                 if (alt_chunk_table) {
                         /* The alternate chunk table contains chunk sizes, not
                          * offsets, so we always must read all preceding entries
                          * in order to determine offsets.  */
                         first_chunk_entry_to_read = 0;
-                       last_chunk_entry_to_read = last_needed_chunk;
+                       num_chunk_entries_to_read = last_needed_chunk + 1;
                 } else {
-                       /* Here we must account for the fact that the first
-                        * chunk has no explicit chunk table entry.  */
  
-                       if (read_start_chunk == 0)
+                       num_chunk_entries_to_read = last_needed_chunk - read_start_chunk + 1;
+
+                       /* The first chunk has no explicit chunk table entry.  */
+                       if (read_start_chunk == 0) {
+                               num_chunk_entries_to_read--;
                                 first_chunk_entry_to_read = 0;
-                       else
+                       } else {
                                 first_chunk_entry_to_read = read_start_chunk - 1;
+                       }
  
-                       if (last_needed_chunk == 0)
-                               last_chunk_entry_to_read = 0;
-                       else
-                               last_chunk_entry_to_read = last_needed_chunk - 1;
-
+                       /* Unless we're reading the final chunk of the resource,
+                        * we need the offset of the chunk following the last
+                        * needed chunk so that the compressed size of the last
+                        * needed chunk can be computed.  */
                         if (last_needed_chunk < num_chunks - 1)
-                               last_chunk_entry_to_read++;
+                               num_chunk_entries_to_read++;
                 }
  
-               const u64 num_chunk_entries_to_read =
-                       last_chunk_entry_to_read - first_chunk_entry_to_read + 1;
-
                 const u64 chunk_offsets_alloc_size =
                         max(num_chunk_entries_to_read,
                             num_needed_chunk_offsets) * sizeof(chunk_offsets[0]);
@@ -447,17 +478,12 @@ read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc,
                                 goto read_error;
  
                         if (read_buf == cbuf) {
-                               ret = wimlib_decompress(cbuf,
-                                                       chunk_csize,
-                                                       ubuf,
-                                                       chunk_usize,
-                                                       decompressor);
-                               if (unlikely(ret)) {
-                                       ERROR("Failed to decompress data!");
-                                       ret = WIMLIB_ERR_DECOMPRESSION;
-                                       errno = EINVAL;
+                               ret = decompress_chunk(cbuf, chunk_csize,
+                                                      ubuf, chunk_usize,
+                                                      decompressor,
+                                                      recover_data);
+                               if (unlikely(ret))
                                         goto out_cleanup;
-                               }
                         }
                         cur_read_offset += chunk_csize;
  
@@ -474,7 +500,7 @@ read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc,
                                 end = min(cur_range_end, chunk_end_offset) - chunk_start_offset;
                                 size = end - start;
  
-                               ret = call_consume_chunk(&ubuf[start], size, cbs);
+                               ret = consume_chunk(cb, &ubuf[start], size);
                                 if (unlikely(ret))
                                         goto out_cleanup;
  
@@ -534,10 +560,11 @@ read_error:
  }
  
  /* Read raw data from a file descriptor at the specified offset, feeding the
- * data in nonempty chunks into the cbs->consume_chunk() function.  */
+ * data in nonempty chunks into the specified callback function.  */
  static int
  read_raw_file_data(struct filedes *in_fd, u64 offset, u64 size,
-                  const struct read_blob_callbacks *cbs)
+                  const struct consume_chunk_callback *cb,
+                  const tchar *filename)
  {
         u8 buf[BUFFER_SIZE];
         size_t bytes_to_read;
@@ -546,20 +573,29 @@ read_raw_file_data(struct filedes *in_fd, u64 offset, u64 size,
         while (size) {
                 bytes_to_read = min(sizeof(buf), size);
                 ret = full_pread(in_fd, buf, bytes_to_read, offset);
-               if (unlikely(ret)) {
-                       ERROR_WITH_ERRNO("Read error");
-                       return ret;
-               }
-               ret = call_consume_chunk(buf, bytes_to_read, cbs);
+               if (unlikely(ret))
+                       goto read_error;
+               ret = consume_chunk(cb, buf, bytes_to_read);
                 if (unlikely(ret))
                         return ret;
                 size -= bytes_to_read;
                 offset += bytes_to_read;
         }
         return 0;
+
+read_error:
+       if (!filename) {
+               ERROR_WITH_ERRNO("Error reading data from WIM file");
+       } else if (ret == WIMLIB_ERR_UNEXPECTED_END_OF_FILE) {
+               ERROR("\"%"TS"\": File was concurrently truncated", filename);
+               ret = WIMLIB_ERR_CONCURRENT_MODIFICATION_DETECTED;
+       } else {
+               ERROR_WITH_ERRNO("\"%"TS"\": Error reading data", filename);
+       }
+       return ret;
  }
  
-/* A consume_chunk() implementation that simply concatenates all chunks into an
+/* A consume_chunk implementation which simply concatenates all chunks into an
   * in-memory buffer.  */
  static int
  bufferer_cb(const void *chunk, size_t size, void *_ctx)
@@ -572,7 +608,7 @@ bufferer_cb(const void *chunk, size_t size, void *_ctx)
  
  /*
   * Read @size bytes at @offset in the WIM resource described by @rdesc and feed
- * the data into the @cbs->consume_chunk callback function.
+ * the data into the @cb callback function.
   *
   * @offset and @size are assumed to have already been validated against the
   * resource's uncompressed size.
@@ -583,7 +619,8 @@ bufferer_cb(const void *chunk, size_t size, void *_ctx)
  static int
  read_partial_wim_resource(const struct wim_resource_descriptor *rdesc,
                           const u64 offset, const u64 size,
-                         const struct read_blob_callbacks *cbs)
+                         const struct consume_chunk_callback *cb,
+                         bool recover_data)
  {
         if (rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED |
                             WIM_RESHDR_FLAG_SOLID))
@@ -595,13 +632,14 @@ read_partial_wim_resource(const struct wim_resource_descriptor *rdesc,
                         .offset = offset,
                         .size = size,
                 };
-               return read_compressed_wim_resource(rdesc, &range, 1, cbs);
+               return read_compressed_wim_resource(rdesc, &range, 1, cb,
+                                                   recover_data);
         }
  
         /* Uncompressed resource  */
         return read_raw_file_data(&rdesc->wim->in_fd,
                                   rdesc->offset_in_wim + offset,
-                                 size, cbs);
+                                 size, cb, NULL);
  }
  
  /* Read the specified range of uncompressed data from the specified blob, which
@@ -610,32 +648,39 @@ int
  read_partial_wim_blob_into_buf(const struct blob_descriptor *blob,
                                u64 offset, size_t size, void *buf)
  {
-       struct read_blob_callbacks cbs = {
-               .consume_chunk  = bufferer_cb,
-               .ctx            = &buf,
+       struct consume_chunk_callback cb = {
+               .func   = bufferer_cb,
+               .ctx    = &buf,
         };
         return read_partial_wim_resource(blob->rdesc,
                                          blob->offset_in_res + offset,
                                          size,
-                                        &cbs);
+                                        &cb, false);
+}
+
+static int
+noop_cb(const void *chunk, size_t size, void *_ctx)
+{
+       return 0;
  }
  
  /* Skip over the data of the specified WIM resource.  */
  int
  skip_wim_resource(const struct wim_resource_descriptor *rdesc)
  {
-       struct read_blob_callbacks cbs = {
+       static const struct consume_chunk_callback cb = {
+               .func = noop_cb,
         };
         return read_partial_wim_resource(rdesc, 0,
-                                        rdesc->uncompressed_size, &cbs);
+                                        rdesc->uncompressed_size, &cb, false);
  }
  
  static int
  read_wim_blob_prefix(const struct blob_descriptor *blob, u64 size,
-                    const struct read_blob_callbacks *cbs)
+                    const struct consume_chunk_callback *cb, bool recover_data)
  {
         return read_partial_wim_resource(blob->rdesc, blob->offset_in_res,
-                                        size, cbs);
+                                        size, cb, recover_data);
  }
  
  /* This function handles reading blob data that is located in an external file,
@@ -648,7 +693,8 @@ read_wim_blob_prefix(const struct blob_descriptor *blob, u64 size,
   * encrypted), so Windows uses its own code for its equivalent case.  */
  static int
  read_file_on_disk_prefix(const struct blob_descriptor *blob, u64 size,
-                        const struct read_blob_callbacks *cbs)
+                        const struct consume_chunk_callback *cb,
+                        bool recover_data)
  {
         int ret;
         int raw_fd;
@@ -660,7 +706,7 @@ read_file_on_disk_prefix(const struct blob_descriptor *blob, u64 size,
                 return WIMLIB_ERR_OPEN;
         }
         filedes_init(&fd, raw_fd);
-       ret = read_raw_file_data(&fd, 0, size, cbs);
+       ret = read_raw_file_data(&fd, 0, size, cb, blob->file_on_disk);
         filedes_close(&fd);
         return ret;
  }
@@ -668,7 +714,8 @@ read_file_on_disk_prefix(const struct blob_descriptor *blob, u64 size,
  #ifdef WITH_FUSE
  static int
  read_staging_file_prefix(const struct blob_descriptor *blob, u64 size,
-                        const struct read_blob_callbacks *cbs)
+                        const struct consume_chunk_callback *cb,
+                        bool recover_data)
  {
         int raw_fd;
         struct filedes fd;
@@ -682,7 +729,7 @@ read_staging_file_prefix(const struct blob_descriptor *blob, u64 size,
                 return WIMLIB_ERR_OPEN;
         }
         filedes_init(&fd, raw_fd);
-       ret = read_raw_file_data(&fd, 0, size, cbs);
+       ret = read_raw_file_data(&fd, 0, size, cb, blob->staging_file_name);
         filedes_close(&fd);
         return ret;
  }
@@ -692,32 +739,34 @@ read_staging_file_prefix(const struct blob_descriptor *blob, u64 size,
   * already located in an in-memory buffer.  */
  static int
  read_buffer_prefix(const struct blob_descriptor *blob,
-                  u64 size, const struct read_blob_callbacks *cbs)
+                  u64 size, const struct consume_chunk_callback *cb,
+                  bool recover_data)
  {
         if (unlikely(!size))
                 return 0;
-       return call_consume_chunk(blob->attached_buffer, size, cbs);
+       return consume_chunk(cb, blob->attached_buffer, size);
  }
  
  typedef int (*read_blob_prefix_handler_t)(const struct blob_descriptor *blob,
                                           u64 size,
-                                         const struct read_blob_callbacks *cbs);
+                                         const struct consume_chunk_callback *cb,
+                                         bool recover_data);
  
  /*
   * Read the first @size bytes from a generic "blob", which may be located in any
   * one of several locations, such as in a WIM resource (possibly compressed), in
   * an external file, or directly in an in-memory buffer.  The blob data will be
- * fed to the cbs->consume_chunk() callback function in chunks that are nonempty
- * but otherwise are of unspecified size.
+ * fed to @cb in chunks that are nonempty but otherwise are of unspecified size.
   *
   * Returns 0 on success; nonzero on error.  A nonzero value will be returned if
   * the blob data cannot be successfully read (for a number of different reasons,
- * depending on the blob location), or if cbs->consume_chunk() returned nonzero
- * in which case that error code will be returned.
+ * depending on the blob location), or if @cb returned nonzero in which case
+ * that error code will be returned.  If @recover_data is true, then errors
+ * decompressing chunks in WIM resources will be ignored.
   */
  static int
  read_blob_prefix(const struct blob_descriptor *blob, u64 size,
-                const struct read_blob_callbacks *cbs)
+                const struct consume_chunk_callback *cb, bool recover_data)
  {
         static const read_blob_prefix_handler_t handlers[] = {
                 [BLOB_IN_WIM] = read_wim_blob_prefix,
@@ -729,30 +778,55 @@ read_blob_prefix(const struct blob_descriptor *blob, u64 size,
         #ifdef WITH_NTFS_3G
                 [BLOB_IN_NTFS_VOLUME] = read_ntfs_attribute_prefix,
         #endif
-       #ifdef __WIN32__
-               [BLOB_IN_WINNT_FILE_ON_DISK] = read_winnt_stream_prefix,
-               [BLOB_WIN32_ENCRYPTED] = read_win32_encrypted_file_prefix,
+       #ifdef _WIN32
+               [BLOB_IN_WINDOWS_FILE] = read_windows_file_prefix,
         #endif
         };
         wimlib_assert(blob->blob_location < ARRAY_LEN(handlers)
                       && handlers[blob->blob_location] != NULL);
         wimlib_assert(size <= blob->size);
-       return handlers[blob->blob_location](blob, size, cbs);
+       return handlers[blob->blob_location](blob, size, cb, recover_data);
+}
+
+struct blob_chunk_ctx {
+       const struct blob_descriptor *blob;
+       const struct read_blob_callbacks *cbs;
+       u64 offset;
+};
+
+static int
+consume_blob_chunk(const void *chunk, size_t size, void *_ctx)
+{
+       struct blob_chunk_ctx *ctx = _ctx;
+       int ret;
+
+       ret = call_continue_blob(ctx->blob, ctx->offset, chunk, size, ctx->cbs);
+       ctx->offset += size;
+       return ret;
  }
  
  /* Read the full data of the specified blob, passing the data into the specified
   * callbacks (all of which are optional).  */
  int
  read_blob_with_cbs(struct blob_descriptor *blob,
-                  const struct read_blob_callbacks *cbs)
+                  const struct read_blob_callbacks *cbs, bool recover_data)
  {
         int ret;
+       struct blob_chunk_ctx ctx = {
+               .blob = blob,
+               .offset = 0,
+               .cbs = cbs,
+       };
+       struct consume_chunk_callback cb = {
+               .func = consume_blob_chunk,
+               .ctx = &ctx,
+       };
  
         ret = call_begin_blob(blob, cbs);
         if (unlikely(ret))
                 return ret;
  
-       ret = read_blob_prefix(blob, blob->size, cbs);
+       ret = read_blob_prefix(blob, blob->size, &cb, recover_data);
  
         return call_end_blob(blob, ret, cbs);
  }
@@ -763,11 +837,11 @@ read_blob_with_cbs(struct blob_descriptor *blob,
  int
  read_blob_into_buf(const struct blob_descriptor *blob, void *buf)
  {
-       struct read_blob_callbacks cbs = {
-               .consume_chunk  = bufferer_cb,
-               .ctx            = &buf,
+       struct consume_chunk_callback cb = {
+               .func   = bufferer_cb,
+               .ctx    = &buf,
         };
-       return read_blob_prefix(blob, blob->size, &cbs);
+       return read_blob_prefix(blob, blob->size, &cb, false);
  }
  
  /* Retrieve the full uncompressed data of the specified blob.  A buffer large
@@ -808,8 +882,7 @@ wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim,
         struct wim_resource_descriptor rdesc;
         struct blob_descriptor blob;
  
-       wim_reshdr_to_desc(reshdr, wim, &rdesc);
-       blob_set_is_located_in_nonsolid_wim_resource(&blob, &rdesc);
+       wim_reshdr_to_desc_and_blob(reshdr, wim, &rdesc, &blob);
  
         return read_blob_into_alloc_buf(&blob, buf_ret);
  }
@@ -824,8 +897,7 @@ wim_reshdr_to_hash(const struct wim_reshdr *reshdr, WIMStruct *wim,
         struct blob_descriptor blob;
         int ret;
  
-       wim_reshdr_to_desc(reshdr, wim, &rdesc);
-       blob_set_is_located_in_nonsolid_wim_resource(&blob, &rdesc);
+       wim_reshdr_to_desc_and_blob(reshdr, wim, &rdesc, &blob);
         blob.unhashed = 1;
  
         ret = sha1_blob(&blob);
@@ -855,9 +927,10 @@ next_blob(struct blob_descriptor *blob, size_t list_head_offset)
         return (struct blob_descriptor*)((u8*)cur->next - list_head_offset);
  }
  
-/* A consume_chunk() implementation that translates raw resource data into
- * blobs, calling the begin_blob, consume_chunk, and end_blob callbacks as
- * appropriate.  */
+/*
+ * A consume_chunk implementation that translates raw resource data into blobs,
+ * calling the begin_blob, continue_blob, and end_blob callbacks as appropriate.
+ */
  static int
  blobifier_cb(const void *chunk, size_t size, void *_ctx)
  {
@@ -874,9 +947,9 @@ blobifier_cb(const void *chunk, size_t size, void *_ctx)
                         return ret;
         }
  
+       ret = call_continue_blob(ctx->cur_blob, ctx->cur_blob_offset,
+                                chunk, size, &ctx->cbs);
         ctx->cur_blob_offset += size;
-
-       ret = call_consume_chunk(chunk, size, &ctx->cbs);
         if (ret)
                 return ret;
  
@@ -903,7 +976,7 @@ blobifier_cb(const void *chunk, size_t size, void *_ctx)
  }
  
  struct hasher_context {
-       SHA_CTX sha_ctx;
+       struct sha1_ctx sha_ctx;
         int flags;
         struct read_blob_callbacks cbs;
  };
@@ -916,22 +989,88 @@ hasher_begin_blob(struct blob_descriptor *blob, void *_ctx)
         struct hasher_context *ctx = _ctx;
  
         sha1_init(&ctx->sha_ctx);
+       blob->corrupted = 0;
  
         return call_begin_blob(blob, &ctx->cbs);
  }
  
-/* A consume_chunk() implementation that continues calculating the SHA-1 message
+/*
+ * A continue_blob() implementation that continues calculating the SHA-1 message
   * digest of the blob being read, then optionally passes the data on to another
- * consume_chunk() implementation.  This allows checking the SHA-1 message
- * digest of a blob being extracted, for example.  */
+ * continue_blob() implementation.  This allows checking the SHA-1 message
+ * digest of a blob being extracted, for example.
+ */
  static int
-hasher_consume_chunk(const void *chunk, size_t size, void *_ctx)
+hasher_continue_blob(const struct blob_descriptor *blob, u64 offset,
+                    const void *chunk, size_t size, void *_ctx)
  {
         struct hasher_context *ctx = _ctx;
  
         sha1_update(&ctx->sha_ctx, chunk, size);
  
-       return call_consume_chunk(chunk, size, &ctx->cbs);
+       return call_continue_blob(blob, offset, chunk, size, &ctx->cbs);
+}
+
+static int
+report_sha1_mismatch(struct blob_descriptor *blob,
+                    const u8 actual_hash[SHA1_HASH_SIZE], bool recover_data)
+{
+       tchar expected_hashstr[SHA1_HASH_STRING_LEN];
+       tchar actual_hashstr[SHA1_HASH_STRING_LEN];
+
+       wimlib_assert(blob->blob_location != BLOB_NONEXISTENT);
+       wimlib_assert(blob->blob_location != BLOB_IN_ATTACHED_BUFFER);
+
+       sprint_hash(blob->hash, expected_hashstr);
+       sprint_hash(actual_hash, actual_hashstr);
+
+       blob->corrupted = 1;
+
+       if (blob_is_in_file(blob)) {
+               ERROR("A file was concurrently modified!\n"
+                     "        Path: \"%"TS"\"\n"
+                     "        Expected SHA-1: %"TS"\n"
+                     "        Actual SHA-1: %"TS"\n",
+                     blob_file_path(blob), expected_hashstr, actual_hashstr);
+               return WIMLIB_ERR_CONCURRENT_MODIFICATION_DETECTED;
+       } else if (blob->blob_location == BLOB_IN_WIM) {
+               const struct wim_resource_descriptor *rdesc = blob->rdesc;
+
+               (recover_data ? wimlib_warning : wimlib_error)(
+                     T("A WIM resource is corrupted!\n"
+                       "        WIM file: \"%"TS"\"\n"
+                       "        Blob uncompressed size: %"PRIu64"\n"
+                       "        Resource offset in WIM: %"PRIu64"\n"
+                       "        Resource uncompressed size: %"PRIu64"\n"
+                       "        Resource size in WIM: %"PRIu64"\n"
+                       "        Resource flags: 0x%x%"TS"\n"
+                       "        Resource compression type: %"TS"\n"
+                       "        Resource compression chunk size: %"PRIu32"\n"
+                       "        Expected SHA-1: %"TS"\n"
+                       "        Actual SHA-1: %"TS"\n"),
+                     rdesc->wim->filename,
+                     blob->size,
+                     rdesc->offset_in_wim,
+                     rdesc->uncompressed_size,
+                     rdesc->size_in_wim,
+                     (unsigned int)rdesc->flags,
+                     (rdesc->is_pipable ? T(", pipable") : T("")),
+                     wimlib_get_compression_type_string(
+                                               rdesc->compression_type),
+                     rdesc->chunk_size,
+                     expected_hashstr, actual_hashstr);
+               if (recover_data)
+                       return 0;
+               return WIMLIB_ERR_INVALID_RESOURCE_HASH;
+       } else {
+               ERROR("File data was concurrently modified!\n"
+                     "        Location ID: %d\n"
+                     "        Expected SHA-1: %"TS"\n"
+                     "        Actual SHA-1: %"TS"\n",
+                     (int)blob->blob_location,
+                     expected_hashstr, actual_hashstr);
+               return WIMLIB_ERR_CONCURRENT_MODIFICATION_DETECTED;
+       }
  }
  
  /* Callback for finishing reading a blob while calculating its SHA-1 message
@@ -950,7 +1089,7 @@ hasher_end_blob(struct blob_descriptor *blob, int status, void *_ctx)
         }
  
         /* Retrieve the final SHA-1 message digest.  */
-       sha1_final(hash, &ctx->sha_ctx);
+       sha1_final(&ctx->sha_ctx, hash);
  
         /* Set the SHA-1 message digest of the blob, or compare the calculated
          * value with stored value.  */
@@ -960,16 +1099,8 @@ hasher_end_blob(struct blob_descriptor *blob, int status, void *_ctx)
         } else if ((ctx->flags & VERIFY_BLOB_HASHES) &&
                    unlikely(!hashes_equal(hash, blob->hash)))
         {
-               if (wimlib_print_errors) {
-                       tchar expected_hashstr[SHA1_HASH_SIZE * 2 + 1];
-                       tchar actual_hashstr[SHA1_HASH_SIZE * 2 + 1];
-                       sprint_hash(blob->hash, expected_hashstr);
-                       sprint_hash(hash, actual_hashstr);
-                       ERROR("The data is corrupted!\n"
-                             "        (Expected SHA-1=%"TS", got SHA-1=%"TS")",
-                             expected_hashstr, actual_hashstr);
-               }
-               ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
+               ret = report_sha1_mismatch(blob, hash,
+                                          ctx->flags & RECOVER_DATA);
                 goto out_next_cb;
         }
         ret = 0;
@@ -982,19 +1113,20 @@ out_next_cb:
   * SHA-1 message digest of the blob.  */
  int
  read_blob_with_sha1(struct blob_descriptor *blob,
-                   const struct read_blob_callbacks *cbs)
+                   const struct read_blob_callbacks *cbs, bool recover_data)
  {
         struct hasher_context hasher_ctx = {
-               .flags = VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES,
+               .flags = VERIFY_BLOB_HASHES | COMPUTE_MISSING_BLOB_HASHES |
+                        (recover_data ? RECOVER_DATA : 0),
                 .cbs = *cbs,
         };
         struct read_blob_callbacks hasher_cbs = {
                 .begin_blob     = hasher_begin_blob,
-               .consume_chunk  = hasher_consume_chunk,
+               .continue_blob  = hasher_continue_blob,
                 .end_blob       = hasher_end_blob,
                 .ctx            = &hasher_ctx,
         };
-       return read_blob_with_cbs(blob, &hasher_cbs);
+       return read_blob_with_cbs(blob, &hasher_cbs, recover_data);
  }
  
  static int
@@ -1002,7 +1134,8 @@ read_blobs_in_solid_resource(struct blob_descriptor *first_blob,
                              struct blob_descriptor *last_blob,
                              size_t blob_count,
                              size_t list_head_offset,
-                            const struct read_blob_callbacks *sink_cbs)
+                            const struct read_blob_callbacks *sink_cbs,
+                            bool recover_data)
  {
         struct data_range *ranges;
         bool ranges_malloced;
@@ -1046,13 +1179,13 @@ read_blobs_in_solid_resource(struct blob_descriptor *first_blob,
                 .final_blob             = last_blob,
                 .list_head_offset       = list_head_offset,
         };
-       struct read_blob_callbacks cbs = {
-               .consume_chunk  = blobifier_cb,
-               .ctx            = &blobifier_ctx,
+       struct consume_chunk_callback cb = {
+               .func   = blobifier_cb,
+               .ctx    = &blobifier_ctx,
         };
  
         ret = read_compressed_wim_resource(first_blob->rdesc, ranges,
-                                          blob_count, &cbs);
+                                          blob_count, &cb, recover_data);
  
         if (ranges_malloced)
                 FREE(ranges);
@@ -1089,7 +1222,8 @@ oom:
   *             For all blobs being read that have already had SHA-1 message
   *             digests computed, calculate the SHA-1 message digest of the read
   *             data and compare it with the previously computed value.  If they
- *             do not match, return WIMLIB_ERR_INVALID_RESOURCE_HASH.
+ *             do not match, return WIMLIB_ERR_INVALID_RESOURCE_HASH (unless
+ *             RECOVER_DATA is also set, in which case just issue a warning).
   *
   *     COMPUTE_MISSING_BLOB_HASHES
   *             For all blobs being read that have not yet had their SHA-1
@@ -1099,6 +1233,9 @@ oom:
   *     BLOB_LIST_ALREADY_SORTED
   *             @blob_list is already sorted in sequential order for reading.
   *
+ *     RECOVER_DATA
+ *             Don't consider corrupted blob data to be an error.
+ *
   * The callback functions are allowed to delete the current blob from the list
   * if necessary.
   *
@@ -1132,7 +1269,7 @@ read_blob_list(struct list_head *blob_list, size_t list_head_offset,
                 sink_cbs = alloca(sizeof(*sink_cbs));
                 *sink_cbs = (struct read_blob_callbacks) {
                         .begin_blob     = hasher_begin_blob,
-                       .consume_chunk  = hasher_consume_chunk,
+                       .continue_blob  = hasher_continue_blob,
                         .end_blob       = hasher_end_blob,
                         .ctx            = hasher_ctx,
                 };
@@ -1184,14 +1321,15 @@ read_blob_list(struct list_head *blob_list, size_t list_head_offset,
                                 ret = read_blobs_in_solid_resource(blob, blob_last,
                                                                    blob_count,
                                                                    list_head_offset,
-                                                                  sink_cbs);
+                                                                  sink_cbs,
+                                                                  flags & RECOVER_DATA);
                                 if (ret)
                                         return ret;
                                 continue;
                         }
                 }
  
-               ret = read_blob_with_cbs(blob, sink_cbs);
+               ret = read_blob_with_cbs(blob, sink_cbs, flags & RECOVER_DATA);
                 if (unlikely(ret && ret != BEGIN_BLOB_STATUS_SKIP_BLOB))
                         return ret;
         }
@@ -1208,38 +1346,46 @@ extract_chunk_to_fd(const void *chunk, size_t size, void *_fd)
         return ret;
  }
  
+static int
+extract_blob_chunk_to_fd(const struct blob_descriptor *blob, u64 offset,
+                        const void *chunk, size_t size, void *_fd)
+{
+       return extract_chunk_to_fd(chunk, size, _fd);
+}
+
  /* Extract the first @size bytes of the specified blob to the specified file
   * descriptor.  This does *not* check the SHA-1 message digest.  */
  int
  extract_blob_prefix_to_fd(struct blob_descriptor *blob, u64 size,
                           struct filedes *fd)
  {
-       struct read_blob_callbacks cbs = {
-               .consume_chunk  = extract_chunk_to_fd,
-               .ctx            = fd,
+       struct consume_chunk_callback cb = {
+               .func   = extract_chunk_to_fd,
+               .ctx    = fd,
         };
-       return read_blob_prefix(blob, size, &cbs);
+       return read_blob_prefix(blob, size, &cb, false);
  }
  
  /* Extract the full uncompressed contents of the specified blob to the specified
   * file descriptor.  This checks the SHA-1 message digest.  */
  int
-extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd)
+extract_blob_to_fd(struct blob_descriptor *blob, struct filedes *fd,
+                  bool recover_data)
  {
         struct read_blob_callbacks cbs = {
-               .consume_chunk  = extract_chunk_to_fd,
+               .continue_blob  = extract_blob_chunk_to_fd,
                 .ctx            = fd,
         };
-       return read_blob_with_sha1(blob, &cbs);
+       return read_blob_with_sha1(blob, &cbs, recover_data);
  }
  
  /* Calculate the SHA-1 message digest of a blob and store it in @blob->hash.  */
  int
  sha1_blob(struct blob_descriptor *blob)
  {
-       struct read_blob_callbacks cbs = {
+       static const struct read_blob_callbacks cbs = {
         };
-       return read_blob_with_sha1(blob, &cbs);
+       return read_blob_with_sha1(blob, &cbs, false);
  }
  
  /*
@@ -1267,6 +1413,21 @@ wim_reshdr_to_desc(const struct wim_reshdr *reshdr, WIMStruct *wim,
         }
  }
  
+/*
+ * Convert the short WIM resource header @reshdr to a stand-alone WIM resource
+ * descriptor @rdesc, then set @blob to consist of that entire resource.  This
+ * should only be used for non-solid resources!
+ */
+void
+wim_reshdr_to_desc_and_blob(const struct wim_reshdr *reshdr, WIMStruct *wim,
+                           struct wim_resource_descriptor *rdesc,
+                           struct blob_descriptor *blob)
+{
+       wim_reshdr_to_desc(reshdr, wim, rdesc);
+       blob->size = rdesc->uncompressed_size;
+       blob_set_is_located_in_wim_resource(blob, rdesc, 0);
+}
+
  /* Import a WIM resource header from the on-disk format.  */
  void
  get_wim_reshdr(const struct wim_reshdr_disk *disk_reshdr,