resource.c: fix bug in read_compressed_wim_resource()

[wimlib] / src / resource.c
diff --git a/src/resource.c b/src/resource.c

index 74c9bb8b9a9570aa52110139e73cc9a0907982bb..efdbdd40aab293825cbb9aebd96c5173b0154182 100644 (file)
--- a/src/resource.c
+++ b/src/resource.c
@@ -103,7 +103,7 @@ struct data_range {
   *
   *     WIMLIB_ERR_SUCCESS (0)
   *     WIMLIB_ERR_READ                   (errno set)
- *     WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to 0)
+ *     WIMLIB_ERR_UNEXPECTED_END_OF_FILE (errno set to EINVAL)
   *     WIMLIB_ERR_NOMEM                  (errno set to ENOMEM)
   *     WIMLIB_ERR_DECOMPRESSION          (errno set to EINVAL)
   *     WIMLIB_ERR_INVALID_CHUNK_SIZE     (errno set to EINVAL)
@@ -234,35 +234,34 @@ read_compressed_wim_resource(const struct wim_resource_descriptor * const rdesc,
                  * to initialize the chunk_offsets array.  */
  
                 u64 first_chunk_entry_to_read;
-               u64 last_chunk_entry_to_read;
+               u64 num_chunk_entries_to_read;
  
                 if (alt_chunk_table) {
                         /* The alternate chunk table contains chunk sizes, not
                          * offsets, so we always must read all preceding entries
                          * in order to determine offsets.  */
                         first_chunk_entry_to_read = 0;
-                       last_chunk_entry_to_read = last_needed_chunk;
+                       num_chunk_entries_to_read = last_needed_chunk + 1;
                 } else {
-                       /* Here we must account for the fact that the first
-                        * chunk has no explicit chunk table entry.  */
  
-                       if (read_start_chunk == 0)
+                       num_chunk_entries_to_read = last_needed_chunk - read_start_chunk + 1;
+
+                       /* The first chunk has no explicit chunk table entry.  */
+                       if (read_start_chunk == 0) {
+                               num_chunk_entries_to_read--;
                                 first_chunk_entry_to_read = 0;
-                       else
+                       } else {
                                 first_chunk_entry_to_read = read_start_chunk - 1;
+                       }
  
-                       if (last_needed_chunk == 0)
-                               last_chunk_entry_to_read = 0;
-                       else
-                               last_chunk_entry_to_read = last_needed_chunk - 1;
-
+                       /* Unless we're reading the final chunk of the resource,
+                        * we need the offset of the chunk following the last
+                        * needed chunk so that the compressed size of the last
+                        * needed chunk can be computed.  */
                         if (last_needed_chunk < num_chunks - 1)
-                               last_chunk_entry_to_read++;
+                               num_chunk_entries_to_read++;
                 }
  
-               const u64 num_chunk_entries_to_read =
-                       last_chunk_entry_to_read - first_chunk_entry_to_read + 1;
-
                 const u64 chunk_offsets_alloc_size =
                         max(num_chunk_entries_to_read,
                             num_needed_chunk_offsets) * sizeof(chunk_offsets[0]);
@@ -537,7 +536,8 @@ read_error:
   * data in nonempty chunks into the cbs->consume_chunk() function.  */
  static int
  read_raw_file_data(struct filedes *in_fd, u64 offset, u64 size,
-                  const struct read_blob_callbacks *cbs)
+                  const struct read_blob_callbacks *cbs,
+                  const tchar *filename)
  {
         u8 buf[BUFFER_SIZE];
         size_t bytes_to_read;
@@ -546,10 +546,8 @@ read_raw_file_data(struct filedes *in_fd, u64 offset, u64 size,
         while (size) {
                 bytes_to_read = min(sizeof(buf), size);
                 ret = full_pread(in_fd, buf, bytes_to_read, offset);
-               if (unlikely(ret)) {
-                       ERROR_WITH_ERRNO("Read error");
-                       return ret;
-               }
+               if (unlikely(ret))
+                       goto read_error;
                 ret = call_consume_chunk(buf, bytes_to_read, cbs);
                 if (unlikely(ret))
                         return ret;
@@ -557,6 +555,17 @@ read_raw_file_data(struct filedes *in_fd, u64 offset, u64 size,
                 offset += bytes_to_read;
         }
         return 0;
+
+read_error:
+       if (!filename) {
+               ERROR_WITH_ERRNO("Error reading data from WIM file");
+       } else if (ret == WIMLIB_ERR_UNEXPECTED_END_OF_FILE) {
+               ERROR("\"%"TS"\": File was concurrently truncated", filename);
+               ret = WIMLIB_ERR_CONCURRENT_MODIFICATION_DETECTED;
+       } else {
+               ERROR_WITH_ERRNO("\"%"TS"\": Error reading data", filename);
+       }
+       return ret;
  }
  
  /* A consume_chunk() implementation that simply concatenates all chunks into an
@@ -601,7 +610,7 @@ read_partial_wim_resource(const struct wim_resource_descriptor *rdesc,
         /* Uncompressed resource  */
         return read_raw_file_data(&rdesc->wim->in_fd,
                                   rdesc->offset_in_wim + offset,
-                                 size, cbs);
+                                 size, cbs, NULL);
  }
  
  /* Read the specified range of uncompressed data from the specified blob, which
@@ -660,7 +669,7 @@ read_file_on_disk_prefix(const struct blob_descriptor *blob, u64 size,
                 return WIMLIB_ERR_OPEN;
         }
         filedes_init(&fd, raw_fd);
-       ret = read_raw_file_data(&fd, 0, size, cbs);
+       ret = read_raw_file_data(&fd, 0, size, cbs, blob->file_on_disk);
         filedes_close(&fd);
         return ret;
  }
@@ -682,7 +691,7 @@ read_staging_file_prefix(const struct blob_descriptor *blob, u64 size,
                 return WIMLIB_ERR_OPEN;
         }
         filedes_init(&fd, raw_fd);
-       ret = read_raw_file_data(&fd, 0, size, cbs);
+       ret = read_raw_file_data(&fd, 0, size, cbs, blob->staging_file_name);
         filedes_close(&fd);
         return ret;
  }
@@ -808,8 +817,7 @@ wim_reshdr_to_data(const struct wim_reshdr *reshdr, WIMStruct *wim,
         struct wim_resource_descriptor rdesc;
         struct blob_descriptor blob;
  
-       wim_reshdr_to_desc(reshdr, wim, &rdesc);
-       blob_set_is_located_in_nonsolid_wim_resource(&blob, &rdesc);
+       wim_reshdr_to_desc_and_blob(reshdr, wim, &rdesc, &blob);
  
         return read_blob_into_alloc_buf(&blob, buf_ret);
  }
@@ -824,8 +832,7 @@ wim_reshdr_to_hash(const struct wim_reshdr *reshdr, WIMStruct *wim,
         struct blob_descriptor blob;
         int ret;
  
-       wim_reshdr_to_desc(reshdr, wim, &rdesc);
-       blob_set_is_located_in_nonsolid_wim_resource(&blob, &rdesc);
+       wim_reshdr_to_desc_and_blob(reshdr, wim, &rdesc, &blob);
         blob.unhashed = 1;
  
         ret = sha1_blob(&blob);
@@ -934,6 +941,62 @@ hasher_consume_chunk(const void *chunk, size_t size, void *_ctx)
         return call_consume_chunk(chunk, size, &ctx->cbs);
  }
  
+static int
+report_sha1_mismatch_error(const struct blob_descriptor *blob,
+                          const u8 actual_hash[SHA1_HASH_SIZE])
+{      /* Log a SHA-1 mismatch for @blob (expected blob->hash vs. @actual_hash) and return an error code chosen by where the blob's data is located.  */
+       tchar expected_hashstr[SHA1_HASH_SIZE * 2 + 1];  /* presumably two hex characters per hash byte plus a terminator -- confirm against sprint_hash()  */
+       tchar actual_hashstr[SHA1_HASH_SIZE * 2 + 1];
+
+       wimlib_assert(blob->blob_location != BLOB_NONEXISTENT);  /* asserted precondition: the blob has a location  */
+       wimlib_assert(blob->blob_location != BLOB_IN_ATTACHED_BUFFER);  /* asserted precondition: the blob is not in an attached buffer  */
+
+       sprint_hash(blob->hash, expected_hashstr);
+       sprint_hash(actual_hash, actual_hashstr);
+
+       if (blob_is_in_file(blob)) {  /* file-backed blob: report its path; a mismatch is treated as concurrent modification  */
+               ERROR("A file was concurrently modified!\n"
+                     "        Path: \"%"TS"\"\n"
+                     "        Expected SHA-1: %"TS"\n"
+                     "        Actual SHA-1: %"TS"\n",
+                     blob->file_on_disk, expected_hashstr, actual_hashstr);
+               return WIMLIB_ERR_CONCURRENT_MODIFICATION_DETECTED;
+       } else if (blob->blob_location == BLOB_IN_WIM) {  /* blob stored in a WIM: a mismatch is treated as resource corruption  */
+               const struct wim_resource_descriptor *rdesc = blob->rdesc;  /* descriptor of the containing resource, used for the details below  */
+               ERROR("A WIM resource is corrupted!\n"
+                     "        WIM file: \"%"TS"\"\n"
+                     "        Blob uncompressed size: %"PRIu64"\n"
+                     "        Resource offset in WIM: %"PRIu64"\n"
+                     "        Resource uncompressed size: %"PRIu64"\n"
+                     "        Resource size in WIM: %"PRIu64"\n"
+                     "        Resource flags: 0x%x%"TS"\n"
+                     "        Resource compression type: %"TS"\n"
+                     "        Resource compression chunk size: %"PRIu32"\n"
+                     "        Expected SHA-1: %"TS"\n"
+                     "        Actual SHA-1: %"TS"\n",
+                     rdesc->wim->filename,
+                     blob->size,
+                     rdesc->offset_in_wim,
+                     rdesc->uncompressed_size,
+                     rdesc->size_in_wim,
+                     (unsigned int)rdesc->flags,
+                     (rdesc->is_pipable ? T(", pipable") : T("")),  /* text appended directly after the hex flags value  */
+                     wimlib_get_compression_type_string(
+                                               rdesc->compression_type),
+                     rdesc->chunk_size,
+                     expected_hashstr, actual_hashstr);
+               return WIMLIB_ERR_INVALID_RESOURCE_HASH;
+       } else {  /* any other location: only the numeric location ID is available to report  */
+               ERROR("File data was concurrently modified!\n"
+                     "        Location ID: %d\n"
+                     "        Expected SHA-1: %"TS"\n"
+                     "        Actual SHA-1: %"TS"\n",
+                     (int)blob->blob_location,
+                     expected_hashstr, actual_hashstr);
+               return WIMLIB_ERR_CONCURRENT_MODIFICATION_DETECTED;
+       }
+}
+
  /* Callback for finishing reading a blob while calculating its SHA-1 message
   * digest.  */
  static int
@@ -960,16 +1023,7 @@ hasher_end_blob(struct blob_descriptor *blob, int status, void *_ctx)
         } else if ((ctx->flags & VERIFY_BLOB_HASHES) &&
                    unlikely(!hashes_equal(hash, blob->hash)))
         {
-               if (wimlib_print_errors) {
-                       tchar expected_hashstr[SHA1_HASH_SIZE * 2 + 1];
-                       tchar actual_hashstr[SHA1_HASH_SIZE * 2 + 1];
-                       sprint_hash(blob->hash, expected_hashstr);
-                       sprint_hash(hash, actual_hashstr);
-                       ERROR("The data is corrupted!\n"
-                             "        (Expected SHA-1=%"TS", got SHA-1=%"TS")",
-                             expected_hashstr, actual_hashstr);
-               }
-               ret = WIMLIB_ERR_INVALID_RESOURCE_HASH;
+               ret = report_sha1_mismatch_error(blob, hash);
                 goto out_next_cb;
         }
         ret = 0;
@@ -1267,6 +1321,21 @@ wim_reshdr_to_desc(const struct wim_reshdr *reshdr, WIMStruct *wim,
         }
  }
  
+/*
+ * Convert the short WIM resource header @reshdr to a stand-alone WIM resource
+ * descriptor @rdesc, then set @blob to consist of that entire resource.  This
+ * should only be used for non-solid resources!
+ */
+void
+wim_reshdr_to_desc_and_blob(const struct wim_reshdr *reshdr, WIMStruct *wim,
+                           struct wim_resource_descriptor *rdesc,
+                           struct blob_descriptor *blob)
+{
+       wim_reshdr_to_desc(reshdr, wim, rdesc);  /* fill in @rdesc first; both statements below read it  */
+       blob->size = rdesc->uncompressed_size;  /* the blob spans the whole resource, so it takes the resource's uncompressed size  */
+       blob_set_is_located_in_wim_resource(blob, rdesc, 0);  /* NOTE(review): the 0 is presumably the blob's offset within the resource -- confirm against the helper's definition  */
+}
+
  /* Import a WIM resource header from the on-disk format.  */
  void
  get_wim_reshdr(const struct wim_reshdr_disk *disk_reshdr,