4 * WIM files can optionally contain a table of SHA1 message digests at the end,
5 * one digest for each fixed-size chunk of the file (the chunk size is often 10 MiB).
6 * This file implements the checking and writing of this table.
10 * Copyright (C) 2012, 2013 Eric Biggers
12 * This file is part of wimlib, a library for working with WIM files.
14 * wimlib is free software; you can redistribute it and/or modify it under the
15 * terms of the GNU General Public License as published by the Free
16 * Software Foundation; either version 3 of the License, or (at your option)
19 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21 * A PARTICULAR PURPOSE. See the GNU General Public License for more
24 * You should have received a copy of the GNU General Public License
25 * along with wimlib; if not, see http://www.gnu.org/licenses/.
32 #include "wimlib/assert.h"
33 #include "wimlib/endianness.h"
34 #include "wimlib/error.h"
35 #include "wimlib/file_io.h"
36 #include "wimlib/integrity.h"
37 #include "wimlib/resource.h"
38 #include "wimlib/sha1.h"
39 #include "wimlib/wim.h"
41 /* Size, in bytes, of each SHA1-summed chunk, when wimlib writes integrity
43 #define INTEGRITY_CHUNK_SIZE 10485760
45 /* Only use a different chunk size for compatibility with an existing integrity
46 * table if the chunk size is between these two numbers. */
47 #define INTEGRITY_MIN_CHUNK_SIZE 4096
48 #define INTEGRITY_MAX_CHUNK_SIZE 134217728
50 struct integrity_table {
58 calculate_chunk_sha1(struct filedes *in_fd, size_t this_chunk_size,
59 off_t offset, u8 sha1_md[])
63 size_t bytes_remaining;
67 bytes_remaining = this_chunk_size;
70 bytes_to_read = min(bytes_remaining, sizeof(buf));
71 ret = full_pread(in_fd, buf, bytes_to_read, offset);
73 ERROR_WITH_ERRNO("Read error while calculating "
74 "integrity checksums");
77 sha1_update(&ctx, buf, bytes_to_read);
78 bytes_remaining -= bytes_to_read;
79 offset += bytes_to_read;
80 } while (bytes_remaining);
81 sha1_final(sha1_md, &ctx);
87 * read_integrity_table() - Reads the integrity table from a WIM file.
90 * WIMStruct for the WIM file; @wim->hdr.integrity specifies the location
91 * of the integrity table. The integrity table must exist (i.e.
92 * wim->hdr.integrity.offset must not be 0). @wim->in_fd is expected to be a
93 * seekable file descriptor to the WIM file opened for reading.
96 * Number of bytes of data that should be checked by the integrity table.
99 * On success, a pointer to an in-memory structure containing the integrity
100 * information is written to this location.
103 * WIMLIB_ERR_SUCCESS (0)
104 * WIMLIB_ERR_INVALID_INTEGRITY_TABLE
107 * WIMLIB_ERR_UNEXPECTED_END_OF_FILE
110 read_integrity_table(WIMStruct *wim, u64 num_checked_bytes,
111 struct integrity_table **table_ret)
114 struct integrity_table *table;
117 if (wim->hdr.integrity.size < 8)
120 DEBUG("Reading integrity table (offset %"PRIu64", "
121 "original_size %"PRIu64")",
122 wim->hdr.integrity.offset, wim->hdr.integrity.original_size);
124 ret = res_entry_to_data(&wim->hdr.integrity, wim, &buf);
129 table->size = le32_to_cpu(table->size);
130 table->num_entries = le32_to_cpu(table->num_entries);
131 table->chunk_size = le32_to_cpu(table->chunk_size);
133 DEBUG("table->size = %u, table->num_entries = %u, "
134 "table->chunk_size = %u",
135 table->size, table->num_entries, table->chunk_size);
137 if (table->size != wim->hdr.integrity.original_size ||
138 table->size != (u64)table->num_entries * SHA1_HASH_SIZE + 12 ||
139 table->chunk_size == 0 ||
140 table->num_entries != DIV_ROUND_UP(num_checked_bytes, table->chunk_size))
150 ERROR("Integrity table is invalid");
151 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
155 * calculate_integrity_table():
157 * Calculates an integrity table for the data in a file beginning at offset 208
158 * (WIM_HEADER_DISK_SIZE).
161 * File descriptor for the file to be checked, opened for reading. Does
162 * not need to be at any specific location in the file.
165 * Offset of byte after the last byte to be checked.
168 * If non-NULL, a pointer to the table containing the previously calculated
169 * integrity data for a prefix of this file.
172 * If @old_table is non-NULL, the byte after the last byte that was checked
173 * in the old table. Must be less than or equal to new_check_end.
176 * If non-NULL, a progress function that will be called after every
179 * @integrity_table_ret:
180 * On success, a pointer to the calculated integrity table is written into
184 * WIMLIB_ERR_SUCCESS (0)
187 * WIMLIB_ERR_UNEXPECTED_END_OF_FILE
190 calculate_integrity_table(struct filedes *in_fd,
192 const struct integrity_table *old_table,
194 wimlib_progress_func_t progress_func,
195 struct integrity_table **integrity_table_ret)
198 size_t chunk_size = INTEGRITY_CHUNK_SIZE;
200 /* If an old table is provided, set the chunk size to be compatible with
201 * the old chunk size, unless the old chunk size was weird. */
202 if (old_table != NULL) {
203 if (old_table->num_entries == 0 ||
204 old_table->chunk_size < INTEGRITY_MIN_CHUNK_SIZE ||
205 old_table->chunk_size > INTEGRITY_MAX_CHUNK_SIZE)
208 chunk_size = old_table->chunk_size;
212 u64 old_check_bytes = old_check_end - WIM_HEADER_DISK_SIZE;
213 u64 new_check_bytes = new_check_end - WIM_HEADER_DISK_SIZE;
215 u32 old_num_chunks = DIV_ROUND_UP(old_check_bytes, chunk_size);
216 u32 new_num_chunks = DIV_ROUND_UP(new_check_bytes, chunk_size);
218 size_t old_last_chunk_size = MODULO_NONZERO(old_check_bytes, chunk_size);
219 size_t new_last_chunk_size = MODULO_NONZERO(new_check_bytes, chunk_size);
221 size_t new_table_size = 12 + new_num_chunks * SHA1_HASH_SIZE;
223 struct integrity_table *new_table = MALLOC(new_table_size);
225 return WIMLIB_ERR_NOMEM;
226 new_table->num_entries = new_num_chunks;
227 new_table->size = new_table_size;
228 new_table->chunk_size = chunk_size;
230 u64 offset = WIM_HEADER_DISK_SIZE;
231 union wimlib_progress_info progress;
234 progress.integrity.total_bytes = new_check_bytes;
235 progress.integrity.total_chunks = new_num_chunks;
236 progress.integrity.completed_chunks = 0;
237 progress.integrity.completed_bytes = 0;
238 progress.integrity.chunk_size = chunk_size;
239 progress.integrity.filename = NULL;
240 progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
244 for (u32 i = 0; i < new_num_chunks; i++) {
245 size_t this_chunk_size;
246 if (i == new_num_chunks - 1)
247 this_chunk_size = new_last_chunk_size;
249 this_chunk_size = chunk_size;
251 ((this_chunk_size == chunk_size && i < old_num_chunks - 1) ||
252 (i == old_num_chunks - 1 && this_chunk_size == old_last_chunk_size)))
254 /* Can use SHA1 message digest from old integrity table
256 copy_hash(new_table->sha1sums[i], old_table->sha1sums[i]);
258 /* Calculate the SHA1 message digest of this chunk */
259 ret = calculate_chunk_sha1(in_fd, this_chunk_size,
260 offset, new_table->sha1sums[i]);
266 offset += this_chunk_size;
268 progress.integrity.completed_chunks++;
269 progress.integrity.completed_bytes += this_chunk_size;
270 progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
274 *integrity_table_ret = new_table;
279 * write_integrity_table():
281 * Writes a WIM integrity table (a list of SHA1 message digests of raw chunks
282 * of the file; 10 MiB each unless an old table's chunk size is reused).
284 * This function can optionally re-use entries from an older integrity table.
285 * To do this, make @wim->hdr.integrity describe the resource entry for the
286 * older table (note: this field is both read and updated), and set
287 * @old_lookup_table_end to the offset of the byte directly following the last
288 * byte checked by the old table. If the old integrity table is invalid or
289 * cannot be read, a warning is printed and the integrity information is
293 * WIMStruct for the WIM file. @wim->out_fd must be a seekable descriptor
294 * to the new WIM file, opened read-write, positioned at the location at
295 * which the integrity table is to be written. Furthermore,
296 * @wim->hdr.integrity is expected to be a resource entry which will be set
297 * to the integrity table information on success. In addition, if
298 * @old_lookup_table_end != 0, @wim->hdr.integrity must initially contain
299 * information about the old integrity table, and @wim->in_fd must be a
300 * seekable descriptor to the original WIM file opened for reading.
302 * @new_lookup_table_end:
303 * The offset of the byte directly following the lookup table in the WIM
306 * @old_lookup_table_end:
307 * If nonzero, the offset of the byte directly following the old lookup
311 * If non-NULL, a progress function that will be called after every
315 * WIMLIB_ERR_SUCCESS (0)
317 * WIMLIB_ERR_UNEXPECTED_END_OF_FILE
321 write_integrity_table(WIMStruct *wim,
322 off_t new_lookup_table_end,
323 off_t old_lookup_table_end,
324 wimlib_progress_func_t progress_func)
326 struct integrity_table *old_table;
327 struct integrity_table *new_table;
331 DEBUG("Writing integrity table "
332 "(new_lookup_table_end=%"PRIu64", old_lookup_table_end=%"PRIu64")",
333 new_lookup_table_end, old_lookup_table_end);
335 wimlib_assert(old_lookup_table_end <= new_lookup_table_end);
338 if (wim_has_integrity_table(wim) && old_lookup_table_end != 0) {
339 ret = read_integrity_table(wim,
340 old_lookup_table_end - WIM_HEADER_DISK_SIZE,
342 if (ret == WIMLIB_ERR_INVALID_INTEGRITY_TABLE) {
343 WARNING("Old integrity table is invalid! "
345 } else if (ret != 0) {
346 WARNING("Can't read old integrity table! "
351 ret = calculate_integrity_table(&wim->out_fd, new_lookup_table_end,
352 old_table, old_lookup_table_end,
353 progress_func, &new_table);
355 goto out_free_old_table;
357 new_table_size = new_table->size;
359 new_table->size = cpu_to_le32(new_table->size);
360 new_table->num_entries = cpu_to_le32(new_table->num_entries);
361 new_table->chunk_size = cpu_to_le32(new_table->chunk_size);
363 ret = write_wim_resource_from_buffer(new_table,
367 WIMLIB_COMPRESSION_TYPE_NONE,
375 DEBUG("ret=%d", ret);
380 * verify_integrity():
382 * Checks a WIM for consistency with the integrity table.
385 * File descriptor to the WIM file, opened for reading.
388 * The integrity table for the WIM, read into memory.
391 * Number of bytes in the WIM that need to be checked (offset of end of the
392 * lookup table minus offset of end of the header).
395 * If non-NULL, a progress function that will be called after every
399 * > 0 (WIMLIB_ERR_READ, WIMLIB_ERR_UNEXPECTED_END_OF_FILE) on error
400 * 0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
401 * were no inconsistencies.
402 * -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
405 verify_integrity(struct filedes *in_fd, const tchar *filename,
406 const struct integrity_table *table,
408 wimlib_progress_func_t progress_func)
411 u64 offset = WIM_HEADER_DISK_SIZE;
412 u8 sha1_md[SHA1_HASH_SIZE];
413 union wimlib_progress_info progress;
416 progress.integrity.total_bytes = bytes_to_check;
417 progress.integrity.total_chunks = table->num_entries;
418 progress.integrity.completed_chunks = 0;
419 progress.integrity.completed_bytes = 0;
420 progress.integrity.chunk_size = table->chunk_size;
421 progress.integrity.filename = filename;
422 progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
425 for (u32 i = 0; i < table->num_entries; i++) {
426 size_t this_chunk_size;
427 if (i == table->num_entries - 1)
428 this_chunk_size = MODULO_NONZERO(bytes_to_check,
431 this_chunk_size = table->chunk_size;
433 ret = calculate_chunk_sha1(in_fd, this_chunk_size, offset, sha1_md);
437 if (!hashes_equal(sha1_md, table->sha1sums[i]))
438 return WIM_INTEGRITY_NOT_OK;
440 offset += this_chunk_size;
442 progress.integrity.completed_chunks++;
443 progress.integrity.completed_bytes += this_chunk_size;
444 progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
448 return WIM_INTEGRITY_OK;
453 * check_wim_integrity():
455 * Verifies the integrity of the WIM by making sure the SHA1 message digests of
456 * ~10 MiB chunks of the WIM match up with the values given in the integrity
460 * The WIM, opened for reading.
463 * If non-NULL, a progress function that will be called after every
467 * > 0 (WIMLIB_ERR_INVALID_INTEGRITY_TABLE, WIMLIB_ERR_READ,
468 * WIMLIB_ERR_UNEXPECTED_END_OF_FILE) on error
469 * 0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
470 * were no inconsistencies.
471 * -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
472 * -2 (WIM_INTEGRITY_NONEXISTENT) if the WIM contains no integrity
476 check_wim_integrity(WIMStruct *wim, wimlib_progress_func_t progress_func)
480 struct integrity_table *table;
481 u64 end_lookup_table_offset;
483 if (!wim_has_integrity_table(wim)) {
484 DEBUG("No integrity information.");
485 return WIM_INTEGRITY_NONEXISTENT;
488 end_lookup_table_offset = wim->hdr.lookup_table_res_entry.offset +
489 wim->hdr.lookup_table_res_entry.size;
491 if (end_lookup_table_offset < WIM_HEADER_DISK_SIZE) {
492 ERROR("WIM lookup table ends before WIM header ends!");
493 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
496 bytes_to_check = end_lookup_table_offset - WIM_HEADER_DISK_SIZE;
498 ret = read_integrity_table(wim, bytes_to_check, &table);
501 ret = verify_integrity(&wim->in_fd, wim->filename, table,
502 bytes_to_check, progress_func);