]> wimlib.net Git - wimlib/blob - src/integrity.c
67f2133041f63434c04d542839821a2dd7fc2dfa
[wimlib] / src / integrity.c
1 /*
2  * integrity.c
3  *
4  * WIM files can optionally contain an array of SHA1 message digests at the end,
5  * one digest per chunk (usually 10 MiB) of the file.  This file implements
6  * the checking of the digests, and the writing of the digests for new WIM files.
7  */
8
9 /*
10  * Copyright (C) 2012 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #include "wimlib_internal.h"
29 #include "io.h"
30 #include "sha1.h"
31
/* Number of bytes covered by each SHA1 digest when wimlib itself chooses the
 * chunk size for a new integrity table (10 MiB). */
#define INTEGRITY_CHUNK_SIZE (10 * 1024 * 1024)

/* The chunk size of an existing integrity table is only kept, for
 * compatibility with that table, when it lies within these inclusive bounds
 * (4 KiB and 128 MiB). */
#define INTEGRITY_MIN_CHUNK_SIZE (4 * 1024)
#define INTEGRITY_MAX_CHUNK_SIZE (128 * 1024 * 1024)
40
41 struct integrity_table {
42         u32 size;
43         u32 num_entries;
44         u32 chunk_size;
45         u8  sha1sums[0][20];
46 };
47
48 static int calculate_chunk_sha1(FILE *fp, size_t this_chunk_size,
49                                 off_t offset, u8 sha1_md[])
50 {
51         int ret;
52         u8 buf[BUFFER_SIZE];
53         SHA_CTX ctx;
54         size_t bytes_remaining;
55         size_t bytes_to_read;
56         size_t bytes_read;
57
58         ret = fseeko(fp, offset, SEEK_SET);
59         if (ret != 0) {
60                 ERROR_WITH_ERRNO("Can't seek to offset "
61                                  "%"PRIu64" in WIM", offset);
62                 return WIMLIB_ERR_READ;
63         }
64         bytes_remaining = this_chunk_size;
65         sha1_init(&ctx);
66         do {
67                 bytes_to_read = min(bytes_remaining, sizeof(buf));
68                 bytes_read = fread(buf, 1, bytes_to_read, fp);
69                 if (bytes_read != bytes_to_read) {
70                         if (feof(fp)) {
71                                 ERROR("Unexpected EOF while calculating "
72                                       "integrity checksums");
73                         } else {
74                                 ERROR_WITH_ERRNO("File stream error while "
75                                                  "calculating integrity "
76                                                  "checksums");
77                         }
78                         return WIMLIB_ERR_READ;
79                 }
80                 sha1_update(&ctx, buf, bytes_read);
81                 bytes_remaining -= bytes_read;
82         } while (bytes_remaining);
83         sha1_final(sha1_md, &ctx);
84         return 0;
85 }
86
87
88 /*
89  * Reads the integrity table from a WIM file.
90  *
91  * @res_entry:
92  *      The resource entry that specifies the location of the integrity table.
93  *      The integrity table must exist (i.e. res_entry->offset must not be 0).
94  *
95  * @fp:
96  *      FILE * to the WIM file, opened for reading.
97  *
98  * @num_checked_bytes:
99  *      Number of bytes of data that should be checked by the integrity table.
100  *
101  * @table ret:
102  *      On success, a pointer to an in-memory structure containing the integrity
103  *      information is written to this location.
104  *
105  * Returns 0 on success; nonzero on failure.  The possible error codes are:
106  *
107  *     * WIMLIB_ERR_INVALID_INTEGRITY_TABLE:  The integrity table is invalid.
108  *     * WIMLIB_ERR_NOMEM:  Could not allocate memory to store the integrity
109  *                          data.
110  *     * WIMLIB_ERR_READ:   Could not read the integrity data from the WIM file.
111  */
112 static int read_integrity_table(const struct resource_entry *res_entry,
113                                 FILE *fp,
114                                 u64 num_checked_bytes,
115                                 struct integrity_table **table_ret)
116 {
117         struct integrity_table *table = NULL;
118         int ret = 0;
119         u64 expected_size;
120         u64 expected_num_entries;
121
122         if (res_entry->original_size < 12) {
123                 ERROR("Integrity table is too short (expected at least 12 bytes)");
124                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
125                 goto out;
126         }
127
128         if (res_entry->flags & WIM_RESHDR_FLAG_COMPRESSED) {
129                 ERROR("Didn't expect a compressed integrity table");
130                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
131                 goto out;
132         }
133
134         /* Read the integrity table into memory. */
135         if ((sizeof(size_t) < sizeof(u64)
136             && res_entry->size > ~(size_t)0)
137             || ((table = MALLOC(res_entry->size)) == NULL))
138         {
139                 ERROR("Out of memory (needed %zu bytes for integrity table)",
140                       (size_t)res_entry->size);
141                 ret = WIMLIB_ERR_NOMEM;
142                 goto out;
143         }
144
145         ret = read_uncompressed_resource(fp, res_entry->offset,
146                                          res_entry->size, (void*)table);
147
148         if (ret != 0) {
149                 ERROR("Failed to read integrity table (size = %"PRIu64", "
150                       " offset = %"PRIu64")",
151                       (u64)res_entry->size, res_entry->offset);
152                 goto out;
153         }
154
155         table->size        = le32_to_cpu(table->size);
156         table->num_entries = le32_to_cpu(table->num_entries);
157         table->chunk_size  = le32_to_cpu(table->chunk_size);
158
159         if (table->size != res_entry->size) {
160                 ERROR("Inconsistent integrity table sizes: Table header says "
161                       "%u bytes but resource entry says %"PRIu64" bytes",
162                       table->size, (u64)res_entry->size);
163                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
164                 goto out;
165         }
166
167         DEBUG("table->size = %u, table->num_entries = %u, "
168               "table->chunk_size = %u",
169               table->size, table->num_entries, table->chunk_size);
170
171         expected_size = (u64)table->num_entries * SHA1_HASH_SIZE + 12;
172
173         if (table->size != expected_size) {
174                 ERROR("Integrity table is %u bytes, but expected %"PRIu64" "
175                       "bytes to hold %u entries",
176                       table->size, expected_size, table->num_entries);
177                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
178                 goto out;
179         }
180
181         if (table->chunk_size == 0) {
182                 ERROR("Cannot use integrity chunk size of 0");
183                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
184                 goto out;
185         }
186
187         expected_num_entries = DIV_ROUND_UP(num_checked_bytes, table->chunk_size);
188
189         if (table->num_entries != expected_num_entries) {
190                 ERROR("%"PRIu64" entries would be required to checksum "
191                       "the %"PRIu64" bytes from the end of the header to the",
192                       expected_num_entries, num_checked_bytes);
193                 ERROR("end of the lookup table with a chunk size of %u, but "
194                       "there were only %u entries",
195                       table->chunk_size, table->num_entries);
196                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
197         }
198 out:
199         if (ret == 0)
200                 *table_ret = table;
201         else
202                 FREE(table);
203         return ret;
204 }
205
206 /*
207  * Calculates an integrity table for the data in a file beginning at offset 208
208  * (WIM_HEADER_DISK_SIZE).
209  *
210  * @fp:
211  *      FILE * for the file to be checked, opened for reading.  Does not need to
212  *      be at any specific location in the file.
213  *
214  * @new_check_end:
215  *      Offset of byte after the last byte to be checked.
216  *
217  * @old_table:
218  *      If non-NULL, a pointer to the table containing previously contained
219  *      integrity data for a prefix of this file.
220  *
221  * @old_check_end:
222  *      If @old_table is non-NULL, the byte after the last byte that was checked
223  *      in the old table.  Must be less than or equal to new_check_end.
224  *
225  * @progress_func:
226  *      If non-NULL, a progress function that will be called after every
227  *      calculated chunk.
228  *
229  * @integrity_table_ret:
230  *      On success, a pointer to the calculated integrity table is written into
231  *      this location.
232  *
233  * Returns 0 on success; nonzero on failure.
234  */
235 static int calculate_integrity_table(FILE *fp,
236                                      off_t new_check_end,
237                                      const struct integrity_table *old_table,
238                                      off_t old_check_end,
239                                      wimlib_progress_func_t progress_func,
240                                      struct integrity_table **integrity_table_ret)
241 {
242         int ret = 0;
243         size_t chunk_size = INTEGRITY_CHUNK_SIZE;
244
245         /* If an old table is provided, set the chunk size to be compatible with
246          * the old chunk size, unless the old chunk size was weird. */
247         if (old_table != NULL) {
248                 if (old_table->chunk_size < INTEGRITY_MIN_CHUNK_SIZE ||
249                     old_table->chunk_size > INTEGRITY_MAX_CHUNK_SIZE)
250                         old_table = NULL;
251                 else
252                         chunk_size = old_table->chunk_size;
253         }
254
255
256         u64 old_check_bytes = old_check_end - WIM_HEADER_DISK_SIZE;
257         u64 new_check_bytes = new_check_end - WIM_HEADER_DISK_SIZE;
258
259         u32 old_num_chunks = DIV_ROUND_UP(old_check_bytes, chunk_size);
260         u32 new_num_chunks = DIV_ROUND_UP(new_check_bytes, chunk_size);
261
262         size_t old_last_chunk_size = MODULO_NONZERO(old_check_bytes, chunk_size);
263         size_t new_last_chunk_size = MODULO_NONZERO(new_check_bytes, chunk_size);
264
265         size_t new_table_size = 12 + new_num_chunks * SHA1_HASH_SIZE;
266
267         struct integrity_table *new_table = MALLOC(new_table_size);
268         if (!new_table)
269                 return WIMLIB_ERR_NOMEM;
270         new_table->num_entries = new_num_chunks;
271         new_table->size = new_table_size;
272         new_table->chunk_size = chunk_size;
273
274         u64 offset = WIM_HEADER_DISK_SIZE;
275         union wimlib_progress_info progress;
276
277         if (progress_func) {
278                 progress.integrity.total_bytes      = new_check_bytes;
279                 progress.integrity.total_chunks     = new_num_chunks;
280                 progress.integrity.completed_chunks = 0;
281                 progress.integrity.completed_bytes  = 0;
282                 progress.integrity.chunk_size       = chunk_size;
283                 progress.integrity.filename         = NULL;
284                 progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
285                               &progress);
286         }
287
288         for (u32 i = 0; i < new_num_chunks; i++) {
289                 size_t this_chunk_size;
290                 if (i == new_num_chunks - 1)
291                         this_chunk_size = new_last_chunk_size;
292                 else
293                         this_chunk_size = chunk_size;
294                 if (old_table &&
295                     ((this_chunk_size == chunk_size && i < old_num_chunks - 1) ||
296                       (i == old_num_chunks - 1 && this_chunk_size == old_last_chunk_size)))
297                 {
298                         /* Can use SHA1 message digest from old integrity table
299                          * */
300                         copy_hash(new_table->sha1sums[i], old_table->sha1sums[i]);
301                 } else {
302                         /* Calculate the SHA1 message digest of this chunk */
303                         ret = calculate_chunk_sha1(fp, this_chunk_size,
304                                                    offset, new_table->sha1sums[i]);
305                         if (ret != 0)
306                                 break;
307                 }
308                 offset += this_chunk_size;
309                 if (progress_func) {
310                         progress.integrity.completed_chunks++;
311                         progress.integrity.completed_bytes += this_chunk_size;
312                         progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
313                                       &progress);
314                 }
315         }
316         if (ret == 0)
317                 *integrity_table_ret = new_table;
318         else
319                 FREE(new_table);
320         return ret;
321 }
322
323 /*
324  * Writes a WIM integrity table (a list of SHA1 message digests of raw 10 MiB
325  * chunks of the file).
326  *
327  * This function can optionally re-use entries from an older integrity table.
328  * To do this, make @integrity_res_entry point to the resource entry for the
329  * older table (note: this is an input-output parameter), and set
330  * @old_lookup_table_end to the offset of the byte directly following the last
331  * byte checked by the old table.  If the old integrity table is invalid or
332  * cannot be read, a warning is printed and the integrity information is
333  * re-calculated.
334  *
335  * @fp:
336  *      FILE * to the WIM file, opened read-write, positioned at the location at
337  *      which the integrity table is to be written.
338  *
339  * @integrity_res_entry:
340  *      Resource entry which will be set to point to the integrity table on
341  *      success.  In addition, if @old_lookup_table_end != 0, this initially
342  *      must point to the resource entry for the old integrity table for the
343  *      WIM.
344  *
345  * @new_lookup_table_end:
346  *      The offset of the byte directly following the lookup table in the WIM
347  *      being written.
348  *
349  * @old_lookup_table_end:
350  *      If nonzero, the offset of the byte directly following the old lookup
351  *      table in the WIM.
352  *
353  * @progress_func
354  *      If non-NULL, a progress function that will be called after every
355  *      calculated chunk.
356  *
357  * Returns:
358  *      0 on success, nonzero on failure.  The possible error codes are:
359  *         * WIMLIB_ERR_WRITE:  Could not write the integrity table.
360  *         * WIMLIB_ERR_READ:   Could not read a chunk of data that needed
361  *                              to be checked.
362  */
363 int write_integrity_table(FILE *fp,
364                           struct resource_entry *integrity_res_entry,
365                           off_t new_lookup_table_end,
366                           off_t old_lookup_table_end,
367                           wimlib_progress_func_t progress_func)
368 {
369         struct integrity_table *old_table;
370         struct integrity_table *new_table;
371         int ret;
372         off_t cur_offset;
373         u32 new_table_size;
374
375         wimlib_assert(old_lookup_table_end <= new_lookup_table_end);
376
377         cur_offset = ftello(fp);
378         if (cur_offset == -1) {
379                 ERROR_WITH_ERRNO("Failed to get offset in WIM");
380                 return WIMLIB_ERR_WRITE;
381         }
382
383         if (integrity_res_entry->offset == 0 || old_lookup_table_end == 0) {
384                 old_table = NULL;
385         } else {
386                 ret = read_integrity_table(integrity_res_entry, fp,
387                                            old_lookup_table_end - WIM_HEADER_DISK_SIZE,
388                                            &old_table);
389                 if (ret == WIMLIB_ERR_INVALID_INTEGRITY_TABLE) {
390                         WARNING("Old integrity table is invalid! "
391                                 "Ignoring it");
392                 } else if (ret != 0) {
393                         WARNING("Can't read old integrity table! "
394                                 "Ignoring it");
395                 }
396         }
397
398         ret = calculate_integrity_table(fp, new_lookup_table_end,
399                                         old_table, old_lookup_table_end,
400                                         progress_func, &new_table);
401         if (ret != 0)
402                 goto out_free_old_table;
403
404         new_table_size = new_table->size;
405
406         new_table->size        = cpu_to_le32(new_table->size);
407         new_table->num_entries = cpu_to_le32(new_table->num_entries);
408         new_table->chunk_size  = cpu_to_le32(new_table->chunk_size);
409
410         if (fseeko(fp, cur_offset, SEEK_SET) != 0) {
411                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" of WIM to "
412                                  "write integrity table", cur_offset);
413                 ret = WIMLIB_ERR_WRITE;
414                 goto out_free_new_table;
415         }
416
417         if (fwrite(new_table, 1, new_table_size, fp) != new_table_size) {
418                 ERROR_WITH_ERRNO("Failed to write WIM integrity table");
419                 ret = WIMLIB_ERR_WRITE;
420         } else {
421                 integrity_res_entry->offset        = cur_offset;
422                 integrity_res_entry->size          = new_table_size;
423                 integrity_res_entry->original_size = new_table_size;
424                 integrity_res_entry->flags         = 0;
425                 ret = 0;
426         }
427 out_free_new_table:
428         FREE(new_table);
429 out_free_old_table:
430         FREE(old_table);
431         return ret;
432 }
433
434 /*
435  * Checks a WIM for consistency with the integrity table.
436  *
437  * @fp:
438  *      FILE * to the WIM file, opened for reading.
439  *
440  * @table:
441  *      The integrity table for the WIM, read into memory.
442  *
443  * @bytes_to_check:
444  *      Number of bytes in the WIM that need to be checked (offset of end of the
445  *      lookup table minus offset of end of the header).
446  *
447  * @progress_func
448  *      If non-NULL, a progress function that will be called after every
449  *      verified chunk.
450  *
451  * Returns:
452  *      > 0 (WIMLIB_ERR_*) on error
453  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
454  *      were no inconsistencies.
455  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
456  */
457 static int verify_integrity(FILE *fp, const char *filename,
458                             const struct integrity_table *table,
459                             u64 bytes_to_check,
460                             wimlib_progress_func_t progress_func)
461 {
462         int ret;
463         u64 offset = WIM_HEADER_DISK_SIZE;
464         u8 sha1_md[SHA1_HASH_SIZE];
465         union wimlib_progress_info progress;
466
467         if (progress_func) {
468                 progress.integrity.total_bytes      = bytes_to_check;
469                 progress.integrity.total_chunks     = table->num_entries;
470                 progress.integrity.completed_chunks = 0;
471                 progress.integrity.completed_bytes  = 0;
472                 progress.integrity.chunk_size       = table->chunk_size;
473                 progress.integrity.filename         = filename;
474                 progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
475                               &progress);
476         }
477         for (u32 i = 0; i < table->num_entries; i++) {
478                 size_t this_chunk_size;
479                 if (i == table->num_entries - 1)
480                         this_chunk_size = MODULO_NONZERO(bytes_to_check,
481                                                          table->chunk_size);
482                 else
483                         this_chunk_size = table->chunk_size;
484
485                 ret = calculate_chunk_sha1(fp, this_chunk_size, offset, sha1_md);
486                 if (ret != 0)
487                         return ret;
488
489                 if (!hashes_equal(sha1_md, table->sha1sums[i]))
490                         return WIM_INTEGRITY_NOT_OK;
491
492                 offset += this_chunk_size;
493                 if (progress_func) {
494                         progress.integrity.completed_chunks++;
495                         progress.integrity.completed_bytes += this_chunk_size;
496                         progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
497                                       &progress);
498                 }
499         }
500         return WIM_INTEGRITY_OK;
501 }
502
503
504 /*
505  * Verifies the integrity of the WIM by making sure the SHA1 message digests of
506  * ~10 MiB chunks of the WIM match up with the values given in the integrity
507  * tabel.
508  *
509  * @w:
510  *      The WIM, opened for reading, and with the header already read.
511  *
512  * @progress_func
513  *      If non-NULL, a progress function that will be called after every
514  *      verified chunk.
515  *
516  * Returns:
517  *      > 0 (WIMLIB_ERR_*) on error
518  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
519  *      were no inconsistencies.
520  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
521  *      -2 (WIM_INTEGRITY_NONEXISTENT) if the WIM contains no integrity
522  *      information.
523  */
524 int check_wim_integrity(WIMStruct *w, wimlib_progress_func_t progress_func)
525 {
526         int ret;
527         u64 bytes_to_check;
528         struct integrity_table *table;
529         u64 end_lookup_table_offset;
530
531         if (w->hdr.integrity.offset == 0) {
532                 DEBUG("No integrity information.");
533                 return WIM_INTEGRITY_NONEXISTENT;
534         }
535
536         end_lookup_table_offset = w->hdr.lookup_table_res_entry.offset +
537                                   w->hdr.lookup_table_res_entry.size;
538
539         if (end_lookup_table_offset < WIM_HEADER_DISK_SIZE) {
540                 ERROR("WIM lookup table ends before WIM header ends!");
541                 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
542         }
543
544         bytes_to_check = end_lookup_table_offset - WIM_HEADER_DISK_SIZE;
545
546         ret = read_integrity_table(&w->hdr.integrity, w->fp,
547                                    bytes_to_check, &table);
548         if (ret != 0)
549                 return ret;
550         ret = verify_integrity(w->fp, w->filename, table,
551                                bytes_to_check, progress_func);
552         FREE(table);
553         return ret;
554 }