]> wimlib.net Git - wimlib/blob - src/integrity.c
implement WIMLIB_INIT_FLAG_ASSUME_UTF8
[wimlib] / src / integrity.c
1 /*
2  * integrity.c
3  *
4  * WIM files can optionally contain a table of SHA1 message digests at the end,
5  * one digest for each chunk of the file of some specified size (often 10 MB).
6  * This file implements the checking and writing of this table.
7  */
8
9 /*
10  * Copyright (C) 2012, 2013 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #include "wimlib_internal.h"
29 #include "buffer_io.h"
30 #include "sha1.h"
31
/* Chunk size, in bytes, that wimlib uses for each SHA1-summed chunk when it
 * writes integrity information (10 MiB). */
#define INTEGRITY_CHUNK_SIZE (10 * 1024 * 1024)

/* The chunk size of an existing integrity table is only reused, for
 * compatibility, when it lies between these two bounds (4 KiB and 128 MiB,
 * inclusive). */
#define INTEGRITY_MIN_CHUNK_SIZE (4 * 1024)
#define INTEGRITY_MAX_CHUNK_SIZE (128 * 1024 * 1024)
40
41 struct integrity_table {
42         u32 size;
43         u32 num_entries;
44         u32 chunk_size;
45         u8  sha1sums[0][20];
46 };
47
48 static int
49 calculate_chunk_sha1(FILE *fp, size_t this_chunk_size,
50                      off_t offset, u8 sha1_md[])
51 {
52         int ret;
53         u8 buf[BUFFER_SIZE];
54         SHA_CTX ctx;
55         size_t bytes_remaining;
56         size_t bytes_to_read;
57         size_t bytes_read;
58
59         ret = fseeko(fp, offset, SEEK_SET);
60         if (ret != 0) {
61                 ERROR_WITH_ERRNO("Can't seek to offset "
62                                  "%"PRIu64" in WIM", offset);
63                 return WIMLIB_ERR_READ;
64         }
65         bytes_remaining = this_chunk_size;
66         sha1_init(&ctx);
67         do {
68                 bytes_to_read = min(bytes_remaining, sizeof(buf));
69                 bytes_read = fread(buf, 1, bytes_to_read, fp);
70                 if (bytes_read != bytes_to_read) {
71                         if (feof(fp)) {
72                                 ERROR("Unexpected EOF while calculating "
73                                       "integrity checksums");
74                         } else {
75                                 ERROR_WITH_ERRNO("File stream error while "
76                                                  "calculating integrity "
77                                                  "checksums");
78                         }
79                         return WIMLIB_ERR_READ;
80                 }
81                 sha1_update(&ctx, buf, bytes_read);
82                 bytes_remaining -= bytes_read;
83         } while (bytes_remaining);
84         sha1_final(sha1_md, &ctx);
85         return 0;
86 }
87
88
89 /*
90  * read_integrity_table: -  Reads the integrity table from a WIM file.
91  *
92  * @res_entry:
93  *      The resource entry that specifies the location of the integrity table.
94  *      The integrity table must exist (i.e. res_entry->offset must not be 0).
95  *
96  * @fp:
97  *      FILE * to the WIM file, opened for reading.
98  *
99  * @num_checked_bytes:
100  *      Number of bytes of data that should be checked by the integrity table.
101  *
102  * @table ret:
103  *      On success, a pointer to an in-memory structure containing the integrity
104  *      information is written to this location.
105  *
106  * Returns 0 on success; nonzero on failure.  The possible error codes are:
107  *
108  *     * WIMLIB_ERR_INVALID_INTEGRITY_TABLE:  The integrity table is invalid.
109  *     * WIMLIB_ERR_NOMEM:  Could not allocate memory to store the integrity
110  *                          data.
111  *     * WIMLIB_ERR_READ:   Could not read the integrity data from the WIM file.
112  */
113 static int
114 read_integrity_table(const struct resource_entry *res_entry,
115                      FILE *fp,
116                      u64 num_checked_bytes,
117                      struct integrity_table **table_ret)
118 {
119         struct integrity_table *table = NULL;
120         int ret = 0;
121         u64 expected_size;
122         u64 expected_num_entries;
123
124         if (resource_is_compressed(res_entry)) {
125                 ERROR("Didn't expect a compressed integrity table");
126                 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
127         }
128
129         if (res_entry->size < 8 || res_entry->size  > 0xffffffff) {
130                 ERROR("Integrity table resource header is invalid");
131                 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
132         }
133
134         /* Read the integrity table into memory. */
135         if ((table = MALLOC(res_entry->size)) == NULL) {
136                 ERROR("Can't allocate %"PRIu64" bytes for integrity table",
137                       (u64)res_entry->size);
138                 return WIMLIB_ERR_NOMEM;
139         }
140
141         ret = read_uncompressed_resource(fp, res_entry->offset,
142                                          res_entry->size, (void*)table);
143
144         if (ret != 0) {
145                 ERROR("Failed to read integrity table (size = %u, "
146                       " offset = %"PRIu64")",
147                       (unsigned)res_entry->size, res_entry->offset);
148                 goto out;
149         }
150
151         table->size        = le32_to_cpu(table->size);
152         table->num_entries = le32_to_cpu(table->num_entries);
153         table->chunk_size  = le32_to_cpu(table->chunk_size);
154
155         if (table->size != res_entry->size) {
156                 ERROR("Inconsistent integrity table sizes: Table header says "
157                       "%u bytes but resource entry says %u bytes",
158                       table->size, (unsigned)res_entry->size);
159                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
160                 goto out;
161         }
162
163         DEBUG("table->size = %u, table->num_entries = %u, "
164               "table->chunk_size = %u",
165               table->size, table->num_entries, table->chunk_size);
166
167         expected_size = (u64)table->num_entries * SHA1_HASH_SIZE + 12;
168
169         if (table->size != expected_size) {
170                 ERROR("Integrity table is %u bytes, but expected %"PRIu64" "
171                       "bytes to hold %u entries",
172                       table->size, expected_size, table->num_entries);
173                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
174                 goto out;
175         }
176
177         if (table->chunk_size == 0) {
178                 ERROR("Cannot use integrity chunk size of 0");
179                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
180                 goto out;
181         }
182
183         expected_num_entries = DIV_ROUND_UP(num_checked_bytes, table->chunk_size);
184
185         if (table->num_entries != expected_num_entries) {
186                 ERROR("%"PRIu64" integrity table entries would be required "
187                       "to checksum the %"PRIu64" bytes from the end of the "
188                       "header to the",
189                       expected_num_entries, num_checked_bytes);
190                 ERROR("end of the lookup table with a chunk size of %u, but "
191                       "there were only %u entries",
192                       table->chunk_size, table->num_entries);
193                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
194         }
195 out:
196         if (ret == 0)
197                 *table_ret = table;
198         else
199                 FREE(table);
200         return ret;
201 }
202
203 /*
204  * calculate_integrity_table():
205  *
206  * Calculates an integrity table for the data in a file beginning at offset 208
207  * (WIM_HEADER_DISK_SIZE).
208  *
209  * @fp:
210  *      FILE * for the file to be checked, opened for reading.  Does not need to
211  *      be at any specific location in the file.
212  *
213  * @new_check_end:
214  *      Offset of byte after the last byte to be checked.
215  *
216  * @old_table:
217  *      If non-NULL, a pointer to the table containing the previously calculated
218  *      integrity data for a prefix of this file.
219  *
220  * @old_check_end:
221  *      If @old_table is non-NULL, the byte after the last byte that was checked
222  *      in the old table.  Must be less than or equal to new_check_end.
223  *
224  * @progress_func:
225  *      If non-NULL, a progress function that will be called after every
226  *      calculated chunk.
227  *
228  * @integrity_table_ret:
229  *      On success, a pointer to the calculated integrity table is written into
230  *      this location.
231  *
232  * Returns 0 on success; nonzero on failure.
233  */
234 static int
235 calculate_integrity_table(FILE *fp,
236                           off_t new_check_end,
237                           const struct integrity_table *old_table,
238                           off_t old_check_end,
239                           wimlib_progress_func_t progress_func,
240                           struct integrity_table **integrity_table_ret)
241 {
242         int ret = 0;
243         size_t chunk_size = INTEGRITY_CHUNK_SIZE;
244
245         /* If an old table is provided, set the chunk size to be compatible with
246          * the old chunk size, unless the old chunk size was weird. */
247         if (old_table != NULL) {
248                 if (old_table->num_entries == 0 ||
249                     old_table->chunk_size < INTEGRITY_MIN_CHUNK_SIZE ||
250                     old_table->chunk_size > INTEGRITY_MAX_CHUNK_SIZE)
251                         old_table = NULL;
252                 else
253                         chunk_size = old_table->chunk_size;
254         }
255
256
257         u64 old_check_bytes = old_check_end - WIM_HEADER_DISK_SIZE;
258         u64 new_check_bytes = new_check_end - WIM_HEADER_DISK_SIZE;
259
260         u32 old_num_chunks = DIV_ROUND_UP(old_check_bytes, chunk_size);
261         u32 new_num_chunks = DIV_ROUND_UP(new_check_bytes, chunk_size);
262
263         size_t old_last_chunk_size = MODULO_NONZERO(old_check_bytes, chunk_size);
264         size_t new_last_chunk_size = MODULO_NONZERO(new_check_bytes, chunk_size);
265
266         size_t new_table_size = 12 + new_num_chunks * SHA1_HASH_SIZE;
267
268         struct integrity_table *new_table = MALLOC(new_table_size);
269         if (!new_table)
270                 return WIMLIB_ERR_NOMEM;
271         new_table->num_entries = new_num_chunks;
272         new_table->size = new_table_size;
273         new_table->chunk_size = chunk_size;
274
275         u64 offset = WIM_HEADER_DISK_SIZE;
276         union wimlib_progress_info progress;
277
278         if (progress_func) {
279                 progress.integrity.total_bytes      = new_check_bytes;
280                 progress.integrity.total_chunks     = new_num_chunks;
281                 progress.integrity.completed_chunks = 0;
282                 progress.integrity.completed_bytes  = 0;
283                 progress.integrity.chunk_size       = chunk_size;
284                 progress.integrity.filename         = NULL;
285                 progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
286                               &progress);
287         }
288
289         for (u32 i = 0; i < new_num_chunks; i++) {
290                 size_t this_chunk_size;
291                 if (i == new_num_chunks - 1)
292                         this_chunk_size = new_last_chunk_size;
293                 else
294                         this_chunk_size = chunk_size;
295                 if (old_table &&
296                     ((this_chunk_size == chunk_size && i < old_num_chunks - 1) ||
297                       (i == old_num_chunks - 1 && this_chunk_size == old_last_chunk_size)))
298                 {
299                         /* Can use SHA1 message digest from old integrity table
300                          * */
301                         copy_hash(new_table->sha1sums[i], old_table->sha1sums[i]);
302                 } else {
303                         /* Calculate the SHA1 message digest of this chunk */
304                         ret = calculate_chunk_sha1(fp, this_chunk_size,
305                                                    offset, new_table->sha1sums[i]);
306                         if (ret != 0)
307                                 break;
308                 }
309                 offset += this_chunk_size;
310                 if (progress_func) {
311                         progress.integrity.completed_chunks++;
312                         progress.integrity.completed_bytes += this_chunk_size;
313                         progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
314                                       &progress);
315                 }
316         }
317         if (ret == 0)
318                 *integrity_table_ret = new_table;
319         else
320                 FREE(new_table);
321         return ret;
322 }
323
324 /*
325  * write_integrity_table():
326  *
327  * Writes a WIM integrity table (a list of SHA1 message digests of raw 10 MiB
328  * chunks of the file).
329  *
330  * This function can optionally re-use entries from an older integrity table.
331  * To do this, make @integrity_res_entry point to the resource entry for the
332  * older table (note: this is an input-output parameter), and set
333  * @old_lookup_table_end to the offset of the byte directly following the last
334  * byte checked by the old table.  If the old integrity table is invalid or
335  * cannot be read, a warning is printed and the integrity information is
336  * re-calculated.
337  *
338  * @fp:
339  *      FILE * to the WIM file, opened read-write, positioned at the location at
340  *      which the integrity table is to be written.
341  *
342  * @integrity_res_entry:
343  *      Resource entry which will be set to point to the integrity table on
344  *      success.  In addition, if @old_lookup_table_end != 0, this initially
345  *      must point to the resource entry for the old integrity table for the
346  *      WIM.
347  *
348  * @new_lookup_table_end:
349  *      The offset of the byte directly following the lookup table in the WIM
350  *      being written.
351  *
352  * @old_lookup_table_end:
353  *      If nonzero, the offset of the byte directly following the old lookup
354  *      table in the WIM.
355  *
356  * @progress_func
357  *      If non-NULL, a progress function that will be called after every
358  *      calculated chunk.
359  *
360  * Returns:
361  *      0 on success, nonzero on failure.  The possible error codes are:
362  *         * WIMLIB_ERR_WRITE:  Could not write the integrity table.
363  *         * WIMLIB_ERR_READ:   Could not read a chunk of data that needed
364  *                              to be checked.
365  */
366 int
367 write_integrity_table(FILE *fp,
368                       struct resource_entry *integrity_res_entry,
369                       off_t new_lookup_table_end,
370                       off_t old_lookup_table_end,
371                       wimlib_progress_func_t progress_func)
372 {
373         struct integrity_table *old_table;
374         struct integrity_table *new_table;
375         int ret;
376         off_t cur_offset;
377         u32 new_table_size;
378
379         wimlib_assert(old_lookup_table_end <= new_lookup_table_end);
380
381         cur_offset = ftello(fp);
382         if (cur_offset == -1)
383                 return WIMLIB_ERR_WRITE;
384
385         if (integrity_res_entry->offset == 0 || old_lookup_table_end == 0) {
386                 old_table = NULL;
387         } else {
388                 ret = read_integrity_table(integrity_res_entry, fp,
389                                            old_lookup_table_end - WIM_HEADER_DISK_SIZE,
390                                            &old_table);
391                 if (ret == WIMLIB_ERR_INVALID_INTEGRITY_TABLE) {
392                         WARNING("Old integrity table is invalid! "
393                                 "Ignoring it");
394                 } else if (ret != 0) {
395                         WARNING("Can't read old integrity table! "
396                                 "Ignoring it");
397                 }
398         }
399
400         ret = calculate_integrity_table(fp, new_lookup_table_end,
401                                         old_table, old_lookup_table_end,
402                                         progress_func, &new_table);
403         if (ret != 0)
404                 goto out_free_old_table;
405
406         new_table_size = new_table->size;
407
408         new_table->size        = cpu_to_le32(new_table->size);
409         new_table->num_entries = cpu_to_le32(new_table->num_entries);
410         new_table->chunk_size  = cpu_to_le32(new_table->chunk_size);
411
412         if (fseeko(fp, cur_offset, SEEK_SET) != 0) {
413                 ERROR_WITH_ERRNO("Failed to seek to byte %"PRIu64" of WIM to "
414                                  "write integrity table", cur_offset);
415                 ret = WIMLIB_ERR_WRITE;
416                 goto out_free_new_table;
417         }
418
419         if (fwrite(new_table, 1, new_table_size, fp) != new_table_size) {
420                 ERROR_WITH_ERRNO("Failed to write WIM integrity table");
421                 ret = WIMLIB_ERR_WRITE;
422         } else {
423                 integrity_res_entry->offset        = cur_offset;
424                 integrity_res_entry->size          = new_table_size;
425                 integrity_res_entry->original_size = new_table_size;
426                 integrity_res_entry->flags         = 0;
427                 ret = 0;
428         }
429 out_free_new_table:
430         FREE(new_table);
431 out_free_old_table:
432         FREE(old_table);
433         return ret;
434 }
435
436 /*
437  * verify_integrity():
438  *
439  * Checks a WIM for consistency with the integrity table.
440  *
441  * @fp:
442  *      FILE * to the WIM file, opened for reading.
443  *
444  * @table:
445  *      The integrity table for the WIM, read into memory.
446  *
447  * @bytes_to_check:
448  *      Number of bytes in the WIM that need to be checked (offset of end of the
449  *      lookup table minus offset of end of the header).
450  *
451  * @progress_func
452  *      If non-NULL, a progress function that will be called after every
453  *      verified chunk.
454  *
455  * Returns:
456  *      > 0 (WIMLIB_ERR_*) on error
457  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
458  *      were no inconsistencies.
459  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
460  */
461 static int
462 verify_integrity(FILE *fp, const tchar *filename,
463                  const struct integrity_table *table,
464                  u64 bytes_to_check,
465                  wimlib_progress_func_t progress_func)
466 {
467         int ret;
468         u64 offset = WIM_HEADER_DISK_SIZE;
469         u8 sha1_md[SHA1_HASH_SIZE];
470         union wimlib_progress_info progress;
471
472         if (progress_func) {
473                 progress.integrity.total_bytes      = bytes_to_check;
474                 progress.integrity.total_chunks     = table->num_entries;
475                 progress.integrity.completed_chunks = 0;
476                 progress.integrity.completed_bytes  = 0;
477                 progress.integrity.chunk_size       = table->chunk_size;
478                 progress.integrity.filename         = filename;
479                 progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
480                               &progress);
481         }
482         for (u32 i = 0; i < table->num_entries; i++) {
483                 size_t this_chunk_size;
484                 if (i == table->num_entries - 1)
485                         this_chunk_size = MODULO_NONZERO(bytes_to_check,
486                                                          table->chunk_size);
487                 else
488                         this_chunk_size = table->chunk_size;
489
490                 ret = calculate_chunk_sha1(fp, this_chunk_size, offset, sha1_md);
491                 if (ret != 0)
492                         return ret;
493
494                 if (!hashes_equal(sha1_md, table->sha1sums[i]))
495                         return WIM_INTEGRITY_NOT_OK;
496
497                 offset += this_chunk_size;
498                 if (progress_func) {
499                         progress.integrity.completed_chunks++;
500                         progress.integrity.completed_bytes += this_chunk_size;
501                         progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
502                                       &progress);
503                 }
504         }
505         return WIM_INTEGRITY_OK;
506 }
507
508
509 /*
510  * check_wim_integrity():
511  *
512  * Verifies the integrity of the WIM by making sure the SHA1 message digests of
513  * ~10 MiB chunks of the WIM match up with the values given in the integrity
514  * table.
515  *
516  * @w:
517  *      The WIM, opened for reading, and with the header already read.
518  *
519  * @progress_func
520  *      If non-NULL, a progress function that will be called after every
521  *      verified chunk.
522  *
523  * Returns:
524  *      > 0 (WIMLIB_ERR_*) on error
525  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
526  *      were no inconsistencies.
527  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
528  *      -2 (WIM_INTEGRITY_NONEXISTENT) if the WIM contains no integrity
529  *      information.
530  */
531 int
532 check_wim_integrity(WIMStruct *w, wimlib_progress_func_t progress_func)
533 {
534         int ret;
535         u64 bytes_to_check;
536         struct integrity_table *table;
537         u64 end_lookup_table_offset;
538
539         if (w->hdr.integrity.offset == 0) {
540                 DEBUG("No integrity information.");
541                 return WIM_INTEGRITY_NONEXISTENT;
542         }
543
544         end_lookup_table_offset = w->hdr.lookup_table_res_entry.offset +
545                                   w->hdr.lookup_table_res_entry.size;
546
547         if (end_lookup_table_offset < WIM_HEADER_DISK_SIZE) {
548                 ERROR("WIM lookup table ends before WIM header ends!");
549                 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
550         }
551
552         bytes_to_check = end_lookup_table_offset - WIM_HEADER_DISK_SIZE;
553
554         ret = read_integrity_table(&w->hdr.integrity, w->fp,
555                                    bytes_to_check, &table);
556         if (ret != 0)
557                 return ret;
558         ret = verify_integrity(w->fp, w->filename, table,
559                                bytes_to_check, progress_func);
560         FREE(table);
561         return ret;
562 }