5cbe021e6f53ee0a4dc397abf51db33e68cb7d90
[wimlib] / src / integrity.c
1 /*
2  * integrity.c
3  *
4  * WIM files can optionally contain a table of SHA1 message digests at the end,
5  * one digest for each chunk of the file of some specified size (often 10 MB).
6  * This file implements the checking and writing of this table.
7  */
8
9 /*
10  * Copyright (C) 2012, 2013 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #ifdef HAVE_CONFIG_H
29 #  include "config.h"
30 #endif
31
32 #include "wimlib/assert.h"
33 #include "wimlib/endianness.h"
34 #include "wimlib/error.h"
35 #include "wimlib/file_io.h"
36 #include "wimlib/integrity.h"
37 #include "wimlib/resource.h"
38 #include "wimlib/sha1.h"
39 #include "wimlib/wim.h"
40
41 /* Size, in bytes, of each SHA1-summed chunk, when wimlib writes integrity
42  * information. */
43 #define INTEGRITY_CHUNK_SIZE 10485760
44
45 /* Only use a different chunk size for compatibility with an existing integrity
46  * table if the chunk size is between these two numbers. */
47 #define INTEGRITY_MIN_CHUNK_SIZE 4096
48 #define INTEGRITY_MAX_CHUNK_SIZE 134217728
49
50 struct integrity_table {
51         u32 size;
52         u32 num_entries;
53         u32 chunk_size;
54         u8  sha1sums[][20];
55 } _packed_attribute;
56
57 static int
58 calculate_chunk_sha1(struct filedes *in_fd, size_t this_chunk_size,
59                      off_t offset, u8 sha1_md[])
60 {
61         u8 buf[BUFFER_SIZE];
62         SHA_CTX ctx;
63         size_t bytes_remaining;
64         size_t bytes_to_read;
65         int ret;
66
67         bytes_remaining = this_chunk_size;
68         sha1_init(&ctx);
69         do {
70                 bytes_to_read = min(bytes_remaining, sizeof(buf));
71                 ret = full_pread(in_fd, buf, bytes_to_read, offset);
72                 if (ret) {
73                         ERROR_WITH_ERRNO("Read error while calculating "
74                                          "integrity checksums");
75                         return ret;
76                 }
77                 sha1_update(&ctx, buf, bytes_to_read);
78                 bytes_remaining -= bytes_to_read;
79                 offset += bytes_to_read;
80         } while (bytes_remaining);
81         sha1_final(sha1_md, &ctx);
82         return 0;
83 }
84
85
86 /*
87  * read_integrity_table: -  Reads the integrity table from a WIM file.
88  *
89  * @wim:
90  *      WIMStruct for the WIM file; @wim->hdr.integrity specifies the location
91  *      of the integrity table.  The integrity table must exist (i.e.
92  *      res_entry->offset must not be 0).  @wim->in_fd is expected to be a
93  *      seekable file descriptor to the WIM file opened for reading.
94  *
95  * @num_checked_bytes:
96  *      Number of bytes of data that should be checked by the integrity table.
97  *
98  * @table_ret:
99  *      On success, a pointer to an in-memory structure containing the integrity
100  *      information is written to this location.
101  *
102  * Return values:
103  *      WIMLIB_ERR_SUCCESS (0)
104  *      WIMLIB_ERR_INVALID_INTEGRITY_TABLE
105  *      WIMLIB_ERR_NOMEM
106  *      WIMLIB_ERR_READ
107  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE
108  */
109 static int
110 read_integrity_table(WIMStruct *wim, u64 num_checked_bytes,
111                      struct integrity_table **table_ret)
112 {
113         void *buf;
114         struct integrity_table *table;
115         int ret;
116
117         if (wim->hdr.integrity.size < 8)
118                 goto invalid;
119
120         DEBUG("Reading integrity table (offset %"PRIu64", "
121               "original_size %"PRIu64")",
122               wim->hdr.integrity.offset, wim->hdr.integrity.original_size);
123
124         ret = res_entry_to_data(&wim->hdr.integrity, wim, &buf);
125         if (ret)
126                 return ret;
127         table = buf;
128
129         table->size        = le32_to_cpu(table->size);
130         table->num_entries = le32_to_cpu(table->num_entries);
131         table->chunk_size  = le32_to_cpu(table->chunk_size);
132
133         DEBUG("table->size = %u, table->num_entries = %u, "
134               "table->chunk_size = %u",
135               table->size, table->num_entries, table->chunk_size);
136
137         if (table->size != wim->hdr.integrity.original_size ||
138             table->size != (u64)table->num_entries * SHA1_HASH_SIZE + 12 ||
139             table->chunk_size == 0 ||
140             table->num_entries != DIV_ROUND_UP(num_checked_bytes, table->chunk_size))
141         {
142                 FREE(table);
143                 goto invalid;
144         }
145
146         *table_ret = table;
147         return 0;
148
149 invalid:
150         ERROR("Integrity table is invalid");
151         return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
152 }
153
154 /*
155  * calculate_integrity_table():
156  *
157  * Calculates an integrity table for the data in a file beginning at offset 208
158  * (WIM_HEADER_DISK_SIZE).
159  *
160  * @in_fd:
161  *      File descriptor for the file to be checked, opened for reading.  Does
162  *      not need to be at any specific location in the file.
163  *
164  * @new_check_end:
165  *      Offset of byte after the last byte to be checked.
166  *
167  * @old_table:
168  *      If non-NULL, a pointer to the table containing the previously calculated
169  *      integrity data for a prefix of this file.
170  *
171  * @old_check_end:
172  *      If @old_table is non-NULL, the byte after the last byte that was checked
173  *      in the old table.  Must be less than or equal to new_check_end.
174  *
175  * @progress_func:
176  *      If non-NULL, a progress function that will be called after every
177  *      calculated chunk.
178  *
179  * @integrity_table_ret:
180  *      On success, a pointer to the calculated integrity table is written into
181  *      this location.
182  *
183  * Return values:
184  *      WIMLIB_ERR_SUCCESS (0)
185  *      WIMLIB_ERR_NOMEM
186  *      WIMLIB_ERR_READ
187  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE
188  */
189 static int
190 calculate_integrity_table(struct filedes *in_fd,
191                           off_t new_check_end,
192                           const struct integrity_table *old_table,
193                           off_t old_check_end,
194                           wimlib_progress_func_t progress_func,
195                           struct integrity_table **integrity_table_ret)
196 {
197         int ret;
198         size_t chunk_size = INTEGRITY_CHUNK_SIZE;
199
200         /* If an old table is provided, set the chunk size to be compatible with
201          * the old chunk size, unless the old chunk size was weird. */
202         if (old_table != NULL) {
203                 if (old_table->num_entries == 0 ||
204                     old_table->chunk_size < INTEGRITY_MIN_CHUNK_SIZE ||
205                     old_table->chunk_size > INTEGRITY_MAX_CHUNK_SIZE)
206                         old_table = NULL;
207                 else
208                         chunk_size = old_table->chunk_size;
209         }
210
211
212         u64 old_check_bytes = old_check_end - WIM_HEADER_DISK_SIZE;
213         u64 new_check_bytes = new_check_end - WIM_HEADER_DISK_SIZE;
214
215         u32 old_num_chunks = DIV_ROUND_UP(old_check_bytes, chunk_size);
216         u32 new_num_chunks = DIV_ROUND_UP(new_check_bytes, chunk_size);
217
218         size_t old_last_chunk_size = MODULO_NONZERO(old_check_bytes, chunk_size);
219         size_t new_last_chunk_size = MODULO_NONZERO(new_check_bytes, chunk_size);
220
221         size_t new_table_size = 12 + new_num_chunks * SHA1_HASH_SIZE;
222
223         struct integrity_table *new_table = MALLOC(new_table_size);
224         if (!new_table)
225                 return WIMLIB_ERR_NOMEM;
226         new_table->num_entries = new_num_chunks;
227         new_table->size = new_table_size;
228         new_table->chunk_size = chunk_size;
229
230         u64 offset = WIM_HEADER_DISK_SIZE;
231         union wimlib_progress_info progress;
232
233         if (progress_func) {
234                 progress.integrity.total_bytes      = new_check_bytes;
235                 progress.integrity.total_chunks     = new_num_chunks;
236                 progress.integrity.completed_chunks = 0;
237                 progress.integrity.completed_bytes  = 0;
238                 progress.integrity.chunk_size       = chunk_size;
239                 progress.integrity.filename         = NULL;
240                 progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
241                               &progress);
242         }
243
244         for (u32 i = 0; i < new_num_chunks; i++) {
245                 size_t this_chunk_size;
246                 if (i == new_num_chunks - 1)
247                         this_chunk_size = new_last_chunk_size;
248                 else
249                         this_chunk_size = chunk_size;
250                 if (old_table &&
251                     ((this_chunk_size == chunk_size && i < old_num_chunks - 1) ||
252                       (i == old_num_chunks - 1 && this_chunk_size == old_last_chunk_size)))
253                 {
254                         /* Can use SHA1 message digest from old integrity table
255                          * */
256                         copy_hash(new_table->sha1sums[i], old_table->sha1sums[i]);
257                 } else {
258                         /* Calculate the SHA1 message digest of this chunk */
259                         ret = calculate_chunk_sha1(in_fd, this_chunk_size,
260                                                    offset, new_table->sha1sums[i]);
261                         if (ret) {
262                                 FREE(new_table);
263                                 return ret;
264                         }
265                 }
266                 offset += this_chunk_size;
267                 if (progress_func) {
268                         progress.integrity.completed_chunks++;
269                         progress.integrity.completed_bytes += this_chunk_size;
270                         progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
271                                       &progress);
272                 }
273         }
274         *integrity_table_ret = new_table;
275         return 0;
276 }
277
278 /*
279  * write_integrity_table():
280  *
281  * Writes a WIM integrity table (a list of SHA1 message digests of raw 10 MiB
282  * chunks of the file).
283  *
284  * This function can optionally re-use entries from an older integrity table.
285  * To do this, make @integrity_res_entry point to the resource entry for the
286  * older table (note: this is an input-output parameter), and set
287  * @old_lookup_table_end to the offset of the byte directly following the last
288  * byte checked by the old table.  If the old integrity table is invalid or
289  * cannot be read, a warning is printed and the integrity information is
290  * re-calculated.
291  *
292  * @wim:
293  *      WIMStruct for the WIM file.  @wim->out_fd must be a seekable descriptor
294  *      to the new WIM file, opened read-write, positioned at the location at
295  *      which the integrity table is to be written.  Furthermore,
296  *      @wim->hdr.integrity is expected to be a resource entry which will be set
297  *      to the integrity table information on success.  In addition, if
298  *      @old_lookup_table_end != 0, @wim->hdr.integrity must initially contain
299  *      information about the old integrity table, and @wim->in_fd must be a
300  *      seekable descriptor to the original WIM file opened for reading.
301  *
302  * @new_lookup_table_end:
303  *      The offset of the byte directly following the lookup table in the WIM
304  *      being written.
305  *
306  * @old_lookup_table_end:
307  *      If nonzero, the offset of the byte directly following the old lookup
308  *      table in the WIM.
309  *
310  * @progress_func
311  *      If non-NULL, a progress function that will be called after every
312  *      calculated chunk.
313  *
314  * Return values:
315  *      WIMLIB_ERR_SUCCESS (0)
316  *      WIMLIB_ERR_INVALID_INTEGRITY_TABLE
317  *      WIMLIB_ERR_NOMEM
318  *      WIMLIB_ERR_READ
319  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE
320  *      WIMLIB_ERR_WRITE
321  */
322 int
323 write_integrity_table(WIMStruct *wim,
324                       off_t new_lookup_table_end,
325                       off_t old_lookup_table_end,
326                       wimlib_progress_func_t progress_func)
327 {
328         struct integrity_table *old_table;
329         struct integrity_table *new_table;
330         int ret;
331         u32 new_table_size;
332
333         wimlib_assert(old_lookup_table_end <= new_lookup_table_end);
334
335         old_table = NULL;
336         if (wim_has_integrity_table(wim) && old_lookup_table_end != 0) {
337                 ret = read_integrity_table(wim,
338                                            old_lookup_table_end - WIM_HEADER_DISK_SIZE,
339                                            &old_table);
340                 if (ret == WIMLIB_ERR_INVALID_INTEGRITY_TABLE) {
341                         WARNING("Old integrity table is invalid! "
342                                 "Ignoring it");
343                 } else if (ret != 0) {
344                         WARNING("Can't read old integrity table! "
345                                 "Ignoring it");
346                 }
347         }
348
349         ret = calculate_integrity_table(&wim->out_fd, new_lookup_table_end,
350                                         old_table, old_lookup_table_end,
351                                         progress_func, &new_table);
352         if (ret)
353                 goto out_free_old_table;
354
355         new_table_size = new_table->size;
356
357         new_table->size        = cpu_to_le32(new_table->size);
358         new_table->num_entries = cpu_to_le32(new_table->num_entries);
359         new_table->chunk_size  = cpu_to_le32(new_table->chunk_size);
360
361         ret = write_wim_resource_from_buffer(new_table,
362                                              new_table_size,
363                                              0,
364                                              &wim->out_fd,
365                                              WIMLIB_COMPRESSION_TYPE_NONE,
366                                              &wim->hdr.integrity,
367                                              NULL,
368                                              0);
369         FREE(new_table);
370 out_free_old_table:
371         FREE(old_table);
372         return ret;
373 }
374
375 /*
376  * verify_integrity():
377  *
378  * Checks a WIM for consistency with the integrity table.
379  *
380  * @in_fd:
381  *      File descriptor to the WIM file, opened for reading.
382  *
383  * @table:
384  *      The integrity table for the WIM, read into memory.
385  *
386  * @bytes_to_check:
387  *      Number of bytes in the WIM that need to be checked (offset of end of the
388  *      lookup table minus offset of end of the header).
389  *
390  * @progress_func
391  *      If non-NULL, a progress function that will be called after every
392  *      verified chunk.
393  *
394  * Returns:
395  *      > 0 (WIMLIB_ERR_READ, WIMLIB_ERR_UNEXPECTED_END_OF_FILE) on error
396  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
397  *      were no inconsistencies.
398  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
399  */
400 static int
401 verify_integrity(struct filedes *in_fd, const tchar *filename,
402                  const struct integrity_table *table,
403                  u64 bytes_to_check,
404                  wimlib_progress_func_t progress_func)
405 {
406         int ret;
407         u64 offset = WIM_HEADER_DISK_SIZE;
408         u8 sha1_md[SHA1_HASH_SIZE];
409         union wimlib_progress_info progress;
410
411         if (progress_func) {
412                 progress.integrity.total_bytes      = bytes_to_check;
413                 progress.integrity.total_chunks     = table->num_entries;
414                 progress.integrity.completed_chunks = 0;
415                 progress.integrity.completed_bytes  = 0;
416                 progress.integrity.chunk_size       = table->chunk_size;
417                 progress.integrity.filename         = filename;
418                 progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
419                               &progress);
420         }
421         for (u32 i = 0; i < table->num_entries; i++) {
422                 size_t this_chunk_size;
423                 if (i == table->num_entries - 1)
424                         this_chunk_size = MODULO_NONZERO(bytes_to_check,
425                                                          table->chunk_size);
426                 else
427                         this_chunk_size = table->chunk_size;
428
429                 ret = calculate_chunk_sha1(in_fd, this_chunk_size, offset, sha1_md);
430                 if (ret)
431                         return ret;
432
433                 if (!hashes_equal(sha1_md, table->sha1sums[i]))
434                         return WIM_INTEGRITY_NOT_OK;
435
436                 offset += this_chunk_size;
437                 if (progress_func) {
438                         progress.integrity.completed_chunks++;
439                         progress.integrity.completed_bytes += this_chunk_size;
440                         progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
441                                       &progress);
442                 }
443         }
444         return WIM_INTEGRITY_OK;
445 }
446
447
448 /*
449  * check_wim_integrity():
450  *
451  * Verifies the integrity of the WIM by making sure the SHA1 message digests of
452  * ~10 MiB chunks of the WIM match up with the values given in the integrity
453  * table.
454  *
455  * @wim:
456  *      The WIM, opened for reading.
457  *
458  * @progress_func
459  *      If non-NULL, a progress function that will be called after every
460  *      verified chunk.
461  *
462  * Returns:
463  *      > 0 (WIMLIB_ERR_INVALID_INTEGRITY_TABLE, WIMLIB_ERR_READ,
464  *           WIMLIB_ERR_UNEXPECTED_END_OF_FILE) on error
465  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
466  *      were no inconsistencies.
467  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
468  *      -2 (WIM_INTEGRITY_NONEXISTENT) if the WIM contains no integrity
469  *      information.
470  */
471 int
472 check_wim_integrity(WIMStruct *wim, wimlib_progress_func_t progress_func)
473 {
474         int ret;
475         u64 bytes_to_check;
476         struct integrity_table *table;
477         u64 end_lookup_table_offset;
478
479         if (!wim_has_integrity_table(wim)) {
480                 DEBUG("No integrity information.");
481                 return WIM_INTEGRITY_NONEXISTENT;
482         }
483
484         end_lookup_table_offset = wim->hdr.lookup_table_res_entry.offset +
485                                   wim->hdr.lookup_table_res_entry.size;
486
487         if (end_lookup_table_offset < WIM_HEADER_DISK_SIZE) {
488                 ERROR("WIM lookup table ends before WIM header ends!");
489                 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
490         }
491
492         bytes_to_check = end_lookup_table_offset - WIM_HEADER_DISK_SIZE;
493
494         ret = read_integrity_table(wim, bytes_to_check, &table);
495         if (ret)
496                 return ret;
497         ret = verify_integrity(&wim->in_fd, wim->filename, table,
498                                bytes_to_check, progress_func);
499         FREE(table);
500         return ret;
501 }