350b3a202fa9dd2082fa340d70bf20bbd18d17d1
[wimlib] / src / integrity.c
1 /*
2  * integrity.c
3  *
4  * WIM files can optionally contain a table of SHA1 message digests at the end,
5  * one digest for each chunk of the file of some specified size (often 10 MB).
6  * This file implements the checking and writing of this table.
7  */
8
9 /*
10  * Copyright (C) 2012, 2013 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #ifdef HAVE_CONFIG_H
29 #  include "config.h"
30 #endif
31
32 #include "wimlib/assert.h"
33 #include "wimlib/endianness.h"
34 #include "wimlib/error.h"
35 #include "wimlib/file_io.h"
36 #include "wimlib/integrity.h"
37 #include "wimlib/resource.h"
38 #include "wimlib/sha1.h"
39 #include "wimlib/wim.h"
40 #include "wimlib/write.h"
41
42 /* Size, in bytes, of each SHA1-summed chunk, when wimlib writes integrity
43  * information. */
44 #define INTEGRITY_CHUNK_SIZE 10485760
45
/* Only use a different chunk size for compatibility with an existing integrity
 * table if the chunk size is between these two numbers. */
48 #define INTEGRITY_MIN_CHUNK_SIZE 4096
49 #define INTEGRITY_MAX_CHUNK_SIZE 134217728
50
/* Integrity table: a 12-byte header followed by one SHA1 message digest per
 * chunk of checked data.  read_integrity_table() converts the three header
 * fields from little-endian to CPU byte order after reading, and
 * write_integrity_table() converts them back before writing. */
struct integrity_table {
        /* Total size of the table in bytes; a valid table has
         * size == 12 + num_entries * SHA1_HASH_SIZE. */
        u32 size;
        /* Number of digests in sha1sums[], i.e. the number of chunks. */
        u32 num_entries;
        /* Number of bytes covered by each digest; the last chunk may be
         * shorter. */
        u32 chunk_size;
        /* One 20-byte SHA1 digest per chunk, in file order. */
        u8  sha1sums[][20];
} _packed_attribute; /* NOTE(review): presumably a "packed" attribute so the
                      * in-memory layout matches the on-disk one; confirm. */
57
58 static int
59 calculate_chunk_sha1(struct filedes *in_fd, size_t this_chunk_size,
60                      off_t offset, u8 sha1_md[])
61 {
62         u8 buf[BUFFER_SIZE];
63         SHA_CTX ctx;
64         size_t bytes_remaining;
65         size_t bytes_to_read;
66         int ret;
67
68         bytes_remaining = this_chunk_size;
69         sha1_init(&ctx);
70         do {
71                 bytes_to_read = min(bytes_remaining, sizeof(buf));
72                 ret = full_pread(in_fd, buf, bytes_to_read, offset);
73                 if (ret) {
74                         ERROR_WITH_ERRNO("Read error while calculating "
75                                          "integrity checksums");
76                         return ret;
77                 }
78                 sha1_update(&ctx, buf, bytes_to_read);
79                 bytes_remaining -= bytes_to_read;
80                 offset += bytes_to_read;
81         } while (bytes_remaining);
82         sha1_final(sha1_md, &ctx);
83         return 0;
84 }
85
86
87 /*
88  * read_integrity_table: -  Reads the integrity table from a WIM file.
89  *
90  * @wim:
91  *      WIMStruct for the WIM file; @wim->hdr.integrity_table_reshdr specifies
92  *      the location of the integrity table.  @wim->in_fd is expected to be a
93  *      seekable file descriptor to the WIM file opened for reading.
94  *
95  * @num_checked_bytes:
96  *      Number of bytes of data that should be checked by the integrity table.
97  *
98  * @table_ret:
99  *      On success, a pointer to an in-memory structure containing the integrity
100  *      information is written to this location.
101  *
102  * Return values:
103  *      WIMLIB_ERR_SUCCESS (0)
104  *      WIMLIB_ERR_INVALID_INTEGRITY_TABLE
105  *      WIMLIB_ERR_NOMEM
106  *      WIMLIB_ERR_READ
107  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE
108  */
109 static int
110 read_integrity_table(WIMStruct *wim, u64 num_checked_bytes,
111                      struct integrity_table **table_ret)
112 {
113         void *buf;
114         struct integrity_table *table;
115         int ret;
116
117         if (wim->hdr.integrity_table_reshdr.uncompressed_size < 8)
118                 goto invalid;
119
120         DEBUG("Reading integrity table.");
121
122         ret = wim_reshdr_to_data(&wim->hdr.integrity_table_reshdr, wim, &buf);
123         if (ret)
124                 return ret;
125         table = buf;
126
127         table->size        = le32_to_cpu(table->size);
128         table->num_entries = le32_to_cpu(table->num_entries);
129         table->chunk_size  = le32_to_cpu(table->chunk_size);
130
131         DEBUG("table->size = %u, table->num_entries = %u, "
132               "table->chunk_size = %u",
133               table->size, table->num_entries, table->chunk_size);
134
135         if (table->size != wim->hdr.integrity_table_reshdr.uncompressed_size ||
136             table->size != (u64)table->num_entries * SHA1_HASH_SIZE + 12 ||
137             table->chunk_size == 0 ||
138             table->num_entries != DIV_ROUND_UP(num_checked_bytes, table->chunk_size))
139         {
140                 FREE(table);
141                 goto invalid;
142         }
143
144         *table_ret = table;
145         return 0;
146
147 invalid:
148         ERROR("Integrity table is invalid");
149         return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
150 }
151
152 /*
153  * calculate_integrity_table():
154  *
155  * Calculates an integrity table for the data in a file beginning at offset 208
156  * (WIM_HEADER_DISK_SIZE).
157  *
158  * @in_fd:
159  *      File descriptor for the file to be checked, opened for reading.  Does
160  *      not need to be at any specific location in the file.
161  *
162  * @new_check_end:
163  *      Offset of byte after the last byte to be checked.
164  *
165  * @old_table:
166  *      If non-NULL, a pointer to the table containing the previously calculated
167  *      integrity data for a prefix of this file.
168  *
169  * @old_check_end:
170  *      If @old_table is non-NULL, the byte after the last byte that was checked
171  *      in the old table.  Must be less than or equal to new_check_end.
172  *
173  * @progress_func:
174  *      If non-NULL, a progress function that will be called after every
175  *      calculated chunk.
176  *
177  * @integrity_table_ret:
178  *      On success, a pointer to the calculated integrity table is written into
179  *      this location.
180  *
181  * Return values:
182  *      WIMLIB_ERR_SUCCESS (0)
183  *      WIMLIB_ERR_NOMEM
184  *      WIMLIB_ERR_READ
185  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE
186  */
187 static int
188 calculate_integrity_table(struct filedes *in_fd,
189                           off_t new_check_end,
190                           const struct integrity_table *old_table,
191                           off_t old_check_end,
192                           wimlib_progress_func_t progress_func,
193                           struct integrity_table **integrity_table_ret)
194 {
195         int ret;
196         size_t chunk_size = INTEGRITY_CHUNK_SIZE;
197
198         /* If an old table is provided, set the chunk size to be compatible with
199          * the old chunk size, unless the old chunk size was weird. */
200         if (old_table != NULL) {
201                 if (old_table->num_entries == 0 ||
202                     old_table->chunk_size < INTEGRITY_MIN_CHUNK_SIZE ||
203                     old_table->chunk_size > INTEGRITY_MAX_CHUNK_SIZE)
204                         old_table = NULL;
205                 else
206                         chunk_size = old_table->chunk_size;
207         }
208
209
210         u64 old_check_bytes = old_check_end - WIM_HEADER_DISK_SIZE;
211         u64 new_check_bytes = new_check_end - WIM_HEADER_DISK_SIZE;
212
213         u32 old_num_chunks = DIV_ROUND_UP(old_check_bytes, chunk_size);
214         u32 new_num_chunks = DIV_ROUND_UP(new_check_bytes, chunk_size);
215
216         size_t old_last_chunk_size = MODULO_NONZERO(old_check_bytes, chunk_size);
217         size_t new_last_chunk_size = MODULO_NONZERO(new_check_bytes, chunk_size);
218
219         size_t new_table_size = 12 + new_num_chunks * SHA1_HASH_SIZE;
220
221         struct integrity_table *new_table = MALLOC(new_table_size);
222         if (!new_table)
223                 return WIMLIB_ERR_NOMEM;
224         new_table->num_entries = new_num_chunks;
225         new_table->size = new_table_size;
226         new_table->chunk_size = chunk_size;
227
228         u64 offset = WIM_HEADER_DISK_SIZE;
229         union wimlib_progress_info progress;
230
231         if (progress_func) {
232                 progress.integrity.total_bytes      = new_check_bytes;
233                 progress.integrity.total_chunks     = new_num_chunks;
234                 progress.integrity.completed_chunks = 0;
235                 progress.integrity.completed_bytes  = 0;
236                 progress.integrity.chunk_size       = chunk_size;
237                 progress.integrity.filename         = NULL;
238                 progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
239                               &progress);
240         }
241
242         for (u32 i = 0; i < new_num_chunks; i++) {
243                 size_t this_chunk_size;
244                 if (i == new_num_chunks - 1)
245                         this_chunk_size = new_last_chunk_size;
246                 else
247                         this_chunk_size = chunk_size;
248                 if (old_table &&
249                     ((this_chunk_size == chunk_size && i < old_num_chunks - 1) ||
250                       (i == old_num_chunks - 1 && this_chunk_size == old_last_chunk_size)))
251                 {
252                         /* Can use SHA1 message digest from old integrity table
253                          * */
254                         copy_hash(new_table->sha1sums[i], old_table->sha1sums[i]);
255                 } else {
256                         /* Calculate the SHA1 message digest of this chunk */
257                         ret = calculate_chunk_sha1(in_fd, this_chunk_size,
258                                                    offset, new_table->sha1sums[i]);
259                         if (ret) {
260                                 FREE(new_table);
261                                 return ret;
262                         }
263                 }
264                 offset += this_chunk_size;
265                 if (progress_func) {
266                         progress.integrity.completed_chunks++;
267                         progress.integrity.completed_bytes += this_chunk_size;
268                         progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
269                                       &progress);
270                 }
271         }
272         *integrity_table_ret = new_table;
273         return 0;
274 }
275
276 /*
277  * write_integrity_table():
278  *
279  * Writes a WIM integrity table (a list of SHA1 message digests of raw 10 MiB
280  * chunks of the file).
281  *
282  * This function can optionally re-use entries from an older integrity table.
283  * To do this, ensure that @wim->hdr.integrity_table_reshdr is the resource
284  * header for the older table (note: this is an input-output parameter), and set
285  * @old_lookup_table_end to the offset of the byte directly following the last
286  * byte checked by the old table.  If the old integrity table is invalid or
287  * cannot be read, a warning is printed and the integrity information is
288  * re-calculated.
289  *
290  * @wim:
291  *      WIMStruct for the WIM file.  @wim->out_fd must be a seekable descriptor
292  *      to the new WIM file, opened read-write, positioned at the location at
293  *      which the integrity table is to be written.  Furthermore,
294  *      @wim->hdr.integrity is expected to be a resource entry which will be set
295  *      to the integrity table information on success.  In addition, if
296  *      @old_lookup_table_end != 0, @wim->hdr.integrity must initially contain
297  *      information about the old integrity table, and @wim->in_fd must be a
298  *      seekable descriptor to the original WIM file opened for reading.
299  *
300  * @new_lookup_table_end:
301  *      The offset of the byte directly following the lookup table in the WIM
302  *      being written.
303  *
304  * @old_lookup_table_end:
305  *      If nonzero, the offset of the byte directly following the old lookup
306  *      table in the WIM.
307  *
308  * @progress_func
309  *      If non-NULL, a progress function that will be called after every
310  *      calculated chunk.
311  *
312  * Return values:
313  *      WIMLIB_ERR_SUCCESS (0)
314  *      WIMLIB_ERR_NOMEM
315  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE
316  *      WIMLIB_ERR_WRITE
317  */
318 int
319 write_integrity_table(WIMStruct *wim,
320                       off_t new_lookup_table_end,
321                       off_t old_lookup_table_end,
322                       wimlib_progress_func_t progress_func)
323 {
324         struct integrity_table *old_table;
325         struct integrity_table *new_table;
326         int ret;
327         u32 new_table_size;
328
329         DEBUG("Writing integrity table "
330               "(new_lookup_table_end=%"PRIu64", old_lookup_table_end=%"PRIu64")",
331               new_lookup_table_end, old_lookup_table_end);
332
333         wimlib_assert(old_lookup_table_end <= new_lookup_table_end);
334
335         old_table = NULL;
336         if (wim_has_integrity_table(wim) && old_lookup_table_end != 0) {
337                 ret = read_integrity_table(wim,
338                                            old_lookup_table_end - WIM_HEADER_DISK_SIZE,
339                                            &old_table);
340                 if (ret == WIMLIB_ERR_INVALID_INTEGRITY_TABLE) {
341                         WARNING("Old integrity table is invalid! "
342                                 "Ignoring it");
343                 } else if (ret != 0) {
344                         WARNING("Can't read old integrity table! "
345                                 "Ignoring it");
346                 }
347         }
348
349         ret = calculate_integrity_table(&wim->out_fd, new_lookup_table_end,
350                                         old_table, old_lookup_table_end,
351                                         progress_func, &new_table);
352         if (ret)
353                 goto out_free_old_table;
354
355         new_table_size = new_table->size;
356
357         new_table->size        = cpu_to_le32(new_table->size);
358         new_table->num_entries = cpu_to_le32(new_table->num_entries);
359         new_table->chunk_size  = cpu_to_le32(new_table->chunk_size);
360
361         ret = write_wim_resource_from_buffer(new_table,
362                                              new_table_size,
363                                              0,
364                                              &wim->out_fd,
365                                              WIMLIB_COMPRESSION_TYPE_NONE,
366                                              0,
367                                              &wim->hdr.integrity_table_reshdr,
368                                              NULL,
369                                              0);
370         FREE(new_table);
371 out_free_old_table:
372         FREE(old_table);
373         DEBUG("ret=%d", ret);
374         return ret;
375 }
376
377 /*
378  * verify_integrity():
379  *
380  * Checks a WIM for consistency with the integrity table.
381  *
382  * @in_fd:
383  *      File descriptor to the WIM file, opened for reading.
384  *
385  * @table:
386  *      The integrity table for the WIM, read into memory.
387  *
388  * @bytes_to_check:
389  *      Number of bytes in the WIM that need to be checked (offset of end of the
390  *      lookup table minus offset of end of the header).
391  *
392  * @progress_func
393  *      If non-NULL, a progress function that will be called after every
394  *      verified chunk.
395  *
396  * Returns:
397  *      > 0 (WIMLIB_ERR_READ, WIMLIB_ERR_UNEXPECTED_END_OF_FILE) on error
398  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
399  *      were no inconsistencies.
400  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
401  */
402 static int
403 verify_integrity(struct filedes *in_fd, const tchar *filename,
404                  const struct integrity_table *table,
405                  u64 bytes_to_check,
406                  wimlib_progress_func_t progress_func)
407 {
408         int ret;
409         u64 offset = WIM_HEADER_DISK_SIZE;
410         u8 sha1_md[SHA1_HASH_SIZE];
411         union wimlib_progress_info progress;
412
413         if (progress_func) {
414                 progress.integrity.total_bytes      = bytes_to_check;
415                 progress.integrity.total_chunks     = table->num_entries;
416                 progress.integrity.completed_chunks = 0;
417                 progress.integrity.completed_bytes  = 0;
418                 progress.integrity.chunk_size       = table->chunk_size;
419                 progress.integrity.filename         = filename;
420                 progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
421                               &progress);
422         }
423         for (u32 i = 0; i < table->num_entries; i++) {
424                 size_t this_chunk_size;
425                 if (i == table->num_entries - 1)
426                         this_chunk_size = MODULO_NONZERO(bytes_to_check,
427                                                          table->chunk_size);
428                 else
429                         this_chunk_size = table->chunk_size;
430
431                 ret = calculate_chunk_sha1(in_fd, this_chunk_size, offset, sha1_md);
432                 if (ret)
433                         return ret;
434
435                 if (!hashes_equal(sha1_md, table->sha1sums[i]))
436                         return WIM_INTEGRITY_NOT_OK;
437
438                 offset += this_chunk_size;
439                 if (progress_func) {
440                         progress.integrity.completed_chunks++;
441                         progress.integrity.completed_bytes += this_chunk_size;
442                         progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
443                                       &progress);
444                 }
445         }
446         return WIM_INTEGRITY_OK;
447 }
448
449
450 /*
451  * check_wim_integrity():
452  *
453  * Verifies the integrity of the WIM by making sure the SHA1 message digests of
454  * ~10 MiB chunks of the WIM match up with the values given in the integrity
455  * table.
456  *
457  * @wim:
458  *      The WIM, opened for reading.
459  *
460  * @progress_func
461  *      If non-NULL, a progress function that will be called after every
462  *      verified chunk.
463  *
464  * Returns:
465  *      > 0 (WIMLIB_ERR_INVALID_INTEGRITY_TABLE, WIMLIB_ERR_READ,
466  *           WIMLIB_ERR_UNEXPECTED_END_OF_FILE) on error
467  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
468  *      were no inconsistencies.
469  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
470  *      -2 (WIM_INTEGRITY_NONEXISTENT) if the WIM contains no integrity
471  *      information.
472  */
473 int
474 check_wim_integrity(WIMStruct *wim, wimlib_progress_func_t progress_func)
475 {
476         int ret;
477         u64 bytes_to_check;
478         struct integrity_table *table;
479         u64 end_lookup_table_offset;
480
481         if (!wim_has_integrity_table(wim)) {
482                 DEBUG("No integrity information.");
483                 return WIM_INTEGRITY_NONEXISTENT;
484         }
485
486         end_lookup_table_offset = wim->hdr.lookup_table_reshdr.offset_in_wim +
487                                   wim->hdr.lookup_table_reshdr.size_in_wim;
488
489         if (end_lookup_table_offset < WIM_HEADER_DISK_SIZE) {
490                 ERROR("WIM lookup table ends before WIM header ends!");
491                 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
492         }
493
494         bytes_to_check = end_lookup_table_offset - WIM_HEADER_DISK_SIZE;
495
496         ret = read_integrity_table(wim, bytes_to_check, &table);
497         if (ret)
498                 return ret;
499         ret = verify_integrity(&wim->in_fd, wim->filename, table,
500                                bytes_to_check, progress_func);
501         FREE(table);
502         return ret;
503 }