]> wimlib.net Git - wimlib/blob - src/integrity.c
30817cda74f3c4a57338a3fdfe92dd6c7d525452
[wimlib] / src / integrity.c
1 /*
2  * integrity.c
3  *
4  * WIM files can optionally contain a table of SHA1 message digests at the end,
5  * one digest for each chunk of the file of some specified size (often 10 MB).
6  * This file implements the checking and writing of this table.
7  */
8
9 /*
10  * Copyright (C) 2012, 2013 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #ifdef HAVE_CONFIG_H
29 #  include "config.h"
30 #endif
31
32 #include "wimlib/assert.h"
33 #include "wimlib/endianness.h"
34 #include "wimlib/error.h"
35 #include "wimlib/file_io.h"
36 #include "wimlib/integrity.h"
37 #include "wimlib/progress.h"
38 #include "wimlib/resource.h"
39 #include "wimlib/sha1.h"
40 #include "wimlib/wim.h"
41 #include "wimlib/write.h"
42
/* Size, in bytes, of each SHA1-summed chunk when wimlib itself writes the
 * integrity information (10 MiB). */
#define INTEGRITY_CHUNK_SIZE            (10 * 1024 * 1024)

/* A chunk size taken from an existing integrity table is only kept, for
 * compatibility with that table, when it lies between these two bounds
 * (4 KiB and 128 MiB, inclusive). */
#define INTEGRITY_MIN_CHUNK_SIZE        (4 * 1024)
#define INTEGRITY_MAX_CHUNK_SIZE        (128 * 1024 * 1024)
51
52 struct integrity_table {
53         u32 size;
54         u32 num_entries;
55         u32 chunk_size;
56         u8  sha1sums[][20];
57 } _packed_attribute;
58
59 static int
60 calculate_chunk_sha1(struct filedes *in_fd, size_t this_chunk_size,
61                      off_t offset, u8 sha1_md[])
62 {
63         u8 buf[BUFFER_SIZE];
64         SHA_CTX ctx;
65         size_t bytes_remaining;
66         size_t bytes_to_read;
67         int ret;
68
69         bytes_remaining = this_chunk_size;
70         sha1_init(&ctx);
71         do {
72                 bytes_to_read = min(bytes_remaining, sizeof(buf));
73                 ret = full_pread(in_fd, buf, bytes_to_read, offset);
74                 if (ret) {
75                         ERROR_WITH_ERRNO("Read error while calculating "
76                                          "integrity checksums");
77                         return ret;
78                 }
79                 sha1_update(&ctx, buf, bytes_to_read);
80                 bytes_remaining -= bytes_to_read;
81                 offset += bytes_to_read;
82         } while (bytes_remaining);
83         sha1_final(sha1_md, &ctx);
84         return 0;
85 }
86
87
88 /*
89  * read_integrity_table: -  Reads the integrity table from a WIM file.
90  *
91  * @wim:
92  *      WIMStruct for the WIM file; @wim->hdr.integrity_table_reshdr specifies
93  *      the location of the integrity table.  @wim->in_fd is expected to be a
94  *      seekable file descriptor to the WIM file opened for reading.
95  *
96  * @num_checked_bytes:
97  *      Number of bytes of data that should be checked by the integrity table.
98  *
99  * @table_ret:
100  *      On success, a pointer to an in-memory structure containing the integrity
101  *      information is written to this location.
102  *
103  * Return values:
104  *      WIMLIB_ERR_SUCCESS (0)
105  *      WIMLIB_ERR_INVALID_INTEGRITY_TABLE
106  *      WIMLIB_ERR_NOMEM
107  *      WIMLIB_ERR_READ
108  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE
109  */
110 int
111 read_integrity_table(WIMStruct *wim, u64 num_checked_bytes,
112                      struct integrity_table **table_ret)
113 {
114         void *buf;
115         struct integrity_table *table;
116         int ret;
117
118         if (wim->hdr.integrity_table_reshdr.uncompressed_size < 8)
119                 goto invalid;
120
121         DEBUG("Reading integrity table.");
122
123         ret = wim_reshdr_to_data(&wim->hdr.integrity_table_reshdr, wim, &buf);
124         if (ret)
125                 return ret;
126         table = buf;
127
128         table->size        = le32_to_cpu(table->size);
129         table->num_entries = le32_to_cpu(table->num_entries);
130         table->chunk_size  = le32_to_cpu(table->chunk_size);
131
132         DEBUG("table->size = %u, table->num_entries = %u, "
133               "table->chunk_size = %u",
134               table->size, table->num_entries, table->chunk_size);
135
136         if (table->size != wim->hdr.integrity_table_reshdr.uncompressed_size ||
137             table->size != (u64)table->num_entries * SHA1_HASH_SIZE + 12 ||
138             table->chunk_size == 0 ||
139             table->num_entries != DIV_ROUND_UP(num_checked_bytes, table->chunk_size))
140         {
141                 FREE(table);
142                 goto invalid;
143         }
144
145         *table_ret = table;
146         return 0;
147
148 invalid:
149         return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
150 }
151
152 /*
153  * calculate_integrity_table():
154  *
155  * Calculates an integrity table for the data in a file beginning at offset 208
156  * (WIM_HEADER_DISK_SIZE).
157  *
158  * @in_fd:
159  *      File descriptor for the file to be checked, opened for reading.  Does
160  *      not need to be at any specific location in the file.
161  *
162  * @new_check_end:
163  *      Offset of byte after the last byte to be checked.
164  *
165  * @old_table:
166  *      If non-NULL, a pointer to the table containing the previously calculated
167  *      integrity data for a prefix of this file.
168  *
169  * @old_check_end:
170  *      If @old_table is non-NULL, the byte after the last byte that was checked
171  *      in the old table.  Must be less than or equal to new_check_end.
172  *
173  * @integrity_table_ret:
174  *      On success, a pointer to the calculated integrity table is written into
175  *      this location.
176  *
177  * Return values:
178  *      WIMLIB_ERR_SUCCESS (0)
179  *      WIMLIB_ERR_NOMEM
180  *      WIMLIB_ERR_READ
181  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE
182  */
183 static int
184 calculate_integrity_table(struct filedes *in_fd,
185                           off_t new_check_end,
186                           const struct integrity_table *old_table,
187                           off_t old_check_end,
188                           struct integrity_table **integrity_table_ret,
189                           wimlib_progress_func_t progfunc,
190                           void *progctx)
191 {
192         int ret;
193         size_t chunk_size = INTEGRITY_CHUNK_SIZE;
194
195         /* If an old table is provided, set the chunk size to be compatible with
196          * the old chunk size, unless the old chunk size was weird. */
197         if (old_table != NULL) {
198                 if (old_table->num_entries == 0 ||
199                     old_table->chunk_size < INTEGRITY_MIN_CHUNK_SIZE ||
200                     old_table->chunk_size > INTEGRITY_MAX_CHUNK_SIZE)
201                         old_table = NULL;
202                 else
203                         chunk_size = old_table->chunk_size;
204         }
205
206
207         u64 old_check_bytes = old_check_end - WIM_HEADER_DISK_SIZE;
208         u64 new_check_bytes = new_check_end - WIM_HEADER_DISK_SIZE;
209
210         u32 old_num_chunks = DIV_ROUND_UP(old_check_bytes, chunk_size);
211         u32 new_num_chunks = DIV_ROUND_UP(new_check_bytes, chunk_size);
212
213         size_t old_last_chunk_size = MODULO_NONZERO(old_check_bytes, chunk_size);
214         size_t new_last_chunk_size = MODULO_NONZERO(new_check_bytes, chunk_size);
215
216         size_t new_table_size = 12 + new_num_chunks * SHA1_HASH_SIZE;
217
218         struct integrity_table *new_table = MALLOC(new_table_size);
219         if (!new_table)
220                 return WIMLIB_ERR_NOMEM;
221         new_table->num_entries = new_num_chunks;
222         new_table->size = new_table_size;
223         new_table->chunk_size = chunk_size;
224
225         u64 offset = WIM_HEADER_DISK_SIZE;
226         union wimlib_progress_info progress;
227
228         progress.integrity.total_bytes      = new_check_bytes;
229         progress.integrity.total_chunks     = new_num_chunks;
230         progress.integrity.completed_chunks = 0;
231         progress.integrity.completed_bytes  = 0;
232         progress.integrity.chunk_size       = chunk_size;
233         progress.integrity.filename         = NULL;
234
235         ret = call_progress(progfunc, WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
236                             &progress, progctx);
237         if (ret)
238                 goto out_free_new_table;
239
240         for (u32 i = 0; i < new_num_chunks; i++) {
241                 size_t this_chunk_size;
242                 if (i == new_num_chunks - 1)
243                         this_chunk_size = new_last_chunk_size;
244                 else
245                         this_chunk_size = chunk_size;
246                 if (old_table &&
247                     ((this_chunk_size == chunk_size && i < old_num_chunks - 1) ||
248                       (i == old_num_chunks - 1 && this_chunk_size == old_last_chunk_size)))
249                 {
250                         /* Can use SHA1 message digest from old integrity table
251                          * */
252                         copy_hash(new_table->sha1sums[i], old_table->sha1sums[i]);
253                 } else {
254                         /* Calculate the SHA1 message digest of this chunk */
255                         ret = calculate_chunk_sha1(in_fd, this_chunk_size,
256                                                    offset, new_table->sha1sums[i]);
257                         if (ret)
258                                 goto out_free_new_table;
259                 }
260                 offset += this_chunk_size;
261
262                 progress.integrity.completed_chunks++;
263                 progress.integrity.completed_bytes += this_chunk_size;
264                 ret = call_progress(progfunc, WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
265                                     &progress, progctx);
266                 if (ret)
267                         goto out_free_new_table;
268         }
269         *integrity_table_ret = new_table;
270         return 0;
271
272 out_free_new_table:
273         FREE(new_table);
274         return ret;
275 }
276
277 /*
278  * write_integrity_table():
279  *
280  * Writes a WIM integrity table (a list of SHA1 message digests of raw 10 MiB
281  * chunks of the file).
282  *
283  * This function can optionally re-use entries from an older integrity table.
284  * To do this, specify old_lookup_table_end and old_table.
285  *
286  * @wim:
287  *      WIMStruct for the WIM file.  @wim->out_fd must be a seekable descriptor
288  *      to the new WIM file, opened read-write, positioned at the location at
289  *      which the integrity table is to be written.  Furthermore,
290  *      @wim->hdr.integrity is expected to be a resource entry which will be set
291  *      to the integrity table information on success.  In addition, if
292  *      @old_lookup_table_end != 0, @wim->hdr.integrity must initially contain
293  *      information about the old integrity table, and @wim->in_fd must be a
294  *      seekable descriptor to the original WIM file opened for reading.
295  *
296  * @new_lookup_table_end:
297  *      The offset of the byte directly following the lookup table in the WIM
298  *      being written.
299  *
300  * @old_lookup_table_end:
301  *      If nonzero, the offset of the byte directly following the old lookup
302  *      table in the WIM.
303  *
304  * @old_table
305  *      Pointer to the old integrity table read into memory, or NULL if not
306  *      specified.
307  */
308 int
309 write_integrity_table(WIMStruct *wim,
310                       off_t new_lookup_table_end,
311                       off_t old_lookup_table_end,
312                       struct integrity_table *old_table)
313 {
314         struct integrity_table *new_table;
315         int ret;
316         u32 new_table_size;
317
318         DEBUG("Writing integrity table "
319               "(new_lookup_table_end=%"PRIu64", old_lookup_table_end=%"PRIu64")",
320               new_lookup_table_end, old_lookup_table_end);
321
322         wimlib_assert(old_lookup_table_end <= new_lookup_table_end);
323
324         ret = calculate_integrity_table(&wim->out_fd, new_lookup_table_end,
325                                         old_table, old_lookup_table_end,
326                                         &new_table, wim->progfunc, wim->progctx);
327         if (ret)
328                 return ret;
329
330         new_table_size = new_table->size;
331
332         new_table->size        = cpu_to_le32(new_table->size);
333         new_table->num_entries = cpu_to_le32(new_table->num_entries);
334         new_table->chunk_size  = cpu_to_le32(new_table->chunk_size);
335
336         ret = write_wim_resource_from_buffer(new_table,
337                                              new_table_size,
338                                              0,
339                                              &wim->out_fd,
340                                              WIMLIB_COMPRESSION_TYPE_NONE,
341                                              0,
342                                              &wim->hdr.integrity_table_reshdr,
343                                              NULL,
344                                              0);
345         FREE(new_table);
346         DEBUG("ret=%d", ret);
347         return ret;
348 }
349
350 /*
351  * verify_integrity():
352  *
353  * Checks a WIM for consistency with the integrity table.
354  *
355  * @in_fd:
356  *      File descriptor to the WIM file, opened for reading.
357  *
358  * @table:
359  *      The integrity table for the WIM, read into memory.
360  *
361  * @bytes_to_check:
362  *      Number of bytes in the WIM that need to be checked (offset of end of the
363  *      lookup table minus offset of end of the header).
364  *
365  * Returns:
366  *      > 0 (WIMLIB_ERR_READ, WIMLIB_ERR_UNEXPECTED_END_OF_FILE) on error
367  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
368  *      were no inconsistencies.
369  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
370  */
371 static int
372 verify_integrity(struct filedes *in_fd, const tchar *filename,
373                  const struct integrity_table *table,
374                  u64 bytes_to_check,
375                  wimlib_progress_func_t progfunc, void *progctx)
376 {
377         int ret;
378         u64 offset = WIM_HEADER_DISK_SIZE;
379         u8 sha1_md[SHA1_HASH_SIZE];
380         union wimlib_progress_info progress;
381
382         progress.integrity.total_bytes      = bytes_to_check;
383         progress.integrity.total_chunks     = table->num_entries;
384         progress.integrity.completed_chunks = 0;
385         progress.integrity.completed_bytes  = 0;
386         progress.integrity.chunk_size       = table->chunk_size;
387         progress.integrity.filename         = filename;
388
389         ret = call_progress(progfunc, WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
390                             &progress, progctx);
391         if (ret)
392                 return ret;
393
394         for (u32 i = 0; i < table->num_entries; i++) {
395                 size_t this_chunk_size;
396                 if (i == table->num_entries - 1)
397                         this_chunk_size = MODULO_NONZERO(bytes_to_check,
398                                                          table->chunk_size);
399                 else
400                         this_chunk_size = table->chunk_size;
401
402                 ret = calculate_chunk_sha1(in_fd, this_chunk_size, offset, sha1_md);
403                 if (ret)
404                         return ret;
405
406                 if (!hashes_equal(sha1_md, table->sha1sums[i]))
407                         return WIM_INTEGRITY_NOT_OK;
408
409                 offset += this_chunk_size;
410                 progress.integrity.completed_chunks++;
411                 progress.integrity.completed_bytes += this_chunk_size;
412
413                 ret = call_progress(progfunc, WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
414                                     &progress, progctx);
415                 if (ret)
416                         return ret;
417         }
418         return WIM_INTEGRITY_OK;
419 }
420
421
422 /*
423  * check_wim_integrity():
424  *
425  * Verifies the integrity of the WIM by making sure the SHA1 message digests of
426  * ~10 MiB chunks of the WIM match up with the values given in the integrity
427  * table.
428  *
429  * @wim:
430  *      The WIM, opened for reading.
431  *
432  * Returns:
433  *      > 0 (WIMLIB_ERR_INVALID_INTEGRITY_TABLE, WIMLIB_ERR_READ,
434  *           WIMLIB_ERR_UNEXPECTED_END_OF_FILE) on error
435  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
436  *      were no inconsistencies.
437  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
438  *      -2 (WIM_INTEGRITY_NONEXISTENT) if the WIM contains no integrity
439  *      information.
440  */
441 int
442 check_wim_integrity(WIMStruct *wim)
443 {
444         int ret;
445         u64 bytes_to_check;
446         struct integrity_table *table;
447         u64 end_lookup_table_offset;
448
449         if (!wim_has_integrity_table(wim)) {
450                 DEBUG("No integrity information.");
451                 return WIM_INTEGRITY_NONEXISTENT;
452         }
453
454         end_lookup_table_offset = wim->hdr.lookup_table_reshdr.offset_in_wim +
455                                   wim->hdr.lookup_table_reshdr.size_in_wim;
456
457         if (end_lookup_table_offset < WIM_HEADER_DISK_SIZE) {
458                 ERROR("WIM lookup table ends before WIM header ends!");
459                 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
460         }
461
462         bytes_to_check = end_lookup_table_offset - WIM_HEADER_DISK_SIZE;
463
464         ret = read_integrity_table(wim, bytes_to_check, &table);
465         if (ret)
466                 return ret;
467         ret = verify_integrity(&wim->in_fd, wim->filename, table,
468                                bytes_to_check, wim->progfunc, wim->progctx);
469         FREE(table);
470         return ret;
471 }