]> wimlib.net Git - wimlib/blob - src/integrity.c
0d1e13782ffad783e44c2e8b26ec944a6ef50c92
[wimlib] / src / integrity.c
1 /*
2  * integrity.c
3  *
4  * WIM files can optionally contain a table of SHA1 message digests at the end,
5  * one digest for each chunk of the file of some specified size (often 10 MB).
6  * This file implements the checking and writing of this table.
7  */
8
9 /*
10  * Copyright (C) 2012, 2013 Eric Biggers
11  *
12  * This file is free software; you can redistribute it and/or modify it under
13  * the terms of the GNU Lesser General Public License as published by the Free
14  * Software Foundation; either version 3 of the License, or (at your option) any
15  * later version.
16  *
17  * This file is distributed in the hope that it will be useful, but WITHOUT
18  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19  * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
20  * details.
21  *
22  * You should have received a copy of the GNU Lesser General Public License
23  * along with this file; if not, see http://www.gnu.org/licenses/.
24  */
25
26 #ifdef HAVE_CONFIG_H
27 #  include "config.h"
28 #endif
29
30 #include "wimlib/assert.h"
31 #include "wimlib/endianness.h"
32 #include "wimlib/error.h"
33 #include "wimlib/file_io.h"
34 #include "wimlib/integrity.h"
35 #include "wimlib/progress.h"
36 #include "wimlib/resource.h"
37 #include "wimlib/sha1.h"
38 #include "wimlib/wim.h"
39 #include "wimlib/write.h"
40
/* Default number of bytes covered by each SHA1 digest when wimlib writes
 * integrity information (10 MiB). */
#define INTEGRITY_CHUNK_SIZE (10 * 1024 * 1024)

/* For compatibility with an existing integrity table, its chunk size is kept
 * only when it lies within these two bounds (4 KiB and 128 MiB, inclusive);
 * otherwise the default chunk size is used. */
#define INTEGRITY_MIN_CHUNK_SIZE 4096
#define INTEGRITY_MAX_CHUNK_SIZE (128 * 1024 * 1024)
49
50 struct integrity_table {
51         u32 size;
52         u32 num_entries;
53         u32 chunk_size;
54         u8  sha1sums[][20];
55 } _packed_attribute;
56
57 static int
58 calculate_chunk_sha1(struct filedes *in_fd, size_t this_chunk_size,
59                      off_t offset, u8 sha1_md[])
60 {
61         u8 buf[BUFFER_SIZE];
62         SHA_CTX ctx;
63         size_t bytes_remaining;
64         size_t bytes_to_read;
65         int ret;
66
67         bytes_remaining = this_chunk_size;
68         sha1_init(&ctx);
69         do {
70                 bytes_to_read = min(bytes_remaining, sizeof(buf));
71                 ret = full_pread(in_fd, buf, bytes_to_read, offset);
72                 if (ret) {
73                         ERROR_WITH_ERRNO("Read error while calculating "
74                                          "integrity checksums");
75                         return ret;
76                 }
77                 sha1_update(&ctx, buf, bytes_to_read);
78                 bytes_remaining -= bytes_to_read;
79                 offset += bytes_to_read;
80         } while (bytes_remaining);
81         sha1_final(sha1_md, &ctx);
82         return 0;
83 }
84
85
86 /*
87  * read_integrity_table: -  Reads the integrity table from a WIM file.
88  *
89  * @wim:
90  *      WIMStruct for the WIM file; @wim->hdr.integrity_table_reshdr specifies
91  *      the location of the integrity table.  @wim->in_fd is expected to be a
92  *      seekable file descriptor to the WIM file opened for reading.
93  *
94  * @num_checked_bytes:
95  *      Number of bytes of data that should be checked by the integrity table.
96  *
97  * @table_ret:
98  *      On success, a pointer to an in-memory structure containing the integrity
99  *      information is written to this location.
100  *
101  * Return values:
102  *      WIMLIB_ERR_SUCCESS (0)
103  *      WIMLIB_ERR_INVALID_INTEGRITY_TABLE
104  *      WIMLIB_ERR_NOMEM
105  *      WIMLIB_ERR_READ
106  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE
107  */
108 int
109 read_integrity_table(WIMStruct *wim, u64 num_checked_bytes,
110                      struct integrity_table **table_ret)
111 {
112         void *buf;
113         struct integrity_table *table;
114         int ret;
115
116         if (wim->hdr.integrity_table_reshdr.uncompressed_size < 8)
117                 goto invalid;
118
119         DEBUG("Reading integrity table.");
120
121         ret = wim_reshdr_to_data(&wim->hdr.integrity_table_reshdr, wim, &buf);
122         if (ret)
123                 return ret;
124         table = buf;
125
126         table->size        = le32_to_cpu(table->size);
127         table->num_entries = le32_to_cpu(table->num_entries);
128         table->chunk_size  = le32_to_cpu(table->chunk_size);
129
130         DEBUG("table->size = %u, table->num_entries = %u, "
131               "table->chunk_size = %u",
132               table->size, table->num_entries, table->chunk_size);
133
134         if (table->size != wim->hdr.integrity_table_reshdr.uncompressed_size ||
135             table->size != (u64)table->num_entries * SHA1_HASH_SIZE + 12 ||
136             table->chunk_size == 0 ||
137             table->num_entries != DIV_ROUND_UP(num_checked_bytes, table->chunk_size))
138         {
139                 FREE(table);
140                 goto invalid;
141         }
142
143         *table_ret = table;
144         return 0;
145
146 invalid:
147         return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
148 }
149
150 /*
151  * calculate_integrity_table():
152  *
153  * Calculates an integrity table for the data in a file beginning at offset 208
154  * (WIM_HEADER_DISK_SIZE).
155  *
156  * @in_fd:
157  *      File descriptor for the file to be checked, opened for reading.  Does
158  *      not need to be at any specific location in the file.
159  *
160  * @new_check_end:
161  *      Offset of byte after the last byte to be checked.
162  *
163  * @old_table:
164  *      If non-NULL, a pointer to the table containing the previously calculated
165  *      integrity data for a prefix of this file.
166  *
167  * @old_check_end:
168  *      If @old_table is non-NULL, the byte after the last byte that was checked
169  *      in the old table.  Must be less than or equal to new_check_end.
170  *
171  * @integrity_table_ret:
172  *      On success, a pointer to the calculated integrity table is written into
173  *      this location.
174  *
175  * Return values:
176  *      WIMLIB_ERR_SUCCESS (0)
177  *      WIMLIB_ERR_NOMEM
178  *      WIMLIB_ERR_READ
179  *      WIMLIB_ERR_UNEXPECTED_END_OF_FILE
180  */
181 static int
182 calculate_integrity_table(struct filedes *in_fd,
183                           off_t new_check_end,
184                           const struct integrity_table *old_table,
185                           off_t old_check_end,
186                           struct integrity_table **integrity_table_ret,
187                           wimlib_progress_func_t progfunc,
188                           void *progctx)
189 {
190         int ret;
191         size_t chunk_size = INTEGRITY_CHUNK_SIZE;
192
193         /* If an old table is provided, set the chunk size to be compatible with
194          * the old chunk size, unless the old chunk size was weird. */
195         if (old_table != NULL) {
196                 if (old_table->num_entries == 0 ||
197                     old_table->chunk_size < INTEGRITY_MIN_CHUNK_SIZE ||
198                     old_table->chunk_size > INTEGRITY_MAX_CHUNK_SIZE)
199                         old_table = NULL;
200                 else
201                         chunk_size = old_table->chunk_size;
202         }
203
204
205         u64 old_check_bytes = old_check_end - WIM_HEADER_DISK_SIZE;
206         u64 new_check_bytes = new_check_end - WIM_HEADER_DISK_SIZE;
207
208         u32 old_num_chunks = DIV_ROUND_UP(old_check_bytes, chunk_size);
209         u32 new_num_chunks = DIV_ROUND_UP(new_check_bytes, chunk_size);
210
211         size_t old_last_chunk_size = MODULO_NONZERO(old_check_bytes, chunk_size);
212         size_t new_last_chunk_size = MODULO_NONZERO(new_check_bytes, chunk_size);
213
214         size_t new_table_size = 12 + new_num_chunks * SHA1_HASH_SIZE;
215
216         struct integrity_table *new_table = MALLOC(new_table_size);
217         if (!new_table)
218                 return WIMLIB_ERR_NOMEM;
219         new_table->num_entries = new_num_chunks;
220         new_table->size = new_table_size;
221         new_table->chunk_size = chunk_size;
222
223         u64 offset = WIM_HEADER_DISK_SIZE;
224         union wimlib_progress_info progress;
225
226         progress.integrity.total_bytes      = new_check_bytes;
227         progress.integrity.total_chunks     = new_num_chunks;
228         progress.integrity.completed_chunks = 0;
229         progress.integrity.completed_bytes  = 0;
230         progress.integrity.chunk_size       = chunk_size;
231         progress.integrity.filename         = NULL;
232
233         ret = call_progress(progfunc, WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
234                             &progress, progctx);
235         if (ret)
236                 goto out_free_new_table;
237
238         for (u32 i = 0; i < new_num_chunks; i++) {
239                 size_t this_chunk_size;
240                 if (i == new_num_chunks - 1)
241                         this_chunk_size = new_last_chunk_size;
242                 else
243                         this_chunk_size = chunk_size;
244                 if (old_table &&
245                     ((this_chunk_size == chunk_size && i < old_num_chunks - 1) ||
246                       (i == old_num_chunks - 1 && this_chunk_size == old_last_chunk_size)))
247                 {
248                         /* Can use SHA1 message digest from old integrity table
249                          * */
250                         copy_hash(new_table->sha1sums[i], old_table->sha1sums[i]);
251                 } else {
252                         /* Calculate the SHA1 message digest of this chunk */
253                         ret = calculate_chunk_sha1(in_fd, this_chunk_size,
254                                                    offset, new_table->sha1sums[i]);
255                         if (ret)
256                                 goto out_free_new_table;
257                 }
258                 offset += this_chunk_size;
259
260                 progress.integrity.completed_chunks++;
261                 progress.integrity.completed_bytes += this_chunk_size;
262                 ret = call_progress(progfunc, WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
263                                     &progress, progctx);
264                 if (ret)
265                         goto out_free_new_table;
266         }
267         *integrity_table_ret = new_table;
268         return 0;
269
270 out_free_new_table:
271         FREE(new_table);
272         return ret;
273 }
274
275 /*
276  * write_integrity_table():
277  *
278  * Writes a WIM integrity table (a list of SHA1 message digests of raw 10 MiB
279  * chunks of the file).
280  *
281  * This function can optionally re-use entries from an older integrity table.
282  * To do this, specify old_blob_table_end and old_table.
283  *
284  * @wim:
285  *      WIMStruct for the WIM file.  @wim->out_fd must be a seekable descriptor
286  *      to the new WIM file, opened read-write, positioned at the location at
287  *      which the integrity table is to be written.  Furthermore,
288  *      @wim->hdr.integrity is expected to be a resource entry which will be set
289  *      to the integrity table information on success.  In addition, if
290  *      @old_blob_table_end != 0, @wim->hdr.integrity must initially contain
291  *      information about the old integrity table, and @wim->in_fd must be a
292  *      seekable descriptor to the original WIM file opened for reading.
293  *
294  * @new_blob_table_end:
295  *      The offset of the byte directly following the blob table in the WIM
296  *      being written.
297  *
298  * @old_blob_table_end:
299  *      If nonzero, the offset of the byte directly following the old blob table
300  *      in the WIM.
301  *
302  * @old_table
303  *      Pointer to the old integrity table read into memory, or NULL if not
304  *      specified.
305  */
306 int
307 write_integrity_table(WIMStruct *wim,
308                       off_t new_blob_table_end,
309                       off_t old_blob_table_end,
310                       struct integrity_table *old_table)
311 {
312         struct integrity_table *new_table;
313         int ret;
314         u32 new_table_size;
315
316         DEBUG("Writing integrity table "
317               "(new_blob_table_end=%"PRIu64", old_blob_table_end=%"PRIu64")",
318               new_blob_table_end, old_blob_table_end);
319
320         wimlib_assert(old_blob_table_end <= new_blob_table_end);
321
322         ret = calculate_integrity_table(&wim->out_fd, new_blob_table_end,
323                                         old_table, old_blob_table_end,
324                                         &new_table, wim->progfunc, wim->progctx);
325         if (ret)
326                 return ret;
327
328         new_table_size = new_table->size;
329
330         new_table->size        = cpu_to_le32(new_table->size);
331         new_table->num_entries = cpu_to_le32(new_table->num_entries);
332         new_table->chunk_size  = cpu_to_le32(new_table->chunk_size);
333
334         ret = write_wim_resource_from_buffer(new_table,
335                                              new_table_size,
336                                              false,
337                                              &wim->out_fd,
338                                              WIMLIB_COMPRESSION_TYPE_NONE,
339                                              0,
340                                              &wim->hdr.integrity_table_reshdr,
341                                              NULL,
342                                              0);
343         FREE(new_table);
344         DEBUG("ret=%d", ret);
345         return ret;
346 }
347
348 /*
349  * verify_integrity():
350  *
351  * Checks a WIM for consistency with the integrity table.
352  *
353  * @in_fd:
354  *      File descriptor to the WIM file, opened for reading.
355  *
356  * @table:
357  *      The integrity table for the WIM, read into memory.
358  *
359  * @bytes_to_check:
360  *      Number of bytes in the WIM that need to be checked (offset of end of the
361  *      blob table minus offset of end of the header).
362  *
363  * Returns:
364  *      > 0 (WIMLIB_ERR_READ, WIMLIB_ERR_UNEXPECTED_END_OF_FILE) on error
365  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
366  *      were no inconsistencies.
367  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
368  */
369 static int
370 verify_integrity(struct filedes *in_fd, const tchar *filename,
371                  const struct integrity_table *table,
372                  u64 bytes_to_check,
373                  wimlib_progress_func_t progfunc, void *progctx)
374 {
375         int ret;
376         u64 offset = WIM_HEADER_DISK_SIZE;
377         u8 sha1_md[SHA1_HASH_SIZE];
378         union wimlib_progress_info progress;
379
380         progress.integrity.total_bytes      = bytes_to_check;
381         progress.integrity.total_chunks     = table->num_entries;
382         progress.integrity.completed_chunks = 0;
383         progress.integrity.completed_bytes  = 0;
384         progress.integrity.chunk_size       = table->chunk_size;
385         progress.integrity.filename         = filename;
386
387         ret = call_progress(progfunc, WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
388                             &progress, progctx);
389         if (ret)
390                 return ret;
391
392         for (u32 i = 0; i < table->num_entries; i++) {
393                 size_t this_chunk_size;
394                 if (i == table->num_entries - 1)
395                         this_chunk_size = MODULO_NONZERO(bytes_to_check,
396                                                          table->chunk_size);
397                 else
398                         this_chunk_size = table->chunk_size;
399
400                 ret = calculate_chunk_sha1(in_fd, this_chunk_size, offset, sha1_md);
401                 if (ret)
402                         return ret;
403
404                 if (!hashes_equal(sha1_md, table->sha1sums[i]))
405                         return WIM_INTEGRITY_NOT_OK;
406
407                 offset += this_chunk_size;
408                 progress.integrity.completed_chunks++;
409                 progress.integrity.completed_bytes += this_chunk_size;
410
411                 ret = call_progress(progfunc, WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
412                                     &progress, progctx);
413                 if (ret)
414                         return ret;
415         }
416         return WIM_INTEGRITY_OK;
417 }
418
419
420 /*
421  * check_wim_integrity():
422  *
423  * Verifies the integrity of the WIM by making sure the SHA1 message digests of
424  * ~10 MiB chunks of the WIM match up with the values given in the integrity
425  * table.
426  *
427  * @wim:
428  *      The WIM, opened for reading.
429  *
430  * Returns:
431  *      > 0 (WIMLIB_ERR_INVALID_INTEGRITY_TABLE, WIMLIB_ERR_READ,
432  *           WIMLIB_ERR_UNEXPECTED_END_OF_FILE) on error
433  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
434  *      were no inconsistencies.
435  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
436  *      -2 (WIM_INTEGRITY_NONEXISTENT) if the WIM contains no integrity
437  *      information.
438  */
439 int
440 check_wim_integrity(WIMStruct *wim)
441 {
442         int ret;
443         u64 bytes_to_check;
444         struct integrity_table *table;
445         u64 end_blob_table_offset;
446
447         if (!wim_has_integrity_table(wim)) {
448                 DEBUG("No integrity information.");
449                 return WIM_INTEGRITY_NONEXISTENT;
450         }
451
452         end_blob_table_offset = wim->hdr.blob_table_reshdr.offset_in_wim +
453                                 wim->hdr.blob_table_reshdr.size_in_wim;
454
455         if (end_blob_table_offset < WIM_HEADER_DISK_SIZE) {
456                 ERROR("WIM blob table ends before WIM header ends!");
457                 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
458         }
459
460         bytes_to_check = end_blob_table_offset - WIM_HEADER_DISK_SIZE;
461
462         ret = read_integrity_table(wim, bytes_to_check, &table);
463         if (ret)
464                 return ret;
465         ret = verify_integrity(&wim->in_fd, wim->filename, table,
466                                bytes_to_check, wim->progfunc, wim->progctx);
467         FREE(table);
468         return ret;
469 }