Preliminary support for native fds (UNIX only so far)
[wimlib] / src / integrity.c
1 /*
2  * integrity.c
3  *
4  * WIM files can optionally contain a table of SHA1 message digests at the end,
5  * one digest for each chunk of the file of some specified size (often 10 MB).
6  * This file implements the checking and writing of this table.
7  */
8
9 /*
10  * Copyright (C) 2012, 2013 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU General Public License as published by the Free
16  * Software Foundation; either version 3 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #include "wimlib_internal.h"
29 #include "buffer_io.h"
30 #include "sha1.h"
31
32 /* Size, in bytes, of each SHA1-summed chunk, when wimlib writes integrity
33  * information. */
34 #define INTEGRITY_CHUNK_SIZE 10485760
35
36 /* Only use a different chunk size for compatibility with an existing integrity
37  * table if the chunk size is between these two numbers. */
38 #define INTEGRITY_MIN_CHUNK_SIZE 4096
39 #define INTEGRITY_MAX_CHUNK_SIZE 134217728
40
41 struct integrity_table {
42         u32 size;
43         u32 num_entries;
44         u32 chunk_size;
45         u8  sha1sums[0][20];
46 };
47
48 static int
49 calculate_chunk_sha1(filedes_t in_fd, size_t this_chunk_size,
50                      off_t offset, u8 sha1_md[])
51 {
52         u8 buf[BUFFER_SIZE];
53         SHA_CTX ctx;
54         size_t bytes_remaining;
55         size_t bytes_to_read;
56         size_t bytes_read;
57
58         bytes_remaining = this_chunk_size;
59         sha1_init(&ctx);
60         do {
61                 bytes_to_read = min(bytes_remaining, sizeof(buf));
62                 bytes_read = full_pread(in_fd, buf, bytes_to_read, offset);
63                 if (bytes_read != bytes_to_read) {
64                         ERROR_WITH_ERRNO("Read error while calculating "
65                                          "integrity checksums");
66                         return WIMLIB_ERR_READ;
67                 }
68                 sha1_update(&ctx, buf, bytes_read);
69                 bytes_remaining -= bytes_read;
70                 offset += bytes_read;
71         } while (bytes_remaining);
72         sha1_final(sha1_md, &ctx);
73         return 0;
74 }
75
76
77 /*
78  * read_integrity_table: -  Reads the integrity table from a WIM file.
79  *
80  * @res_entry:
81  *      The resource entry that specifies the location of the integrity table.
82  *      The integrity table must exist (i.e. res_entry->offset must not be 0).
83  *
84  * @in_fd:
85  *      File descriptor to the WIM file, opened for reading.
86  *
87  * @num_checked_bytes:
88  *      Number of bytes of data that should be checked by the integrity table.
89  *
90  * @table ret:
91  *      On success, a pointer to an in-memory structure containing the integrity
92  *      information is written to this location.
93  *
94  * Returns 0 on success; nonzero on failure.  The possible error codes are:
95  *
96  *     * WIMLIB_ERR_INVALID_INTEGRITY_TABLE:  The integrity table is invalid.
97  *     * WIMLIB_ERR_NOMEM:  Could not allocate memory to store the integrity
98  *                          data.
99  *     * WIMLIB_ERR_READ:   Could not read the integrity data from the WIM file.
100  */
101 static int
102 read_integrity_table(const struct resource_entry *res_entry,
103                      filedes_t in_fd,
104                      u64 num_checked_bytes,
105                      struct integrity_table **table_ret)
106 {
107         struct integrity_table *table;
108         int ret;
109         u64 expected_size;
110         u64 expected_num_entries;
111
112         if (resource_is_compressed(res_entry)) {
113                 ERROR("Didn't expect a compressed integrity table");
114                 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
115         }
116
117         if (res_entry->size < 8 || res_entry->size  > 0xffffffff) {
118                 ERROR("Integrity table resource header is invalid");
119                 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
120         }
121
122         /* Read the integrity table into memory. */
123         table = MALLOC((size_t)res_entry->size);
124         if (table == NULL) {
125                 ERROR("Can't allocate %zu bytes for integrity table",
126                       (size_t)res_entry->size);
127                 return WIMLIB_ERR_NOMEM;
128         }
129
130         if (full_pread(in_fd, table, res_entry->size,
131                        res_entry->offset) != res_entry->size)
132         {
133                 ERROR("Failed to read integrity table (size = %zu, "
134                       " offset = %"PRIu64")",
135                       (size_t)res_entry->size, res_entry->offset);
136                 ret = WIMLIB_ERR_READ;
137                 goto out_free_table;
138         }
139
140         table->size        = le32_to_cpu(table->size);
141         table->num_entries = le32_to_cpu(table->num_entries);
142         table->chunk_size  = le32_to_cpu(table->chunk_size);
143
144         if (table->size != res_entry->size) {
145                 ERROR("Inconsistent integrity table sizes: Table header says "
146                       "%u bytes but resource entry says %u bytes",
147                       table->size, (unsigned)res_entry->size);
148                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
149                 goto out_free_table;
150         }
151
152         DEBUG("table->size = %u, table->num_entries = %u, "
153               "table->chunk_size = %u",
154               table->size, table->num_entries, table->chunk_size);
155
156         expected_size = (u64)table->num_entries * SHA1_HASH_SIZE + 12;
157
158         if (table->size != expected_size) {
159                 ERROR("Integrity table is %u bytes, but expected %"PRIu64" "
160                       "bytes to hold %u entries",
161                       table->size, expected_size, table->num_entries);
162                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
163                 goto out_free_table;
164         }
165
166         if (table->chunk_size == 0) {
167                 ERROR("Cannot use integrity chunk size of 0");
168                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
169                 goto out_free_table;
170         }
171
172         expected_num_entries = DIV_ROUND_UP(num_checked_bytes, table->chunk_size);
173
174         if (table->num_entries != expected_num_entries) {
175                 ERROR("%"PRIu64" integrity table entries would be required "
176                       "to checksum the %"PRIu64" bytes from the end of the "
177                       "header to the",
178                       expected_num_entries, num_checked_bytes);
179                 ERROR("end of the lookup table with a chunk size of %u, but "
180                       "there were only %u entries",
181                       table->chunk_size, table->num_entries);
182                 ret = WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
183                 goto out_free_table;
184         }
185         *table_ret = table;
186         ret = 0;
187         goto out;
188 out_free_table:
189         FREE(table);
190 out:
191         return ret;
192 }
193
194 /*
195  * calculate_integrity_table():
196  *
197  * Calculates an integrity table for the data in a file beginning at offset 208
198  * (WIM_HEADER_DISK_SIZE).
199  *
200  * @in_fd:
201  *      File descriptor for the file to be checked, opened for reading.  Does
202  *      not need to be at any specific location in the file.
203  *
204  * @new_check_end:
205  *      Offset of byte after the last byte to be checked.
206  *
207  * @old_table:
208  *      If non-NULL, a pointer to the table containing the previously calculated
209  *      integrity data for a prefix of this file.
210  *
211  * @old_check_end:
212  *      If @old_table is non-NULL, the byte after the last byte that was checked
213  *      in the old table.  Must be less than or equal to new_check_end.
214  *
215  * @progress_func:
216  *      If non-NULL, a progress function that will be called after every
217  *      calculated chunk.
218  *
219  * @integrity_table_ret:
220  *      On success, a pointer to the calculated integrity table is written into
221  *      this location.
222  *
223  * Returns 0 on success; nonzero on failure.
224  */
225 static int
226 calculate_integrity_table(filedes_t in_fd,
227                           off_t new_check_end,
228                           const struct integrity_table *old_table,
229                           off_t old_check_end,
230                           wimlib_progress_func_t progress_func,
231                           struct integrity_table **integrity_table_ret)
232 {
233         int ret;
234         size_t chunk_size = INTEGRITY_CHUNK_SIZE;
235
236         /* If an old table is provided, set the chunk size to be compatible with
237          * the old chunk size, unless the old chunk size was weird. */
238         if (old_table != NULL) {
239                 if (old_table->num_entries == 0 ||
240                     old_table->chunk_size < INTEGRITY_MIN_CHUNK_SIZE ||
241                     old_table->chunk_size > INTEGRITY_MAX_CHUNK_SIZE)
242                         old_table = NULL;
243                 else
244                         chunk_size = old_table->chunk_size;
245         }
246
247
248         u64 old_check_bytes = old_check_end - WIM_HEADER_DISK_SIZE;
249         u64 new_check_bytes = new_check_end - WIM_HEADER_DISK_SIZE;
250
251         u32 old_num_chunks = DIV_ROUND_UP(old_check_bytes, chunk_size);
252         u32 new_num_chunks = DIV_ROUND_UP(new_check_bytes, chunk_size);
253
254         size_t old_last_chunk_size = MODULO_NONZERO(old_check_bytes, chunk_size);
255         size_t new_last_chunk_size = MODULO_NONZERO(new_check_bytes, chunk_size);
256
257         size_t new_table_size = 12 + new_num_chunks * SHA1_HASH_SIZE;
258
259         struct integrity_table *new_table = MALLOC(new_table_size);
260         if (!new_table)
261                 return WIMLIB_ERR_NOMEM;
262         new_table->num_entries = new_num_chunks;
263         new_table->size = new_table_size;
264         new_table->chunk_size = chunk_size;
265
266         u64 offset = WIM_HEADER_DISK_SIZE;
267         union wimlib_progress_info progress;
268
269         if (progress_func) {
270                 progress.integrity.total_bytes      = new_check_bytes;
271                 progress.integrity.total_chunks     = new_num_chunks;
272                 progress.integrity.completed_chunks = 0;
273                 progress.integrity.completed_bytes  = 0;
274                 progress.integrity.chunk_size       = chunk_size;
275                 progress.integrity.filename         = NULL;
276                 progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
277                               &progress);
278         }
279
280         for (u32 i = 0; i < new_num_chunks; i++) {
281                 size_t this_chunk_size;
282                 if (i == new_num_chunks - 1)
283                         this_chunk_size = new_last_chunk_size;
284                 else
285                         this_chunk_size = chunk_size;
286                 if (old_table &&
287                     ((this_chunk_size == chunk_size && i < old_num_chunks - 1) ||
288                       (i == old_num_chunks - 1 && this_chunk_size == old_last_chunk_size)))
289                 {
290                         /* Can use SHA1 message digest from old integrity table
291                          * */
292                         copy_hash(new_table->sha1sums[i], old_table->sha1sums[i]);
293                 } else {
294                         /* Calculate the SHA1 message digest of this chunk */
295                         ret = calculate_chunk_sha1(in_fd, this_chunk_size,
296                                                    offset, new_table->sha1sums[i]);
297                         if (ret) {
298                                 FREE(new_table);
299                                 return ret;
300                         }
301                 }
302                 offset += this_chunk_size;
303                 if (progress_func) {
304                         progress.integrity.completed_chunks++;
305                         progress.integrity.completed_bytes += this_chunk_size;
306                         progress_func(WIMLIB_PROGRESS_MSG_CALC_INTEGRITY,
307                                       &progress);
308                 }
309         }
310         *integrity_table_ret = new_table;
311         return 0;
312 }
313
314 /*
315  * write_integrity_table():
316  *
317  * Writes a WIM integrity table (a list of SHA1 message digests of raw 10 MiB
318  * chunks of the file).
319  *
320  * This function can optionally re-use entries from an older integrity table.
321  * To do this, make @integrity_res_entry point to the resource entry for the
322  * older table (note: this is an input-output parameter), and set
323  * @old_lookup_table_end to the offset of the byte directly following the last
324  * byte checked by the old table.  If the old integrity table is invalid or
325  * cannot be read, a warning is printed and the integrity information is
326  * re-calculated.
327  *
328  * @fd:
329  *      File descriptor to the WIM file, opened read-write, positioned at the
330  *      location at which the integrity table is to be written.
331  *
332  * @integrity_res_entry:
333  *      Resource entry which will be set to point to the integrity table on
334  *      success.  In addition, if @old_lookup_table_end != 0, this initially
335  *      must point to the resource entry for the old integrity table for the
336  *      WIM.
337  *
338  * @new_lookup_table_end:
339  *      The offset of the byte directly following the lookup table in the WIM
340  *      being written.
341  *
342  * @old_lookup_table_end:
343  *      If nonzero, the offset of the byte directly following the old lookup
344  *      table in the WIM.
345  *
346  * @progress_func
347  *      If non-NULL, a progress function that will be called after every
348  *      calculated chunk.
349  *
350  * Returns:
351  *      0 on success, nonzero on failure.  The possible error codes are:
352  *         * WIMLIB_ERR_WRITE:  Could not write the integrity table.
353  *         * WIMLIB_ERR_READ:   Could not read a chunk of data that needed
354  *                              to be checked.
355  */
356 int
357 write_integrity_table(filedes_t fd,
358                       struct resource_entry *integrity_res_entry,
359                       off_t new_lookup_table_end,
360                       off_t old_lookup_table_end,
361                       wimlib_progress_func_t progress_func)
362 {
363         struct integrity_table *old_table;
364         struct integrity_table *new_table;
365         int ret;
366         off_t cur_offset;
367         u32 new_table_size;
368
369         wimlib_assert(old_lookup_table_end <= new_lookup_table_end);
370
371         cur_offset = filedes_offset(fd);
372         if (cur_offset == -1)
373                 return WIMLIB_ERR_WRITE;
374
375         if (integrity_res_entry->offset == 0 || old_lookup_table_end == 0) {
376                 old_table = NULL;
377         } else {
378                 ret = read_integrity_table(integrity_res_entry, fd,
379                                            old_lookup_table_end - WIM_HEADER_DISK_SIZE,
380                                            &old_table);
381                 if (ret == WIMLIB_ERR_INVALID_INTEGRITY_TABLE) {
382                         WARNING("Old integrity table is invalid! "
383                                 "Ignoring it");
384                 } else if (ret != 0) {
385                         WARNING("Can't read old integrity table! "
386                                 "Ignoring it");
387                 }
388         }
389
390         ret = calculate_integrity_table(fd, new_lookup_table_end,
391                                         old_table, old_lookup_table_end,
392                                         progress_func, &new_table);
393         if (ret)
394                 goto out_free_old_table;
395
396         new_table_size = new_table->size;
397
398         new_table->size        = cpu_to_le32(new_table->size);
399         new_table->num_entries = cpu_to_le32(new_table->num_entries);
400         new_table->chunk_size  = cpu_to_le32(new_table->chunk_size);
401
402         if (full_write(fd, new_table, new_table_size) != new_table_size) {
403                 ERROR_WITH_ERRNO("Failed to write WIM integrity table");
404                 ret = WIMLIB_ERR_WRITE;
405         } else {
406                 integrity_res_entry->offset        = cur_offset;
407                 integrity_res_entry->size          = new_table_size;
408                 integrity_res_entry->original_size = new_table_size;
409                 integrity_res_entry->flags         = 0;
410                 ret = 0;
411         }
412         FREE(new_table);
413 out_free_old_table:
414         FREE(old_table);
415         return ret;
416 }
417
418 /*
419  * verify_integrity():
420  *
421  * Checks a WIM for consistency with the integrity table.
422  *
423  * @in_fd:
424  *      File descriptor to the WIM file, opened for reading.
425  *
426  * @table:
427  *      The integrity table for the WIM, read into memory.
428  *
429  * @bytes_to_check:
430  *      Number of bytes in the WIM that need to be checked (offset of end of the
431  *      lookup table minus offset of end of the header).
432  *
433  * @progress_func
434  *      If non-NULL, a progress function that will be called after every
435  *      verified chunk.
436  *
437  * Returns:
438  *      > 0 (WIMLIB_ERR_*) on error
439  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
440  *      were no inconsistencies.
441  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
442  */
443 static int
444 verify_integrity(filedes_t in_fd, const tchar *filename,
445                  const struct integrity_table *table,
446                  u64 bytes_to_check,
447                  wimlib_progress_func_t progress_func)
448 {
449         int ret;
450         u64 offset = WIM_HEADER_DISK_SIZE;
451         u8 sha1_md[SHA1_HASH_SIZE];
452         union wimlib_progress_info progress;
453
454         if (progress_func) {
455                 progress.integrity.total_bytes      = bytes_to_check;
456                 progress.integrity.total_chunks     = table->num_entries;
457                 progress.integrity.completed_chunks = 0;
458                 progress.integrity.completed_bytes  = 0;
459                 progress.integrity.chunk_size       = table->chunk_size;
460                 progress.integrity.filename         = filename;
461                 progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
462                               &progress);
463         }
464         for (u32 i = 0; i < table->num_entries; i++) {
465                 size_t this_chunk_size;
466                 if (i == table->num_entries - 1)
467                         this_chunk_size = MODULO_NONZERO(bytes_to_check,
468                                                          table->chunk_size);
469                 else
470                         this_chunk_size = table->chunk_size;
471
472                 ret = calculate_chunk_sha1(in_fd, this_chunk_size, offset, sha1_md);
473                 if (ret)
474                         return ret;
475
476                 if (!hashes_equal(sha1_md, table->sha1sums[i]))
477                         return WIM_INTEGRITY_NOT_OK;
478
479                 offset += this_chunk_size;
480                 if (progress_func) {
481                         progress.integrity.completed_chunks++;
482                         progress.integrity.completed_bytes += this_chunk_size;
483                         progress_func(WIMLIB_PROGRESS_MSG_VERIFY_INTEGRITY,
484                                       &progress);
485                 }
486         }
487         return WIM_INTEGRITY_OK;
488 }
489
490
491 /*
492  * check_wim_integrity():
493  *
494  * Verifies the integrity of the WIM by making sure the SHA1 message digests of
495  * ~10 MiB chunks of the WIM match up with the values given in the integrity
496  * table.
497  *
498  * @w:
499  *      The WIM, opened for reading, and with the header already read.
500  *
501  * @progress_func
502  *      If non-NULL, a progress function that will be called after every
503  *      verified chunk.
504  *
505  * Returns:
506  *      > 0 (WIMLIB_ERR_*) on error
507  *      0 (WIM_INTEGRITY_OK) if the integrity was checked successfully and there
508  *      were no inconsistencies.
509  *      -1 (WIM_INTEGRITY_NOT_OK) if the WIM failed the integrity check.
510  *      -2 (WIM_INTEGRITY_NONEXISTENT) if the WIM contains no integrity
511  *      information.
512  */
513 int
514 check_wim_integrity(WIMStruct *w, wimlib_progress_func_t progress_func)
515 {
516         int ret;
517         u64 bytes_to_check;
518         struct integrity_table *table;
519         u64 end_lookup_table_offset;
520
521         if (w->hdr.integrity.offset == 0) {
522                 DEBUG("No integrity information.");
523                 return WIM_INTEGRITY_NONEXISTENT;
524         }
525
526         end_lookup_table_offset = w->hdr.lookup_table_res_entry.offset +
527                                   w->hdr.lookup_table_res_entry.size;
528
529         if (end_lookup_table_offset < WIM_HEADER_DISK_SIZE) {
530                 ERROR("WIM lookup table ends before WIM header ends!");
531                 return WIMLIB_ERR_INVALID_INTEGRITY_TABLE;
532         }
533
534         bytes_to_check = end_lookup_table_offset - WIM_HEADER_DISK_SIZE;
535
536         ret = read_integrity_table(&w->hdr.integrity, w->in_fd,
537                                    bytes_to_check, &table);
538         if (ret)
539                 return ret;
540         ret = verify_integrity(w->in_fd, w->filename, table,
541                                bytes_to_check, progress_func);
542         FREE(table);
543         return ret;
544 }