be0366218199b1c0bf1a8898c4e01706784fb850
[wimlib] / src / verify.c
1 /*
2  * verify.c
3  *
4  * Some functions to verify that stuff in the WIM is valid.  Of course, not
5  * *all* the verifications of the input data are in this file.
6  */
7
8 /*
9  * Copyright (C) 2012, 2013 Biggers
10  *
11  * wimlib - Library for working with WIM files
12  *
13  * This file is part of wimlib, a library for working with WIM files.
14  *
15  * wimlib is free software; you can redistribute it and/or modify it under the
16  * terms of the GNU General Public License as published by the Free
17  * Software Foundation; either version 3 of the License, or (at your option)
18  * any later version.
19  *
20  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
21  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
22  * A PARTICULAR PURPOSE. See the GNU General Public License for more
23  * details.
24  *
25  * You should have received a copy of the GNU General Public License
26  * along with wimlib; if not, see http://www.gnu.org/licenses/.
27  */
28
29 #include "wimlib_internal.h"
30 #include "dentry.h"
31 #include "lookup_table.h"
32
33 static int verify_inode(struct wim_inode *inode, const WIMStruct *w)
34 {
35         const struct wim_lookup_table *table = w->lookup_table;
36         const struct wim_security_data *sd = wim_const_security_data(w);
37         const struct wim_dentry *first_dentry = inode_first_dentry(inode);
38         const struct wim_dentry *dentry;
39         int ret = WIMLIB_ERR_INVALID_DENTRY;
40
41         /* Check the security ID.  -1 is valid and means "no security
42          * descriptor".  Anything else has to be a valid index into the WIM
43          * image's security descriptors table. */
44         if (inode->i_security_id < -1) {
45                 ERROR("Dentry `%s' has an invalid security ID (%d)",
46                         first_dentry->full_path_utf8, inode->i_security_id);
47                 goto out;
48         }
49
50         if (inode->i_security_id >= sd->num_entries) {
51                 ERROR("Dentry `%s' has an invalid security ID (%d) "
52                       "(there are only %u entries in the security table)",
53                         first_dentry->full_path_utf8, inode->i_security_id,
54                         sd->num_entries);
55                 goto out;
56         }
57
58         /* Check that lookup table entries for all the inode's stream exist,
59          * except if the SHA1 message digest is all 0's, which indicates an
60          * empty stream.
61          *
62          * This check is skipped on split WIMs. */
63         if (w->hdr.total_parts == 1) {
64                 for (unsigned i = 0; i <= inode->i_num_ads; i++) {
65                         struct wim_lookup_table_entry *lte;
66                         const u8 *hash;
67                         hash = inode_stream_hash_unresolved(inode, i);
68                         lte = __lookup_resource(table, hash);
69                         if (!lte && !is_zero_hash(hash)) {
70                                 ERROR("Could not find lookup table entry for stream "
71                                       "%u of dentry `%s'", i, first_dentry->full_path_utf8);
72                                 goto out;
73                         }
74                         if (lte)
75                                 lte->real_refcnt += inode->i_nlink;
76
77                         /* The following is now done when required by
78                          * wim_run_full_verifications(). */
79
80                 #if 0
81                         if (lte && !w->full_verification_in_progress &&
82                             lte->real_refcnt > lte->refcnt)
83                         {
84                         #ifdef ENABLE_ERROR_MESSAGES
85                                 WARNING("The following lookup table entry "
86                                         "has a reference count of %u, but",
87                                         lte->refcnt);
88                                 WARNING("We found %u references to it",
89                                         lte->real_refcnt);
90                                 WARNING("(One dentry referencing it is at `%s')",
91                                          first_dentry->full_path_utf8);
92
93                                 print_lookup_table_entry(lte);
94                         #endif
95                                 /* Guess what!  install.wim for Windows 8
96                                  * contains many streams referenced by more
97                                  * dentries than the refcnt stated in the lookup
98                                  * table entry.  So we will need to handle this
99                                  * case and not just make it be an error...  I'm
100                                  * just setting the reference count to the
101                                  * number of references we found.
102                                  * (Unfortunately, even after doing this, the
103                                  * reference count could be too low if it's also
104                                  * referenced in other WIM images) */
105
106                         #if 1
107                                 lte->refcnt = lte->real_refcnt;
108                                 WARNING("Fixing reference count");
109                         #else
110                                 goto out;
111                         #endif
112                         }
113                 #endif
114                 }
115         }
116
117         /* Make sure there is only one unnamed data stream. */
118         unsigned num_unnamed_streams = 0;
119         for (unsigned i = 0; i <= inode->i_num_ads; i++) {
120                 const u8 *hash;
121                 hash = inode_stream_hash_unresolved(inode, i);
122                 if (inode_stream_name_len(inode, i) == 0 && !is_zero_hash(hash))
123                         num_unnamed_streams++;
124         }
125         if (num_unnamed_streams > 1) {
126                 ERROR("Dentry `%s' has multiple (%u) un-named streams",
127                       first_dentry->full_path_utf8, num_unnamed_streams);
128                 goto out;
129         }
130
131         /* Files cannot have multiple DOS names, even if they have multiple
132          * names in multiple directories (i.e. hard links).
133          * Source: NTFS-3g authors. */
134         const struct wim_dentry *dentry_with_dos_name = NULL;
135         inode_for_each_dentry(dentry, inode) {
136                 if (dentry->short_name_len) {
137                         if (dentry_with_dos_name) {
138                                 ERROR("Hard-linked file has a DOS name at "
139                                       "both `%s' and `%s'",
140                                       dentry_with_dos_name->full_path_utf8,
141                                       dentry->full_path_utf8);
142                                 goto out;
143                         }
144                         dentry_with_dos_name = dentry;
145                 }
146         }
147
148         /* Directories with multiple links have not been tested. XXX */
149         if (inode->i_nlink > 1 && inode->i_attributes & FILE_ATTRIBUTE_DIRECTORY) {
150                 ERROR("Hard-linked directory `%s' is unsupported",
151                       first_dentry->full_path_utf8);
152                 goto out;
153         }
154
155         inode->i_verified = 1;
156         ret = 0;
157 out:
158         return ret;
159 }
160
161 /* Run some miscellaneous verifications on a WIM dentry */
162 int verify_dentry(struct wim_dentry *dentry, void *wim)
163 {
164         int ret;
165
166         /* Verify the associated inode, but only one time no matter how many
167          * dentries it has. */
168         if (!dentry->d_inode->i_verified) {
169                 ret = verify_inode(dentry->d_inode, wim);
170                 if (ret != 0)
171                         return ret;
172         }
173
174         /* Make sure root dentry is unnamed, while every other dentry has at
175          * least a long name.
176          *
177          * I am assuming that dentries having only a DOS name is illegal; i.e.,
178          * Windows will always combine the Win32 name and DOS name for a file
179          * into a single WIM dentry, even if they are stored separately on NTFS.
180          * (This seems to be the case...) */
181         if (dentry_is_root(dentry)) {
182                 if (dentry->file_name_len || dentry->short_name_len) {
183                         ERROR("The root dentry is named `%s', but it must "
184                               "be unnamed", dentry->file_name_utf8);
185                         return WIMLIB_ERR_INVALID_DENTRY;
186                 }
187         } else {
188                 if (!dentry->file_name_len) {
189                         ERROR("Dentry `%s' has no long name",
190                               dentry->full_path_utf8);
191                         return WIMLIB_ERR_INVALID_DENTRY;
192                 }
193         }
194
195 #if 0
196         /* Check timestamps */
197         if (inode->i_last_access_time < inode->i_creation_time ||
198             inode->i_last_write_time < inode->i_creation_time) {
199                 WARNING("Dentry `%s' was created after it was last accessed or "
200                       "written to", dentry->full_path_utf8);
201         }
202 #endif
203
204         return 0;
205 }
206
207 static int image_run_full_verifications(WIMStruct *w)
208 {
209         return for_dentry_in_tree(wim_root_dentry(w), verify_dentry, w);
210 }
211
212 static int lte_fix_refcnt(struct wim_lookup_table_entry *lte, void *ctr)
213 {
214         if (lte->refcnt != lte->real_refcnt) {
215         #ifdef ENABLE_ERROR_MESSAGES
216                 WARNING("The following lookup table entry has a reference "
217                         "count of %u, but", lte->refcnt);
218                 WARNING("We found %u references to it",
219                         lte->real_refcnt);
220                 print_lookup_table_entry(lte, stderr);
221         #endif
222                 lte->refcnt = lte->real_refcnt;
223                 ++*(unsigned long *)ctr;
224         }
225         return 0;
226 }
227
228 /* Ideally this would be unnecessary... however, the WIMs for Windows 8 are
229  * screwed up because some lookup table entries are referenced more times than
230  * their stated reference counts.  So theoretically, if we delete all the
231  * references to a stream and then remove it, it might still be referenced
232  * somewhere else, making a file be missing from the WIM... So, work around this
233  * problem by looking at ALL the images to re-calculate the reference count of
234  * EVERY lookup table entry.  This only absolutely has to be done before an image
235  * is deleted or before an image is mounted read-write. */
236 int wim_run_full_verifications(WIMStruct *w)
237 {
238         int ret;
239
240         for_lookup_table_entry(w->lookup_table, lte_zero_real_refcnt, NULL);
241         w->all_images_verified = 1;
242         w->full_verification_in_progress = 1;
243         ret = for_image(w, WIMLIB_ALL_IMAGES, image_run_full_verifications);
244         w->full_verification_in_progress = 0;
245         if (ret == 0) {
246                 unsigned long num_ltes_with_bogus_refcnt = 0;
247                 for (int i = 0; i < w->hdr.image_count; i++)
248                         w->image_metadata[i].metadata_lte->real_refcnt++;
249                 for_lookup_table_entry(w->lookup_table, lte_fix_refcnt,
250                                        &num_ltes_with_bogus_refcnt);
251                 if (num_ltes_with_bogus_refcnt != 0) {
252                         WARNING("A total of %lu entries in the WIM's stream "
253                                 "lookup table had to have\n"
254                                 "          their reference counts fixed.",
255                                 num_ltes_with_bogus_refcnt);
256                 }
257         } else {
258                 w->all_images_verified = 0;
259         }
260         return ret;
261 }
262
263 /*
264  * verify_swm_set: - Sanity checks to make sure a set of WIMs correctly
265  *                   correspond to a spanned set.
266  *
267  * @w:
268  *      Part 1 of the set.
269  *
270  * @additional_swms:
271  *      All parts of the set other than part 1.
272  *
273  * @num_additional_swms:
274  *      Number of WIMStructs in @additional_swms.  Or, the total number of parts
275  *      in the set minus 1.
276  *
277  * @return:
278  *      0 on success; WIMLIB_ERR_SPLIT_INVALID if the set is not valid.
279  */
280 int verify_swm_set(WIMStruct *w, WIMStruct **additional_swms,
281                    unsigned num_additional_swms)
282 {
283         unsigned total_parts = w->hdr.total_parts;
284         int ctype;
285         const u8 *guid;
286
287         if (total_parts != num_additional_swms + 1) {
288                 ERROR("`%s' says there are %u parts in the spanned set, "
289                       "but %s%u part%s provided",
290                       w->filename, total_parts,
291                       (num_additional_swms + 1 < total_parts) ? "only " : "",
292                       num_additional_swms + 1,
293                       (num_additional_swms) ? "s were" : " was");
294                 return WIMLIB_ERR_SPLIT_INVALID;
295         }
296         if (w->hdr.part_number != 1) {
297                 ERROR("WIM `%s' is not the first part of the split WIM.",
298                       w->filename);
299                 return WIMLIB_ERR_SPLIT_INVALID;
300         }
301         for (unsigned i = 0; i < num_additional_swms; i++) {
302                 if (additional_swms[i]->hdr.total_parts != total_parts) {
303                         ERROR("WIM `%s' says there are %u parts in the spanned set, "
304                               "but %u parts were provided",
305                               additional_swms[i]->filename,
306                               additional_swms[i]->hdr.total_parts,
307                               total_parts);
308                         return WIMLIB_ERR_SPLIT_INVALID;
309                 }
310         }
311
312         /* keep track of ctype and guid just to make sure they are the same for
313          * all the WIMs. */
314         ctype = wimlib_get_compression_type(w);
315         guid = w->hdr.guid;
316
317         {
318                 /* parts_to_swms is not allocated at function scope because it
319                  * should only be allocated after num_additional_swms was
320                  * checked to be the same as w->hdr.total_parts.  Otherwise, it
321                  * could be unexpectedly high and cause a stack overflow. */
322                 WIMStruct *parts_to_swms[num_additional_swms];
323                 ZERO_ARRAY(parts_to_swms);
324                 for (unsigned i = 0; i < num_additional_swms; i++) {
325
326                         WIMStruct *swm = additional_swms[i];
327
328                         if (wimlib_get_compression_type(swm) != ctype) {
329                                 ERROR("The split WIMs do not all have the same "
330                                       "compression type");
331                                 return WIMLIB_ERR_SPLIT_INVALID;
332                         }
333                         if (memcmp(guid, swm->hdr.guid, WIM_GID_LEN) != 0) {
334                                 ERROR("The split WIMs do not all have the same "
335                                       "GUID");
336                                 return WIMLIB_ERR_SPLIT_INVALID;
337                         }
338                         if (swm->hdr.part_number == 1) {
339                                 ERROR("WIMs `%s' and `%s' both are marked as the "
340                                       "first WIM in the spanned set",
341                                       w->filename, swm->filename);
342                                 return WIMLIB_ERR_SPLIT_INVALID;
343                         }
344                         if (swm->hdr.part_number == 0 ||
345                             swm->hdr.part_number > total_parts)
346                         {
347                                 ERROR("WIM `%s' says it is part %u in the spanned set, "
348                                       "but the part number must be in the range "
349                                       "[1, %u]",
350                                       swm->filename, swm->hdr.part_number, total_parts);
351                                 return WIMLIB_ERR_SPLIT_INVALID;
352                         }
353                         if (parts_to_swms[swm->hdr.part_number - 2])
354                         {
355                                 ERROR("`%s' and `%s' are both marked as part %u of %u "
356                                       "in the spanned set",
357                                       parts_to_swms[swm->hdr.part_number - 2]->filename,
358                                       swm->filename,
359                                       swm->hdr.part_number,
360                                       total_parts);
361                                 return WIMLIB_ERR_SPLIT_INVALID;
362                         } else {
363                                 parts_to_swms[swm->hdr.part_number - 2] = swm;
364                         }
365                 }
366         }
367         return 0;
368 }
369