NTFS capture (IN PROGRESS)
[wimlib] / src / ntfs-capture.c
1 /*
2  * ntfs-capture.c
3  *
4  * Capture a WIM image from a NTFS volume.  We capture everything we can,
5  * including security data and alternate data streams.  There should be no loss
6  * of information.
7  */
8
9 /*
10  * Copyright (C) 2012 Eric Biggers
11  *
12  * This file is part of wimlib, a library for working with WIM files.
13  *
14  * wimlib is free software; you can redistribute it and/or modify it under the
15  * terms of the GNU Lesser General Public License as published by the Free
16  * Software Foundation; either version 2.1 of the License, or (at your option)
17  * any later version.
18  *
19  * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
20  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
21  * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
22  * details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with wimlib; if not, see http://www.gnu.org/licenses/.
26  */
27
28 #include "config.h"
29 #include "wimlib_internal.h"
30
31
32 #ifdef WITH_NTFS_3G
33 #include "dentry.h"
34 #include "lookup_table.h"
35 #include "io.h"
36 #include <ntfs-3g/layout.h>
37 #include <ntfs-3g/acls.h>
38 #include <ntfs-3g/attrib.h>
39 #include <ntfs-3g/misc.h>
40 #include <ntfs-3g/reparse.h>
41 #include <ntfs-3g/security.h>
42 #include <ntfs-3g/volume.h>
43 #include <stdlib.h>
44 #include <unistd.h>
45
46 extern int ntfs_inode_get_security(ntfs_inode *ni, u32 selection, char *buf,
47                                    u32 buflen, u32 *psize);
48
49 extern int ntfs_inode_get_attributes(ntfs_inode *ni);
50
51 struct sd_tree {
52         u32 num_sds;
53         struct wim_security_data *sd;
54         struct sd_node *root;
55 };
56
57 struct sd_node {
58         int security_id;
59         u8 hash[SHA1_HASH_SIZE];
60         struct sd_node *left;
61         struct sd_node *right;
62 };
63
64 static void free_sd_tree(struct sd_node *root)
65 {
66         if (root) {
67                 free_sd_tree(root->left);
68                 free_sd_tree(root->right);
69                 FREE(root);
70         }
71 }
72
73 static void insert_sd_node(struct sd_node *new, struct sd_node *root)
74 {
75         int cmp = hashes_cmp(root->hash, new->hash);
76         if (cmp < 0) {
77                 if (root->left)
78                         insert_sd_node(new, root->left);
79                 else 
80                         root->left = new;
81         } else if (cmp > 0) {
82                 if (root->right)
83                         insert_sd_node(new, root->right);
84                 else 
85                         root->right = new;
86         } else {
87                 wimlib_assert(0);
88         }
89 }
90
91 static int lookup_sd(const u8 hash[SHA1_HASH_SIZE], struct sd_node *node)
92 {
93         int cmp;
94         if (!node)
95                 return -1;
96         cmp = hashes_cmp(hash, node->hash);
97         if (cmp < 0)
98                 return lookup_sd(hash, node->left);
99         else if (cmp > 0)
100                 return lookup_sd(hash, node->right);
101         else
102                 return node->security_id;
103 }
104
105 static int tree_add_sd(struct sd_tree *tree, const u8 *descriptor,
106                        size_t size)
107 {
108         u8 hash[SHA1_HASH_SIZE];
109         int security_id;
110         struct sd_node *new;
111         u8 **descriptors;
112         u64 *sizes;
113         u8 *descr_copy;
114         struct wim_security_data *sd = tree->sd;
115         sha1_buffer(descriptor, size, hash);
116
117         security_id = lookup_sd(hash, tree->root);
118         if (security_id >= 0)
119                 return security_id;
120
121         new = MALLOC(sizeof(struct sd_node));
122         if (!new)
123                 return -1;
124         descr_copy = MALLOC(size);
125         if (!descr_copy)
126                 goto out_free_node;
127         memcpy(descr_copy, descriptor, size);
128         new->security_id = tree->num_sds++;
129         new->left = NULL;
130         new->right = NULL;
131         copy_hash(new->hash, hash);
132
133         descriptors = REALLOC(sd->descriptors,
134                               (sd->num_entries + 1) * sizeof(sd->descriptors[0]));
135         if (!descriptors)
136                 goto out_free_descr;
137         sd->descriptors = descriptors;
138         sizes = REALLOC(sd->sizes,
139                         (sd->num_entries + 1) * sizeof(sd->sizes[0]));
140         if (!sizes)
141                 goto out_free_descr;
142         sd->sizes = sizes;
143         sd->descriptors[sd->num_entries] = descr_copy;
144         sd->sizes[sd->num_entries] = size;
145         sd->num_entries++;
146         sd->total_length += size + 8;
147
148         if (tree->root)
149                 insert_sd_node(tree->root, new);
150         else
151                 tree->root = new;
152         return new->security_id;
153 out_free_descr:
154         FREE(descr_copy);
155 out_free_node:
156         FREE(new);
157         return -1;
158 }
159
160 #if 0
161 static int build_sd_tree(struct wim_security_data *sd, struct sd_tree *tree)
162 {
163         int ret;
164         u32 orig_num_entries = sd->num_entries;
165         u32 orig_total_length = sd->total_length;
166
167         tree->num_sds = 0;
168         tree->sd = sd;
169         tree->root = NULL;
170
171         for (u32 i = 0; i < sd->num_entries; i++) {
172                 ret = tree_add_sd(tree, sd->descriptors[i], sd->sizes[i]);
173                 if (ret < 0)
174                         goto out_revert;
175         }
176         return 0;
177 out_revert:
178         sd->num_entries = orig_num_entries;
179         sd->total_length = orig_total_length;
180         free_sd_tree(tree->root);
181         return ret;
182 }
183 #endif
184
185 static int ntfs_attr_sha1sum(ntfs_inode *ni, ATTR_RECORD *ar,
186                              u8 md[SHA1_HASH_SIZE])
187 {
188         s64 pos = 0;
189         s64 bytes_remaining;
190         char buf[4096];
191         ntfs_attr *na;
192         SHA_CTX ctx;
193
194         na = ntfs_attr_open(ni, ar->type,
195                             (ntfschar*)((u8*)ar + le16_to_cpu(ar->name_offset)),
196                             ar->name_length);
197         if (!na) {
198                 ERROR_WITH_ERRNO("Failed to open NTFS attribute");
199                 return WIMLIB_ERR_NTFS_3G;
200         }
201
202         bytes_remaining = na->data_size;
203         sha1_init(&ctx);
204
205         while (bytes_remaining) {
206                 s64 to_read = min(bytes_remaining, sizeof(buf));
207                 if (ntfs_attr_pread(na, pos, to_read, buf) != to_read) {
208                         ERROR_WITH_ERRNO("Error reading NTFS attribute");
209                         return WIMLIB_ERR_NTFS_3G;
210                 }
211                 sha1_update(&ctx, buf, to_read);
212                 pos += to_read;
213                 bytes_remaining -= to_read;
214         }
215         sha1_final(md, &ctx);
216         ntfs_attr_close(na);
217         return 0;
218 }
219
220 /* Load a normal file in the NTFS volume into the WIM lookup table */
221 static int capture_normal_ntfs_file(struct dentry *dentry, ntfs_inode *ni,
222                                     char path[], size_t path_len,
223                                     struct lookup_table *lookup_table,
224                                     ntfs_volume **ntfs_vol_p)
225 {
226
227         ntfs_attr_search_ctx *actx;
228         u8 attr_hash[SHA1_HASH_SIZE];
229         struct ntfs_location *ntfs_loc;
230         struct lookup_table_entry *lte;
231         int ret = 0;
232
233         actx = ntfs_attr_get_search_ctx(ni, NULL);
234         if (!actx) {
235                 ERROR_WITH_ERRNO("Cannot get attribute search "
236                                  "context");
237                 return WIMLIB_ERR_NTFS_3G;
238         }
239         while (!ntfs_attr_lookup(AT_DATA, NULL, 0,
240                                  CASE_SENSITIVE, 0, NULL, 0, actx))
241         {
242                 ret = ntfs_attr_sha1sum(ni, actx->attr, attr_hash);
243                 if (ret != 0)
244                         goto out_put_actx;
245                 lte = __lookup_resource(lookup_table, attr_hash);
246                 if (lte) {
247                         lte->refcnt++;
248                 } else {
249                         struct ntfs_location *ntfs_loc;
250
251                         ret = WIMLIB_ERR_NOMEM;
252
253                         ntfs_loc = CALLOC(1, sizeof(*ntfs_loc));
254                         if (!ntfs_loc) {
255                                 goto out_put_actx;
256                         }
257                         ntfs_loc->path_utf8 = MALLOC(path_len + 1);
258                         if (!ntfs_loc->path_utf8)
259                                 goto out_put_actx;
260                         memcpy(ntfs_loc->path_utf8, path, path_len + 1);
261                         ntfs_loc->stream_name_utf16 = MALLOC(actx->attr->name_length * 2);
262                         if (!ntfs_loc->stream_name_utf16)
263                                 goto out_put_actx;
264                         memcpy(ntfs_loc->stream_name_utf16,
265                                (u8*)actx->attr +
266                                         le16_to_cpu(actx->attr->name_offset),
267                                actx->attr->name_length * 2);
268
269                         ntfs_loc->stream_name_utf16_num_chars = actx->attr->name_length;
270                         lte = new_lookup_table_entry();
271                         if (!lte)
272                                 goto out_put_actx;
273                         lte->ntfs_loc = ntfs_loc;
274                         lte->resource_location = RESOURCE_IN_NTFS_VOLUME;
275                         lte->resource_entry.original_size = actx->attr->data_size;
276                         lte->resource_entry.size = actx->attr->data_size;
277                         copy_hash(lte->hash, attr_hash);
278                         lookup_table_insert(lookup_table, lte);
279                 }
280                 dentry->lte = lte;
281         }
282         goto out_put_actx;
283 out_free_ntfs_loc:
284         if (ntfs_loc) {
285                 FREE(ntfs_loc->path_utf8);
286                 FREE(ntfs_loc->stream_name_utf16);
287                 FREE(ntfs_loc);
288         }
289 out_put_actx:
290         ntfs_attr_put_search_ctx(actx);
291         return ret;
292 }
293
294 static int __build_dentry_tree_ntfs(struct dentry *dentry, ntfs_inode *ni,
295                                     char path[], size_t path_len,
296                                     struct lookup_table *lookup_table,
297                                     struct sd_tree *tree,
298                                     ntfs_volume **ntfs_vol_p)
299 {
300         u32 attributes = ntfs_inode_get_attributes(ni);
301         int mrec_flags = ni->mrec->flags;
302         u32 sd_size;
303         int ret = 0;
304
305         dentry->creation_time    = le64_to_cpu(ni->creation_time);
306         dentry->last_write_time  = le64_to_cpu(ni->last_data_change_time);
307         dentry->last_access_time = le64_to_cpu(ni->last_access_time);
308         dentry->security_id      = le32_to_cpu(ni->security_id);
309         dentry->attributes       = le32_to_cpu(attributes);
310         dentry->resolved = true;
311
312         if (attributes & FILE_ATTR_REPARSE_POINT) {
313                 /* Junction point, symbolic link, or other reparse point */
314         } else if (mrec_flags & MFT_RECORD_IS_DIRECTORY) {
315                 /* Normal directory */
316         } else {
317                 /* Normal file */
318                 ret = capture_normal_ntfs_file(dentry, ni, path, path_len,
319                                                lookup_table, ntfs_vol_p);
320         }
321         if (ret != 0)
322                 return ret;
323         ret = ntfs_inode_get_security(ni,
324                                       OWNER_SECURITY_INFORMATION |
325                                       GROUP_SECURITY_INFORMATION |
326                                       DACL_SECURITY_INFORMATION  |
327                                       SACL_SECURITY_INFORMATION,
328                                       NULL, 0, &sd_size);
329         u8 sd[sd_size];
330         ret = ntfs_inode_get_security(ni,
331                                       OWNER_SECURITY_INFORMATION |
332                                       GROUP_SECURITY_INFORMATION |
333                                       DACL_SECURITY_INFORMATION  |
334                                       SACL_SECURITY_INFORMATION,
335                                       sd, sd_size, &sd_size);
336         dentry->security_id = tree_add_sd(tree, sd, sd_size);
337         return 0;
338 }
339
340 static int build_dentry_tree_ntfs(struct dentry *root_dentry,
341                                   const char *device,
342                                   struct lookup_table *lookup_table,
343                                   struct wim_security_data *sd,
344                                   int flags,
345                                   void *extra_arg)
346 {
347         ntfs_volume *vol;
348         ntfs_inode *root_ni;
349         int ret = 0;
350         struct sd_tree tree;
351         tree.sd = sd;
352         tree.root = NULL;
353         ntfs_volume **ntfs_vol_p = extra_arg;
354         
355         vol = ntfs_mount(device, MS_RDONLY);
356         if (!vol) {
357                 ERROR_WITH_ERRNO("Failed to mount NTFS volume `%s' read-only",
358                                  device);
359                 return WIMLIB_ERR_NTFS_3G;
360         }
361         root_ni = ntfs_inode_open(vol, FILE_root);
362         if (!root_ni) {
363                 ERROR_WITH_ERRNO("Failed to open root inode of NTFS volume "
364                                  "`%s'", device);
365                 ret = WIMLIB_ERR_NTFS_3G;
366                 goto out;
367         }
368         char path[4096];
369         path[0] = '/';
370         path[1] = '\0';
371         ret = __build_dentry_tree_ntfs(root_dentry, root_ni, path, 1,
372                                        lookup_table, &tree, ntfs_vol_p);
373         ntfs_inode_close(root_ni);
374
375 out:
376         if (ntfs_umount(vol, FALSE) != 0) {
377                 ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%s'", device);
378                 if (ret == 0)
379                         ret = WIMLIB_ERR_NTFS_3G;
380         }
381         return ret;
382 }
383
384 WIMLIBAPI int wimlib_add_image_from_ntfs_volume(WIMStruct *w,
385                                                 const char *device,
386                                                 const char *name,
387                                                 const char *description,
388                                                 const char *flags_element,
389                                                 int flags)
390 {
391         if (flags & (WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE)) {
392                 ERROR("Cannot dereference files when capturing directly from NTFS");
393                 return WIMLIB_ERR_INVALID_PARAM;
394         }
395         return do_add_image(w, device, name, description, flags_element, flags,
396                             build_dentry_tree_ntfs,
397                             &w->ntfs_vol);
398 }
399
400 #else /* WITH_NTFS_3G */
401 WIMLIBAPI int wimlib_add_image_from_ntfs_volume(WIMStruct *w,
402                                                 const char *device,
403                                                 const char *name,
404                                                 const char *description,
405                                                 const char *flags_element,
406                                                 int flags)
407 {
408         ERROR("wimlib was compiled without support for NTFS-3g, so");
409         ERROR("we cannot capture a WIM image directly from a NTFS volume");
410         return WIMLIB_ERR_UNSUPPORTED;
411 }
412 #endif /* WITH_NTFS_3G */