4 * Capture a WIM image from an NTFS volume. We capture everything we can,
5 * including security data and alternate data streams.
9 * Copyright (C) 2012, 2013 Eric Biggers
11 * This file is part of wimlib, a library for working with WIM files.
13 * wimlib is free software; you can redistribute it and/or modify it under the
14 * terms of the GNU General Public License as published by the Free
15 * Software Foundation; either version 3 of the License, or (at your option)
18 * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
19 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
20 * A PARTICULAR PURPOSE. See the GNU General Public License for more
23 * You should have received a copy of the GNU General Public License
24 * along with wimlib; if not, see http://www.gnu.org/licenses/.
30 #include <ntfs-3g/endians.h>
31 #include <ntfs-3g/types.h>
33 #include "wimlib_internal.h"
37 #include "lookup_table.h"
38 #include "buffer_io.h"
39 #include <ntfs-3g/layout.h>
40 #include <ntfs-3g/acls.h>
41 #include <ntfs-3g/attrib.h>
42 #include <ntfs-3g/misc.h>
43 #include <ntfs-3g/reparse.h>
44 #include <ntfs-3g/security.h> /* security.h before xattrs.h */
45 #include <ntfs-3g/xattrs.h>
46 #include <ntfs-3g/volume.h>
// Returns a pointer to the name stored in an attribute record: the address of
// the record advanced by its name_offset field, which is converted with
// le16_to_cpu before use.
52 static inline ntfschar *attr_record_name(ATTR_RECORD *ar)
54 return (ntfschar*)((u8*)ar + le16_to_cpu(ar->name_offset));
57 /* Calculates the SHA1 message digest of a NTFS attribute.
59 * @ni: The NTFS inode containing the attribute.
60 * @ar: The ATTR_RECORD describing the attribute.
61 * @md: If successful, the returned SHA1 message digest.
62 * @reparse_tag_ret: Optional pointer into which the first 4 bytes of the
63 * attribute will be written (to get the reparse
66 * Return 0 on success or nonzero on error.
// is_reparse_point selects the reparse branch in the body: 8 bytes are read
// from offset 0 and the leading 32-bit little-endian value is stored through
// reparse_tag_ret. Both failure reports in this function return
// WIMLIB_ERR_NTFS_3G.
68 static int ntfs_attr_sha1sum(ntfs_inode *ni, ATTR_RECORD *ar,
69 u8 md[SHA1_HASH_SIZE],
70 bool is_reparse_point,
// Scratch buffer that every read below lands in; its size bounds each chunk.
75 char buf[BUFFER_SIZE];
// Open the attribute using the type and name taken from the record.
79 na = ntfs_attr_open(ni, ar->type, attr_record_name(ar),
82 ERROR_WITH_ERRNO("Failed to open NTFS attribute");
83 return WIMLIB_ERR_NTFS_3G;
// The amount left to hash starts as the full data size of the attribute.
86 bytes_remaining = na->data_size;
88 if (is_reparse_point) {
// A read that does not return exactly 8 bytes is treated as a failure.
89 if (ntfs_attr_pread(na, 0, 8, buf) != 8)
// The tag is the first 4 bytes of what was just read, taken as little endian.
91 *reparse_tag_ret = le32_to_cpu(*(u32*)buf);
92 DEBUG("ReparseTag = %#x", *reparse_tag_ret);
// NOTE(review): whether pos and bytes_remaining are advanced past these 8
// bytes before hashing is not visible in this view - confirm.
// Hash the attribute in chunks no larger than the scratch buffer.
98 while (bytes_remaining) {
99 s64 to_read = min(bytes_remaining, sizeof(buf));
// A short read counts as an error.
100 if (ntfs_attr_pread(na, pos, to_read, buf) != to_read)
102 sha1_update(&ctx, buf, to_read);
104 bytes_remaining -= to_read;
// Write the finished digest into md.
106 sha1_final(md, &ctx);
// Read failure report.
110 ERROR_WITH_ERRNO("Error reading NTFS attribute");
111 return WIMLIB_ERR_NTFS_3G;
114 /* Load the streams from a file or reparse point in the NTFS volume into the WIM
// Iterates over the attributes of ni that match the attribute type held in
// type (its declaration is not visible in this view). Each non-empty match is
// hashed and tied to a lookup table entry, which is then referenced from the
// inode of dentry (unnamed stream) or from a new alternate data stream entry
// (named stream).
116 static int capture_ntfs_streams(struct wim_dentry *dentry, ntfs_inode *ni,
117 char path[], size_t path_len,
118 struct wim_lookup_table *lookup_table,
119 ntfs_volume **ntfs_vol_p,
122 ntfs_attr_search_ctx *actx;
123 u8 attr_hash[SHA1_HASH_SIZE];
124 struct ntfs_location *ntfs_loc = NULL;
126 struct wim_lookup_table_entry *lte;
128 DEBUG2("Capturing NTFS data streams from `%s'", path);
130 /* Get context to search the streams of the NTFS file. */
131 actx = ntfs_attr_get_search_ctx(ni, NULL);
133 ERROR_WITH_ERRNO("Cannot get NTFS attribute search "
135 return WIMLIB_ERR_NTFS_3G;
138 /* Capture each data stream or reparse data stream. */
// The loop continues for as long as ntfs_attr_lookup returns zero.
139 while (!ntfs_attr_lookup(type, NULL, 0,
140 CASE_SENSITIVE, 0, NULL, 0, actx))
142 char *stream_name_utf8;
// Size and name length both come from the record the search context is on.
144 u64 data_size = ntfs_get_attribute_value_length(actx->attr);
145 u64 name_length = actx->attr->name_length;
// A zero size leads either to the error report below or to the empty stream
// case; the test separating the two is not visible in this view.
147 if (data_size == 0) {
149 ERROR_WITH_ERRNO("Failed to get size of attribute of "
151 ret = WIMLIB_ERR_NTFS_3G;
154 /* Empty stream. No lookup table entry is needed. */
// Reparse data shorter than 8 bytes is rejected; the same 8 bytes are
// subtracted from the recorded resource size further down.
157 if (type == AT_REPARSE_POINT && data_size < 8) {
158 ERROR("`%s': reparse point buffer too small",
160 ret = WIMLIB_ERR_NTFS_3G;
163 /* Checksum the stream. */
164 ret = ntfs_attr_sha1sum(ni, actx->attr, attr_hash,
165 type == AT_REPARSE_POINT, &reparse_tag);
// For reparse points, keep the tag that the checksum helper extracted.
169 if (type == AT_REPARSE_POINT)
170 dentry->d_inode->i_reparse_tag = reparse_tag;
172 /* Make a lookup table entry for the stream, or use an existing
173 * one if there's already an identical stream. */
174 lte = __lookup_resource(lookup_table, attr_hash);
// ret is preset so that the allocation failure jumps below report
// WIMLIB_ERR_NOMEM.
175 ret = WIMLIB_ERR_NOMEM;
// Describe where the stream lives on the volume: the volume pointer, a copy
// of the path, and a copy of the UTF-16 stream name.
179 ntfs_loc = CALLOC(1, sizeof(*ntfs_loc));
182 ntfs_loc->ntfs_vol_p = ntfs_vol_p;
183 ntfs_loc->path_utf8 = MALLOC(path_len + 1);
184 if (!ntfs_loc->path_utf8)
185 goto out_free_ntfs_loc;
// Copies path_len + 1 bytes, that is, the byte after the path as well.
186 memcpy(ntfs_loc->path_utf8, path, path_len + 1);
// The stream name is sized at 2 bytes per character.
188 ntfs_loc->stream_name_utf16 = MALLOC(name_length * 2);
189 if (!ntfs_loc->stream_name_utf16)
190 goto out_free_ntfs_loc;
191 memcpy(ntfs_loc->stream_name_utf16,
192 attr_record_name(actx->attr),
193 actx->attr->name_length * 2);
194 ntfs_loc->stream_name_utf16_num_chars = name_length;
197 lte = new_lookup_table_entry();
199 goto out_free_ntfs_loc;
// The new entry records the location description and marks the resource as
// residing in the NTFS volume.
200 lte->ntfs_loc = ntfs_loc;
201 lte->resource_location = RESOURCE_IN_NTFS_VOLUME;
202 if (type == AT_REPARSE_POINT) {
203 ntfs_loc->is_reparse_point = true;
// Reparse resources are recorded 8 bytes smaller than the attribute.
204 lte->resource_entry.original_size = data_size - 8;
205 lte->resource_entry.size = data_size - 8;
207 ntfs_loc->is_reparse_point = false;
208 lte->resource_entry.original_size = data_size;
209 lte->resource_entry.size = data_size;
212 DEBUG("Add resource for `%s' (size = %"PRIu64")",
213 dentry->file_name_utf8,
214 lte->resource_entry.original_size);
// Stamp the entry with the computed hash and add it to the lookup table.
215 copy_hash(lte->hash, attr_hash);
216 lookup_table_insert(lookup_table, lte);
219 if (name_length == 0) {
220 /* Unnamed data stream. Put the reference to it in the
// NOTE(review): two treatments of a second unnamed stream follow back to
// back, one reporting an ERROR and setting WIMLIB_ERR_NTFS_3G, the other
// issuing a WARNING and freeing the entry. Presumably they are alternative
// branches of a preprocessor conditional whose directives are not visible in
// this view - confirm which one is compiled.
223 if (dentry->d_inode->i_lte) {
224 ERROR("Found two un-named data streams for "
226 ret = WIMLIB_ERR_NTFS_3G;
229 dentry->d_inode->i_lte = lte;
231 if (dentry->d_inode->i_lte) {
232 WARNING("Found two un-named data streams for "
234 free_lookup_table_entry(lte);
236 dentry->d_inode->i_lte = lte;
240 /* Named data stream. Put the reference to it in the
241 * alternate data stream entries */
242 struct wim_ads_entry *new_ads_entry;
243 size_t stream_name_utf8_len;
// Convert the UTF-16 stream name to UTF-8 for inode_add_ads; the temporary
// string is freed right after the call.
245 ret = utf16_to_utf8((const char*)attr_record_name(actx->attr),
248 &stream_name_utf8_len);
251 new_ads_entry = inode_add_ads(dentry->d_inode, stream_name_utf8);
252 FREE(stream_name_utf8);
// The entry created by inode_add_ads is expected to hold the same number of
// name bytes as the original UTF-16 name.
256 wimlib_assert(new_ads_entry->stream_name_len == name_length * 2);
258 new_ads_entry->lte = lte;
// Cleanup steps: release the lookup table entry, the location strings, and
// finally the attribute search context.
264 free_lookup_table_entry(lte);
267 FREE(ntfs_loc->path_utf8);
268 FREE(ntfs_loc->stream_name_utf16);
272 ntfs_attr_put_search_ctx(actx);
274 DEBUG2("Successfully captured NTFS streams from `%s'", path);
// NOTE(review): the format string below opens the path with a backquote but
// has no closing single quote, unlike the other messages in this function.
276 ERROR("Failed to capture NTFS streams from `%s", path);
280 /* Red-black tree that maps NTFS inode numbers to DOS names */
281 struct dos_name_map {
// Root of the tree; insert_dos_name and lookup_dos_name both start from it.
282 struct rb_root rb_root;
// One entry of the DOS name tree. Besides the embedded rb_node shown here,
// code in this file reads and writes dos_name, name_len_bytes and ntfs_ino
// members whose declarations are not visible in this view.
285 struct dos_name_node {
// Tree linkage; container_of is applied to it to recover the whole node.
286 struct rb_node rb_node;
292 /* Inserts a new DOS name into the map */
// dos_name holds name_len characters of type ntfschar. The node is keyed by
// the inode number held in ntfs_ino (its declaration is not visible in this
// view).
293 static int insert_dos_name(struct dos_name_map *map,
294 const ntfschar *dos_name, int name_len,
297 struct dos_name_node *new_node;
299 struct rb_root *root;
300 struct rb_node *rb_parent;
302 DEBUG("DOS name_len = %d", name_len);
303 new_node = MALLOC(sizeof(struct dos_name_node));
307 /* DOS names are supposed to be 12 characters max (that's 24 bytes,
308 * assuming 2-byte ntfs characters) */
// The name must fit in the fixed dos_name array of the node.
309 wimlib_assert(name_len * sizeof(ntfschar) <= sizeof(new_node->dos_name));
311 /* Initialize the DOS name, DOS name length, and NTFS inode number of
312 * the red-black tree node */
313 memcpy(new_node->dos_name, dos_name, name_len * sizeof(ntfschar));
// The length is stored in bytes, not characters.
314 new_node->name_len_bytes = name_len * sizeof(ntfschar);
315 new_node->ntfs_ino = ntfs_ino;
317 /* Insert the red-black tree node */
318 root = &map->rb_root;
// Descend by comparing inode numbers: smaller goes left, larger goes right.
322 struct dos_name_node *this;
324 this = container_of(*p, struct dos_name_node, rb_node);
326 if (new_node->ntfs_ino < this->ntfs_ino)
327 p = &((*p)->rb_left);
328 else if (new_node->ntfs_ino > this->ntfs_ino)
329 p = &((*p)->rb_right);
331 /* This should be impossible since a NTFS inode cannot
332 * have multiple DOS names, and we only should get each
333 * DOS name entry once from the ntfs_readdir() calls. */
334 ERROR("NTFS inode %"PRIu64" has multiple DOS names",
// Link the node at the slot found above, then rebalance through
// rb_insert_color.
339 rb_link_node(&new_node->rb_node, rb_parent, p);
340 rb_insert_color(&new_node->rb_node, root);
341 DEBUG("Inserted DOS name for inode %"PRIu64, ntfs_ino);
345 /* Returns a structure that contains the DOS name and its length for a NTFS
346 * inode, or NULL if the inode has no DOS name. */
347 static struct dos_name_node *
348 lookup_dos_name(const struct dos_name_map *map, u64 ntfs_ino)
// Start at the root and descend by comparing inode numbers, mirroring the
// ordering used by insert_dos_name.
350 struct rb_node *node = map->rb_root.rb_node;
352 struct dos_name_node *this;
353 this = container_of(node, struct dos_name_node, rb_node);
354 if (ntfs_ino < this->ntfs_ino)
355 node = node->rb_left;
356 else if (ntfs_ino > this->ntfs_ino)
357 node = node->rb_right;
// Per-dentry callback handed to for_dentry_child by the directory capture
// code. arg is the dos_name_map built while reading the directory. Returns
// WIMLIB_ERR_NOMEM when the short name buffer cannot be allocated.
364 static int set_dentry_dos_name(struct wim_dentry *dentry, void *arg)
366 const struct dos_name_map *map = arg;
367 const struct dos_name_node *node;
// Only dentries flagged as carrying a Win32 name are looked up.
369 if (dentry->is_win32_name) {
370 node = lookup_dos_name(map, dentry->d_inode->i_ino);
// Copy the stored DOS name into a buffer owned by the dentry; the length is
// kept in bytes.
372 dentry->short_name = MALLOC(node->name_len_bytes);
373 if (!dentry->short_name)
374 return WIMLIB_ERR_NOMEM;
375 memcpy(dentry->short_name, node->dos_name,
376 node->name_len_bytes);
377 dentry->short_name_len = node->name_len_bytes;
378 DEBUG("Assigned DOS name to ino %"PRIu64,
379 dentry->d_inode->i_ino);
// A Win32 name without a matching DOS name only produces a warning.
381 WARNING("NTFS inode %"PRIu64" has Win32 name with no "
382 "corresponding DOS name",
383 dentry->d_inode->i_ino);
// Recursively frees a subtree of DOS name nodes: both children first, then
// the dos_name_node that embeds the given rb_node.
389 static void free_dos_name_tree(struct rb_node *node) {
391 free_dos_name_tree(node->rb_left);
392 free_dos_name_tree(node->rb_right);
393 FREE(container_of(node, struct dos_name_node, rb_node));
// Frees every node of the map by handing its root to free_dos_name_tree.
397 static void destroy_dos_name_map(struct dos_name_map *map)
399 free_dos_name_tree(map->rb_root.rb_node);
// Members of the context passed to wim_ntfs_capture_filldir through
// ntfs_readdir. NOTE(review): the opening line of the struct is not visible
// in this view; the name readdir_ctx is taken from the uses further down.
// The filldir callback also reads dir_ni, path, path_len and add_image_flags
// members whose declarations are not visible here.
// Dentry that receives each captured child.
403 struct wim_dentry *parent;
// The members below are forwarded unchanged to the recursive capture call.
407 struct wim_lookup_table *lookup_table;
408 struct sd_set *sd_set;
// Collects the DOS names seen while reading the directory.
409 struct dos_name_map *dos_name_map;
410 const struct capture_config *config;
411 ntfs_volume **ntfs_vol_p;
413 wimlib_progress_func_t progress_func;
// Forward declaration: wim_ntfs_capture_filldir calls this function before
// its definition appears later in the file.
417 build_dentry_tree_ntfs_recursive(struct wim_dentry **root_p, ntfs_inode *dir_ni,
418 ntfs_inode *ni, char path[], size_t path_len,
420 struct wim_lookup_table *lookup_table,
421 struct sd_set *sd_set,
422 const struct capture_config *config,
423 ntfs_volume **ntfs_vol_p,
425 wimlib_progress_func_t progress_func);
// Directory entry callback given to ntfs_readdir by the directory capture
// code. For each entry it records DOS names, skips the dot and dot-dot
// entries, opens the referenced inode and recurses into it, attaching the
// resulting dentry to the parent held in the context.
427 static int wim_ntfs_capture_filldir(void *dirent, const ntfschar *name,
428 const int name_len, const int name_type,
429 const s64 pos, const MFT_REF mref,
430 const unsigned dt_type)
432 struct readdir_ctx *ctx;
433 size_t utf8_name_len;
435 struct wim_dentry *child;
440 if (name_type & FILE_NAME_DOS) {
441 /* If this is the entry for a DOS name, store it for later. */
// The map key is the MFT reference masked with MFT_REF_MASK_CPU.
442 ret = insert_dos_name(ctx->dos_name_map, name,
443 name_len, mref & MFT_REF_MASK_CPU);
445 /* Return now if an error occurred or if this is just a DOS name
446 * and not a Win32+DOS name. */
447 if (ret != 0 || name_type == FILE_NAME_DOS)
// Convert the UTF-16 entry name (2 bytes per character) to UTF-8.
450 ret = utf16_to_utf8((const char*)name, name_len * 2,
451 &utf8_name, &utf8_name_len);
// Entries named exactly dot or dot-dot are not captured.
455 if (utf8_name[0] == '.' &&
456 (utf8_name[1] == '\0' ||
457 (utf8_name[1] == '.' && utf8_name[2] == '\0'))) {
460 * note: name_type is POSIX for these, so DOS names will not
461 * have been inserted for them. */
463 goto out_free_utf8_name;
466 /* Open the inode for this directory entry and recursively capture the
467 * directory tree rooted at it */
468 ntfs_inode *ni = ntfs_inode_open(ctx->dir_ni->vol, mref);
470 ERROR_WITH_ERRNO("Failed to open NTFS inode");
471 goto out_free_utf8_name;
// Extend the shared path buffer in place with a slash and the entry name;
// the copy covers utf8_name_len + 1 bytes, that is, the byte after the name
// as well.
// NOTE(review): no bounds check on ctx->path is visible here - confirm the
// buffer is large enough for the deepest path.
473 path_len = ctx->path_len;
475 ctx->path[path_len++] = '/';
476 memcpy(ctx->path + path_len, utf8_name, utf8_name_len + 1);
477 path_len += utf8_name_len;
// The inode of the directory being read becomes dir_ni for the child.
479 ret = build_dentry_tree_ntfs_recursive(&child, ctx->dir_ni,
480 ni, ctx->path, path_len, name_type,
481 ctx->lookup_table, ctx->sd_set,
482 ctx->config, ctx->ntfs_vol_p,
483 ctx->add_image_flags,
// NOTE(review): the condition guarding this call is not visible in this
// view.
486 dentry_add_child(ctx->parent, child);
487 ntfs_inode_close(ni);
493 /* Recursively build a WIM dentry tree corresponding to a NTFS volume.
494 * At the same time, update the WIM lookup table with lookup table entries for
495 * the NTFS streams, and build an array of security descriptors.
// Order of work in the body: exclusion check, NTFS attribute query, dentry
// creation, stream or directory capture depending on the kind of inode, then
// the security descriptor.
497 static int build_dentry_tree_ntfs_recursive(struct wim_dentry **root_p,
503 struct wim_lookup_table *lookup_table,
504 struct sd_set *sd_set,
505 const struct capture_config *config,
506 ntfs_volume **ntfs_vol_p,
508 wimlib_progress_func_t progress_func)
512 struct wim_dentry *root;
514 if (exclude_path(path, config, false)) {
515 /* Exclude a file or directory tree based on the capture
516 * configuration file */
// With the verbose flag set, the excluded path is reported through the
// progress callback.
517 if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
520 union wimlib_progress_info info;
521 info.scan.cur_path = path;
522 info.scan.excluded = true;
523 progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
529 /* Get file attributes */
// A zeroed security context is used for this query.
530 struct SECURITY_CONTEXT ctx;
531 memset(&ctx, 0, sizeof(ctx));
533 ret = ntfs_xattr_system_getxattr(&ctx, XATTR_NTFS_ATTRIB,
534 ni, dir_ni, (char *)&attributes,
537 ERROR_WITH_ERRNO("Failed to get NTFS attributes from `%s'",
539 return WIMLIB_ERR_NTFS_3G;
// Same progress report as above, this time marking the path as not excluded.
542 if ((add_image_flags & WIMLIB_ADD_IMAGE_FLAG_VERBOSE)
545 union wimlib_progress_info info;
546 info.scan.cur_path = path;
547 info.scan.excluded = false;
548 progress_func(WIMLIB_PROGRESS_MSG_SCAN_DENTRY, &info);
551 /* Create the new WIM dentry */
// The dentry is named after the last component of the path. The returns
// below choose among three error codes; errno equal to ENOMEM selects
// WIMLIB_ERR_NOMEM, and the test for the first code is not visible in this
// view.
552 root = new_dentry_with_timeless_inode(path_basename(path));
555 return WIMLIB_ERR_INVALID_UTF8_STRING;
556 else if (errno == ENOMEM)
557 return WIMLIB_ERR_NOMEM;
559 return WIMLIB_ERR_ICONV_NOT_AVAILABLE;
563 if (name_type & FILE_NAME_WIN32) /* Win32 or Win32+DOS name */
564 root->is_win32_name = 1;
// Timestamps and attributes are converted from little endian; the inode
// number is the MFT record number.
565 root->d_inode->i_creation_time = le64_to_cpu(ni->creation_time);
566 root->d_inode->i_last_write_time = le64_to_cpu(ni->last_data_change_time);
567 root->d_inode->i_last_access_time = le64_to_cpu(ni->last_access_time);
568 root->d_inode->i_attributes = le32_to_cpu(attributes);
569 root->d_inode->i_ino = ni->mft_no;
570 root->d_inode->i_resolved = 1;
// The reparse attribute flag is tested before the directory flag, so an
// inode with both set takes the reparse branch.
572 if (attributes & FILE_ATTR_REPARSE_POINT) {
573 /* Junction point, symbolic link, or other reparse point */
574 ret = capture_ntfs_streams(root, ni, path, path_len,
575 lookup_table, ntfs_vol_p,
577 } else if (ni->mrec->flags & MFT_RECORD_IS_DIRECTORY) {
579 /* Normal directory */
// A fresh DOS name map is built for each directory and destroyed once the
// directory has been processed.
581 struct dos_name_map dos_name_map = { .rb_root = {.rb_node = NULL} };
582 struct readdir_ctx ctx = {
586 .path_len = path_len,
587 .lookup_table = lookup_table,
589 .dos_name_map = &dos_name_map,
591 .ntfs_vol_p = ntfs_vol_p,
592 .add_image_flags = add_image_flags,
593 .progress_func = progress_func,
// Every directory entry is delivered to wim_ntfs_capture_filldir.
595 ret = ntfs_readdir(ni, &pos, &ctx, wim_ntfs_capture_filldir);
597 ERROR_WITH_ERRNO("ntfs_readdir()");
598 ret = WIMLIB_ERR_NTFS_3G;
// Assign the collected DOS names to the children via set_dentry_dos_name.
600 ret = for_dentry_child(root, set_dentry_dos_name,
603 destroy_dos_name_map(&dos_name_map);
// Remaining case, neither reparse point nor directory. The attribute type
// argument of this call is not visible in this view.
606 ret = capture_ntfs_streams(root, ni, path, path_len,
607 lookup_table, ntfs_vol_p,
613 /* Get security descriptor */
617 ret = ntfs_xattr_system_getxattr(&ctx, XATTR_NTFS_ACL,
// A result larger than the first buffer triggers a second query that passes
// ret as the size.
// NOTE(review): if ret is a signed type, a negative error value converts to
// a large unsigned value in this comparison and takes this branch - confirm
// the declared type of ret.
620 if (ret > sizeof(sd)) {
622 ret = ntfs_xattr_system_getxattr(&ctx, XATTR_NTFS_ACL,
623 ni, dir_ni, sd, ret);
// sd_set_add_sd supplies the security ID; a value of -1 is reported as out
// of memory.
626 root->d_inode->i_security_id = sd_set_add_sd(sd_set, sd, ret);
627 if (root->d_inode->i_security_id == -1) {
628 ERROR("Out of memory");
629 return WIMLIB_ERR_NOMEM;
631 DEBUG("Added security ID = %u for `%s'",
632 root->d_inode->i_security_id, path);
// A negative result from the query is an error.
634 } else if (ret < 0) {
635 ERROR_WITH_ERRNO("Failed to get security information from "
637 ret = WIMLIB_ERR_NTFS_3G;
// Otherwise no descriptor is recorded and the security ID is set to -1.
639 root->d_inode->i_security_id = -1;
640 DEBUG("No security ID for `%s'", path);
// Entry point for capturing from an NTFS volume: mounts the device read-only,
// opens the root inode, and runs the recursive capture starting from the
// root. NOTE(review): the end of this function is not visible in this view.
645 int build_dentry_tree_ntfs(struct wim_dentry **root_p,
647 struct wim_lookup_table *lookup_table,
648 struct wim_security_data *sd,
649 const struct capture_config *config,
651 wimlib_progress_func_t progress_func,
657 struct sd_set sd_set = {
// extra_arg is used as the place through which the mounted volume is shared;
// the same pointer is threaded into every captured stream location.
661 ntfs_volume **ntfs_vol_p = extra_arg;
663 DEBUG("Mounting NTFS volume `%s' read-only", device);
// The read-only mount flag is spelled differently depending on whether
// HAVE_NTFS_MNT_RDONLY is defined.
665 #ifdef HAVE_NTFS_MNT_RDONLY
667 vol = ntfs_mount(device, NTFS_MNT_RDONLY);
669 /* NTFS-3g 2011, 2012 */
670 vol = ntfs_mount(device, MS_RDONLY);
673 ERROR_WITH_ERRNO("Failed to mount NTFS volume `%s' read-only",
675 return WIMLIB_ERR_NTFS_3G;
// The return value of this call is not checked on this line.
677 ntfs_open_secure(vol);
679 /* We don't want to capture the special NTFS files such as $Bitmap. Not
680 * to be confused with "hidden" or "system" files which are real files
681 * that we do need to capture. */
682 NVolClearShowSysFiles(vol);
684 DEBUG("Opening root NTFS dentry");
685 root_ni = ntfs_inode_open(vol, FILE_root);
687 ERROR_WITH_ERRNO("Failed to open root inode of NTFS volume "
689 ret = WIMLIB_ERR_NTFS_3G;
693 /* Currently we assume that all the UTF-8 paths fit into this length and
694 * there is no check for overflow. */
// One shared buffer holds the path for the whole traversal; the directory
// callback extends it in place.
695 char *path = MALLOC(32768);
697 ERROR("Could not allocate memory for NTFS pathname");
698 ret = WIMLIB_ERR_NOMEM;
// The root is captured with no parent directory inode, a path length of 1,
// and a POSIX name type.
704 ret = build_dentry_tree_ntfs_recursive(root_p, NULL, root_ni, path, 1,
705 FILE_NAME_POSIX, lookup_table,
// Teardown: close the root inode, destroy the descriptor set, and release
// the security index contexts and inode held by the volume.
712 ntfs_inode_close(root_ni);
713 destroy_sd_set(&sd_set);
715 ntfs_index_ctx_put(vol->secure_xsii);
716 ntfs_index_ctx_put(vol->secure_xsdh);
717 ntfs_inode_close(vol->secure_ni);
// NOTE(review): the condition choosing between unmounting and leaving the
// volume mounted is not visible in this view.
720 if (ntfs_umount(vol, FALSE) != 0) {
721 ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%s'",
724 ret = WIMLIB_ERR_NTFS_3G;
727 /* We need to leave the NTFS volume mounted so that we can read
728 * the NTFS files again when we are actually writing the WIM */