4 * Capture a WIM image directly from an NTFS volume using libntfs-3g. We capture
5 * everything we can, including security data and alternate data streams.
9 * Copyright (C) 2012, 2013, 2014 Eric Biggers
11 * This file is free software; you can redistribute it and/or modify it under
12 * the terms of the GNU Lesser General Public License as published by the Free
13 * Software Foundation; either version 3 of the License, or (at your option) any
16 * This file is distributed in the hope that it will be useful, but WITHOUT
17 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
21 * You should have received a copy of the GNU Lesser General Public License
22 * along with this file; if not, see http://www.gnu.org/licenses/.
38 #include <ntfs-3g/attrib.h>
39 #include <ntfs-3g/reparse.h>
40 #include <ntfs-3g/security.h>
41 #include <ntfs-3g/volume.h>
43 #include "wimlib/capture.h"
44 #include "wimlib/dentry.h"
45 #include "wimlib/encoding.h"
46 #include "wimlib/endianness.h"
47 #include "wimlib/error.h"
48 #include "wimlib/lookup_table.h"
49 #include "wimlib/ntfs_3g.h"
50 #include "wimlib/paths.h"
51 #include "wimlib/security.h"
/* Return a pointer to the name stored inside the attribute record @ar.  The
 * name begins ar->name_offset bytes from the start of the record; the offset
 * field is converted with le16_to_cpu() before it is used. */
53 static inline ntfschar *
54 attr_record_name(ATTR_RECORD *ar)
56 return (ntfschar*)((u8*)ar + le16_to_cpu(ar->name_offset));
/* Open the attribute of @ni that @loc refers to, using ntfs_attr_open().  The
 * attribute type is AT_REPARSE_POINT when loc->is_reparse_point is set and
 * AT_DATA otherwise; loc->stream_name_nchars is passed as the name length.
 * A failure is logged with ERROR_WITH_ERRNO(), naming loc->path.
 *
 * NOTE(review): the embedded line numbers skip 59, 61-63, 66, 68 and 71-73, so
 * the return type, the name argument, the failure test and the return
 * statement are not visible here.  Presumably the ntfs_attr pointer is
 * returned and is NULL on failure; confirm against the complete file. */
60 open_ntfs_attr(ntfs_inode *ni, struct ntfs_location *loc)
64 na = ntfs_attr_open(ni,
65 loc->is_reparse_point ? AT_REPARSE_POINT : AT_DATA,
67 loc->stream_name_nchars);
69 ERROR_WITH_ERRNO("Failed to open attribute of \"%"TS"\" in "
70 "NTFS volume", loc->path);
/* Read the first @size bytes of the stream described by lte->ntfs_loc and
 * pass them to @cb (together with @cb_ctx) one buffer at a time.
 *
 * The inode is located by path with ntfs_pathname_to_inode() and the attribute
 * is opened with open_ntfs_attr().  Each failure path visible here sets ret to
 * WIMLIB_ERR_NTFS_3G; the value returned by @cb is also stored in ret.
 *
 * NOTE(review): several lines are absent from this extract (the embedded
 * numbering has gaps), including the declarations of buf, pos, bytes_remaining
 * and ret and the out_close_ntfs_attr label. */
76 read_ntfs_file_prefix(const struct wim_lookup_table_entry *lte, u64 size,
77 consume_data_callback_t cb, void *cb_ctx)
79 struct ntfs_location *loc = lte->ntfs_loc;
80 ntfs_volume *vol = loc->ntfs_vol;
88 ni = ntfs_pathname_to_inode(vol, NULL, loc->path);
90 ERROR_WITH_ERRNO("Can't find NTFS inode for \"%"TS"\"", loc->path);
91 ret = WIMLIB_ERR_NTFS_3G;
95 na = open_ntfs_attr(ni, loc);
97 ret = WIMLIB_ERR_NTFS_3G;
98 goto out_close_ntfs_inode;
/* Reparse data is read starting at byte offset 8.  capture_ntfs_streams()
 * correspondingly records the stream size as data_size - 8. */
101 pos = (loc->is_reparse_point) ? 8 : 0;
102 bytes_remaining = size;
103 while (bytes_remaining) {
/* Read at most sizeof(buf) bytes per iteration; a short read is an error. */
104 s64 to_read = min(bytes_remaining, sizeof(buf));
105 if (ntfs_attr_pread(na, pos, to_read, buf) != to_read) {
106 ERROR_WITH_ERRNO("Error reading \"%"TS"\"", loc->path);
107 ret = WIMLIB_ERR_NTFS_3G;
108 goto out_close_ntfs_attr;
/* NOTE(review): no statement advancing pos is visible in this loop.  The
 * embedded numbering skips 109-110, so confirm that the complete file
 * advances pos by to_read here. */
111 bytes_remaining -= to_read;
112 ret = cb(buf, to_read, cb_ctx);
114 goto out_close_ntfs_attr;
119 out_close_ntfs_inode:
120 ntfs_inode_close(ni);
/* Read the reparse tag from the attribute of @ni selected by @loc.
 *
 * The attribute is opened with open_ntfs_attr(), sizeof(reparse_tag) bytes are
 * read from offset 0, and the value is converted with le32_to_cpu() and stored
 * in *reparse_tag_ret.  The failure paths visible here set ret to
 * WIMLIB_ERR_NTFS_3G.
 *
 * NOTE(review): the declaration of reparse_tag and the success return are on
 * lines missing from this extract. */
126 read_reparse_tag(ntfs_inode *ni, struct ntfs_location *loc,
127 u32 *reparse_tag_ret)
133 na = open_ntfs_attr(ni, loc);
135 ret = WIMLIB_ERR_NTFS_3G;
/* Anything other than a full-size read is treated as an error. */
139 if (ntfs_attr_pread(na, 0, sizeof(reparse_tag),
140 &reparse_tag) != sizeof(reparse_tag))
142 ERROR_WITH_ERRNO("Error reading reparse data");
143 ret = WIMLIB_ERR_NTFS_3G;
144 goto out_close_ntfs_attr;
146 *reparse_tag_ret = le32_to_cpu(reparse_tag);
147 DEBUG("ReparseTag = %#x", *reparse_tag_ret);
156 /* Load the streams from a file or reparse point in the NTFS volume */
/* Walks every attribute of the requested type on the NTFS inode with
 * ntfs_attr_lookup().  For each non-empty attribute it builds a struct
 * ntfs_location (volume, copy of the path, copy of the stream name) and a
 * lookup table entry marked RESOURCE_IN_NTFS_VOLUME, attaches the entry to
 * @inode as either the unnamed stream or an alternate data stream, and queues
 * it with add_unhashed_stream().
 *
 * Visible error results are WIMLIB_ERR_NTFS_3G and WIMLIB_ERR_NOMEM.
 *
 * NOTE(review): much of this function falls on lines missing from the extract
 * (part of the parameter list, the cleanup labels, several conditions), so
 * the comments below describe only what the visible lines establish. */
158 capture_ntfs_streams(struct wim_inode *inode,
162 struct list_head *unhashed_streams,
166 ntfs_attr_search_ctx *actx;
167 struct ntfs_location *ntfs_loc;
169 struct wim_lookup_table_entry *lte;
171 DEBUG("Capturing NTFS data streams from `%s'", path);
173 /* Get context to search the streams of the NTFS file. */
174 actx = ntfs_attr_get_search_ctx(ni, NULL);
176 ERROR_WITH_ERRNO("Cannot get NTFS attribute search "
177 "context for \"%s\"", path);
178 return WIMLIB_ERR_NTFS_3G;
181 /* Capture each data stream or reparse data stream. */
182 while (!ntfs_attr_lookup(type, NULL, 0,
183 CASE_SENSITIVE, 0, NULL, 0, actx))
/* name_length counts characters, not bytes: the assertion further down
 * expects the stored name to occupy name_length * 2 bytes. */
185 u64 data_size = ntfs_get_attribute_value_length(actx->attr);
186 u64 name_length = actx->attr->name_length;
189 if (data_size == 0) {
190 /* Empty stream. No lookup table entry is needed. */
/* Record where the stream lives.  read_ntfs_file_prefix() later reopens the
 * inode through ntfs_loc->path and ntfs_loc->ntfs_vol. */
194 ntfs_loc = CALLOC(1, sizeof(*ntfs_loc));
196 ret = WIMLIB_ERR_NOMEM;
199 ntfs_loc->ntfs_vol = vol;
200 ntfs_loc->path = memdup(path, path_len + 1);
201 if (!ntfs_loc->path) {
202 ret = WIMLIB_ERR_NOMEM;
203 goto out_free_ntfs_loc;
206 ntfs_loc->stream_name = memdup(attr_record_name(actx->attr),
208 if (!ntfs_loc->stream_name) {
209 ret = WIMLIB_ERR_NOMEM;
210 goto out_free_ntfs_loc;
212 ntfs_loc->stream_name_nchars = name_length;
215 lte = new_lookup_table_entry();
217 ret = WIMLIB_ERR_NOMEM;
218 goto out_free_ntfs_loc;
220 lte->resource_location = RESOURCE_IN_NTFS_VOLUME;
221 lte->ntfs_loc = ntfs_loc;
/* For reparse data the recorded size excludes the first 8 bytes, matching the
 * starting offset used by read_ntfs_file_prefix(), and the tag is read into
 * inode->i_reparse_tag.  The size test guarding the error below is on a
 * missing line; presumably it rejects data shorter than 8 bytes. */
223 if (type == AT_REPARSE_POINT) {
225 ERROR("Invalid reparse data on \"%s\" "
226 "(only %u bytes)!", path, (unsigned)data_size);
227 ret = WIMLIB_ERR_NTFS_3G;
230 lte->ntfs_loc->is_reparse_point = true;
231 lte->size = data_size - 8;
232 ret = read_reparse_tag(ni, lte->ntfs_loc,
233 &inode->i_reparse_tag);
237 lte->ntfs_loc->is_reparse_point = false;
238 lte->size = data_size;
241 if (name_length == 0) {
242 /* Unnamed data stream. Put the reference to it in the
246 if (!(inode->i_attributes &
247 FILE_ATTRIBUTE_REPARSE_POINT))
249 WARNING("Found two un-named "
250 "data streams for \"%s\" "
251 "(sizes = %"PRIu64", "
257 free_lookup_table_entry(lte);
265 /* Named data stream. Put the reference to it in the
266 * alternate data stream entries */
267 struct wim_ads_entry *new_ads_entry;
269 new_ads_entry = inode_add_ads_utf16le(inode,
270 attr_record_name(actx->attr),
272 if (!new_ads_entry) {
273 ret = WIMLIB_ERR_NOMEM;
276 wimlib_assert(new_ads_entry->stream_name_nbytes == name_length * 2);
277 stream_id = new_ads_entry->stream_id;
278 new_ads_entry->lte = lte;
281 add_unhashed_stream(lte, inode,
282 stream_id, unhashed_streams);
/* The lookup loop has ended.  errno is tested for ENOENT first; the error
 * report below covers the other case.  Presumably ENOENT just means that
 * there are no more attributes of this type (the branch body is on missing
 * lines, so confirm). */
285 if (errno == ENOENT) {
288 ERROR_WITH_ERRNO("Error listing NTFS attributes of \"%s\"", path);
289 ret = WIMLIB_ERR_NTFS_3G;
/* Cleanup: the labels that separate these steps are not visible here. */
293 free_lookup_table_entry(lte);
296 FREE(ntfs_loc->path);
297 FREE(ntfs_loc->stream_name);
301 ntfs_attr_put_search_ctx(actx);
303 DEBUG("Successfully captured NTFS streams from \"%s\"", path);
305 ERROR("Failed to capture NTFS streams from \"%s\"", path);
309 /* Binary tree that maps NTFS inode numbers to DOS names */
310 struct dos_name_map {
/* Root of the AVL tree of struct dos_name_node entries, ordered by
 * _avl_cmp_by_ntfs_ino().  An empty map has root == NULL, which is how
 * build_dentry_tree_ntfs_recursive() initializes it. */
311 struct avl_tree_node *root;
/* One entry of the DOS name map.
 *
 * NOTE(review): only index_node is visible in this extract.  The functions
 * below also use members named dos_name, name_nbytes and ntfs_ino, whose
 * declarations fall on missing lines. */
314 struct dos_name_node {
/* Links this entry into the AVL tree; DOS_NAME_NODE() maps it back to the
 * containing entry. */
315 struct avl_tree_node index_node;
/* Given a pointer to the index_node member embedded in a struct
 * dos_name_node, yield the containing struct dos_name_node (through
 * avl_tree_entry). */
321 #define DOS_NAME_NODE(avl_node) \
322 avl_tree_entry(avl_node, struct dos_name_node, index_node)
/* Comparison callback for the DOS name AVL tree: orders two entries by their
 * ntfs_ino members using cmp_u64().  It is passed to both avl_tree_insert()
 * and avl_tree_lookup_node() below.
 *
 * NOTE(review): insert_dos_name() receives the key as le64 and
 * lookup_dos_name() applies cpu_to_le64(), so the raw stored value is what
 * gets compared here. */
325 _avl_cmp_by_ntfs_ino(const struct avl_tree_node *n1,
326 const struct avl_tree_node *n2)
328 return cmp_u64(DOS_NAME_NODE(n1)->ntfs_ino,
329 DOS_NAME_NODE(n2)->ntfs_ino);
332 /* Inserts a new DOS name into the map */
/* Allocates a struct dos_name_node, copies @name_nbytes bytes of @dos_name
 * into it, records @ntfs_ino as the key and inserts the node into map->root.
 *
 * Returns WIMLIB_ERR_NOMEM when the allocation fails, and the same code when
 * avl_tree_insert() reports that the key is already present.
 *
 * NOTE(review): the length bound is enforced only through wimlib_assert().
 * If that macro can be compiled out (its definition is not visible here), an
 * over-long name would overflow new_node->dos_name in the memcpy below.
 *
 * NOTE(review): @ntfs_ino is declared le64, yet wim_ntfs_capture_filldir()
 * passes mref & MFT_REF_MASK_CPU, while lookup_dos_name() converts its key
 * with cpu_to_le64().  Presumably these agree only on little-endian hosts;
 * confirm.
 *
 * NOTE(review): embedded line 363 is missing, so whether new_node is freed on
 * the duplicate-key path cannot be seen from this extract. */
334 insert_dos_name(struct dos_name_map *map, const ntfschar *dos_name,
335 size_t name_nbytes, le64 ntfs_ino)
337 struct dos_name_node *new_node;
339 DEBUG("DOS name_len = %zu", name_nbytes);
340 new_node = MALLOC(sizeof(struct dos_name_node));
342 return WIMLIB_ERR_NOMEM;
344 /* DOS names are supposed to be 12 characters max (that's 24 bytes,
345 * assuming 2-byte ntfs characters) */
346 wimlib_assert(name_nbytes <= sizeof(new_node->dos_name));
348 /* Initialize the DOS name, DOS name length, and NTFS inode number of
349 * the search tree node */
350 memcpy(new_node->dos_name, dos_name, name_nbytes);
351 new_node->name_nbytes = name_nbytes;
352 new_node->ntfs_ino = ntfs_ino;
354 /* Insert the search tree node */
355 if (avl_tree_insert(&map->root, &new_node->index_node,
356 _avl_cmp_by_ntfs_ino))
358 /* This should be impossible since an NTFS inode cannot
359 * have multiple DOS names, and we only should get each
360 * DOS name entry once from the ntfs_readdir() calls. */
361 ERROR("NTFS inode %"PRIu64" has multiple DOS names",
362 le64_to_cpu(ntfs_ino));
364 return WIMLIB_ERR_NOMEM;
366 DEBUG("Inserted DOS name for inode %"PRIu64, le64_to_cpu(ntfs_ino));
370 /* Returns a structure that contains the DOS name and its length for an NTFS
371 * inode, or NULL if the inode has no DOS name. */
372 static struct dos_name_node *
373 lookup_dos_name(const struct dos_name_map *map, u64 ntfs_ino)
/* A stack node serves as the search key.  Only its ntfs_ino member is set,
 * which is the only member _avl_cmp_by_ntfs_ino() reads. */
375 struct dos_name_node dummy;
376 struct avl_tree_node *res;
378 dummy.ntfs_ino = cpu_to_le64(ntfs_ino);
380 res = avl_tree_lookup_node(map->root, &dummy.index_node,
381 _avl_cmp_by_ntfs_ino);
/* NOTE(review): the test that handles a failed lookup falls on embedded lines
 * 382-383, which are missing from this extract. */
384 return DOS_NAME_NODE(res);
/* Give @dentry its DOS (short) name from @map.
 *
 * Only dentries with is_win32_name set are considered.  The name is looked up
 * by dentry->d_inode->i_ino; when found it is duplicated with utf16le_dupz()
 * into dentry->short_name and its byte length is stored in
 * dentry->short_name_nbytes.  When no entry is found a warning is logged.
 *
 * Returns WIMLIB_ERR_NOMEM if the duplication fails.  The success return is on
 * a line missing from this extract. */
388 set_dentry_dos_name(struct wim_dentry *dentry, const struct dos_name_map *map)
390 const struct dos_name_node *node;
392 if (dentry->is_win32_name) {
393 node = lookup_dos_name(map, dentry->d_inode->i_ino);
395 dentry->short_name = utf16le_dupz((const utf16lechar *)node->dos_name,
397 if (!dentry->short_name)
398 return WIMLIB_ERR_NOMEM;
399 dentry->short_name_nbytes = node->name_nbytes;
400 DEBUG("Assigned DOS name to ino %"PRIu64,
401 dentry->d_inode->i_ino);
403 WARNING("NTFS inode %"PRIu64" has Win32 name with no "
404 "corresponding DOS name",
405 dentry->d_inode->i_ino);
/* Free the subtree rooted at @node: both children are freed recursively
 * before the containing struct dos_name_node itself is released.
 *
 * NOTE(review): embedded line 413 is missing; presumably it guards against a
 * NULL @node, which is what ends the recursion.  Confirm in the full file. */
412 free_dos_name_tree(struct avl_tree_node *node) {
414 free_dos_name_tree(node->left);
415 free_dos_name_tree(node->right);
416 FREE(DOS_NAME_NODE(node));
/* Release every node of @map by handing its root to free_dos_name_tree().
 * The map structure itself is not freed here; the caller visible in this
 * file keeps it on the stack. */
421 destroy_dos_name_map(struct dos_name_map *map)
423 free_dos_name_tree(map->root);
/* Members of struct readdir_ctx, the context that
 * build_dentry_tree_ntfs_recursive() hands to ntfs_readdir() for
 * wim_ntfs_capture_filldir().
 *
 * NOTE(review): the line that opens the struct and the members path, path_len
 * and vol (all used through ctx in the functions below) fall on lines missing
 * from this extract. */
/* Directory dentry that captured children are added to. */
427 struct wim_dentry *parent;
/* Map that collects the DOS names seen while the directory is read. */
430 struct dos_name_map *dos_name_map;
/* Capture parameters, forwarded to the recursive capture call. */
432 struct capture_params *params;
/* Forward declaration.  wim_ntfs_capture_filldir() calls this function for
 * each directory entry, and this function in turn passes
 * wim_ntfs_capture_filldir() to ntfs_readdir(), so one of the two has to be
 * declared ahead of its definition. */
437 build_dentry_tree_ntfs_recursive(struct wim_dentry **root_p,
442 ntfs_volume *ntfs_vol,
443 struct capture_params *params);
/* Directory-entry callback given to ntfs_readdir() by
 * build_dentry_tree_ntfs_recursive().
 *
 * For an entry whose name type includes FILE_NAME_DOS, the name is recorded in
 * ctx->dos_name_map; a pure DOS name entry is not processed further.  The
 * entries named . and .. are skipped.  For any other entry the inode is opened
 * with ntfs_inode_open(), the name is appended to ctx->path after a slash, the
 * subtree is captured with build_dentry_tree_ntfs_recursive(), and the
 * resulting child is added to ctx->parent.  ctx->path is cut back to
 * ctx->path_len before returning.
 *
 * NOTE(review): the name is copied into ctx->path with no visible bounds
 * check.  build_dentry_tree_ntfs() allocates that buffer with a fixed size of
 * 32768 bytes and its own comment states that overflow is not checked.
 *
 * NOTE(review): the assignment of ctx, probably from @dirent, and several
 * conditions fall on lines missing from this extract. */
446 wim_ntfs_capture_filldir(void *dirent, const ntfschar *name,
447 const int name_nchars, const int name_type,
448 const s64 pos, const MFT_REF mref,
449 const unsigned dt_type)
451 struct readdir_ctx *ctx;
452 size_t mbs_name_nbytes;
454 struct wim_dentry *child;
/* @name_nchars counts ntfschar units; convert it to a byte count. */
457 size_t name_nbytes = name_nchars * sizeof(ntfschar);
460 if (name_type & FILE_NAME_DOS) {
461 /* If this is the entry for a DOS name, store it for later. */
462 ret = insert_dos_name(ctx->dos_name_map, name,
463 name_nbytes, mref & MFT_REF_MASK_CPU);
465 /* Return now if an error occurred or if this is just a DOS name
466 * and not a Win32+DOS name. */
467 if (ret != 0 || name_type == FILE_NAME_DOS)
/* Convert the name to the string type used for paths. */
470 ret = utf16le_to_tstr(name, name_nbytes,
471 &mbs_name, &mbs_name_nbytes);
/* Entries named . or .. are not captured. */
475 if (mbs_name[0] == '.' &&
476 (mbs_name[1] == '\0' ||
477 (mbs_name[1] == '.' && mbs_name[2] == '\0'))) {
480 * note: name_type is POSIX for these, so DOS names will not
481 * have been inserted for them. */
483 goto out_free_mbs_name;
486 /* Open the inode for this directory entry and recursively capture the
487 * directory tree rooted at it */
488 ntfs_inode *ni = ntfs_inode_open(ctx->vol, mref);
490 /* XXX This used to be treated as an error, but NTFS-3g seemed
491 * to be unable to read some inodes on a Windows 8 image for
493 WARNING_WITH_ERRNO("Failed to open NTFS file \"%s/%s\"",
494 ctx->path, mbs_name);
496 goto out_free_mbs_name;
498 path_len = ctx->path_len;
500 ctx->path[path_len++] = '/';
501 memcpy(ctx->path + path_len, mbs_name, mbs_name_nbytes + 1);
502 path_len += mbs_name_nbytes;
504 ret = build_dentry_tree_ntfs_recursive(&child, ni, ctx->path,
506 ctx->vol, ctx->params);
507 path_len -= mbs_name_nbytes + 1;
509 dentry_add_child(ctx->parent, child);
510 ntfs_inode_close(ni);
514 ctx->path[ctx->path_len] = '\0';
519 /* Recursively build a WIM dentry tree corresponding to an NTFS volume.
520 * At the same time, update the WIM lookup table with lookup table entries for
521 * the NTFS streams, and build an array of security descriptors.
524 build_dentry_tree_ntfs_recursive(struct wim_dentry **root_ret,
530 struct capture_params *params)
534 struct wim_dentry *root = NULL;
535 struct wim_inode *inode = NULL;
537 ret = try_exclude(path, path_len, params);
538 if (ret < 0) /* Excluded? */
540 if (ret > 0) /* Error? */
543 /* Get file attributes */
544 ret = ntfs_get_ntfs_attrib(ni, (char*)&attributes, sizeof(attributes));
545 if (ret != sizeof(attributes)) {
546 ERROR_WITH_ERRNO("Failed to get NTFS attributes from \"%s\"", path);
547 ret = WIMLIB_ERR_NTFS_3G;
/* The test below is true for encrypted files that are not directories.  With
 * WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE this is the error
 * WIMLIB_ERR_UNSUPPORTED_FILE; otherwise the path is reported through
 * do_capture_progress() as WIMLIB_SCAN_DENTRY_UNSUPPORTED. */
551 if ((attributes & (FILE_ATTRIBUTE_DIRECTORY |
552 FILE_ATTRIBUTE_ENCRYPTED)) == FILE_ATTRIBUTE_ENCRYPTED)
554 if (params->add_flags & WIMLIB_ADD_FLAG_NO_UNSUPPORTED_EXCLUDE)
556 ERROR("Can't archive unsupported encrypted file \"%s\"", path);
557 ret = WIMLIB_ERR_UNSUPPORTED_FILE;
560 params->progress.scan.cur_path = path;
561 ret = do_capture_progress(params, WIMLIB_SCAN_DENTRY_UNSUPPORTED, NULL);
565 /* Create a WIM dentry with an associated inode, which may be shared */
/* ni->mft_no is passed along when the dentry is created; presumably it
 * becomes the inode number that set_dentry_dos_name() later reads as i_ino
 * (confirm against inode_table_new_dentry). */
566 ret = inode_table_new_dentry(params->inode_table,
567 path_basename_with_len(path, path_len),
568 ni->mft_no, 0, false, &root);
572 if (name_type & FILE_NAME_WIN32) /* Win32 or Win32+DOS name (rather than POSIX) */
573 root->is_win32_name = 1;
575 inode = root->d_inode;
577 if (inode->i_nlink > 1) {
578 /* Shared inode; nothing more to do */
/* First sighting of this inode: copy timestamps and attributes from NTFS. */
582 inode->i_creation_time = le64_to_cpu(ni->creation_time);
583 inode->i_last_write_time = le64_to_cpu(ni->last_data_change_time);
584 inode->i_last_access_time = le64_to_cpu(ni->last_access_time);
585 inode->i_attributes = attributes;
586 inode->i_resolved = 1;
588 /* Capture streams. */
590 if (attributes & FILE_ATTR_REPARSE_POINT) {
591 /* Capture reparse data stream. */
592 ret = capture_ntfs_streams(inode, ni, path, path_len,
593 params->unhashed_streams,
594 vol, AT_REPARSE_POINT);
599 /* Capture data streams.
601 * Directories should not have an unnamed data stream, but they may have
602 * named data streams.
604 * Reparse points may have an unnamed data stream (which will be ignored
605 * in favor of the reparse data stream), and they also may have named
608 * Regular files can have an unnamed data stream as well as named data
610 ret = capture_ntfs_streams(inode, ni, path, path_len,
611 params->unhashed_streams, vol, AT_DATA);
615 if (ni->mrec->flags & MFT_RECORD_IS_DIRECTORY) {
617 /* Recurse to directory children */
619 struct dos_name_map dos_name_map = { .root = NULL };
620 struct readdir_ctx ctx = {
623 .path_len = path_len,
624 .dos_name_map = &dos_name_map,
/* ntfs_readdir() calls wim_ntfs_capture_filldir() with &ctx, which captures
 * each child and records DOS names in dos_name_map. */
629 ret = ntfs_readdir(ni, &pos, &ctx, wim_ntfs_capture_filldir);
635 /* error from ntfs_readdir() itself */
636 ERROR_WITH_ERRNO("Error reading directory \"%s\"", path);
637 ret = WIMLIB_ERR_NTFS_3G;
/* Once the directory has been read, give each child dentry the DOS name
 * collected for it, then discard the map. */
640 struct wim_dentry *child;
643 for_dentry_child(child, root) {
644 ret = set_dentry_dos_name(child, &dos_name_map);
649 destroy_dos_name_map(&dos_name_map);
653 path[path_len] = '\0';
655 /* Reparse-point fixups are a no-op because in NTFS-3g capture mode we
656 * only allow capturing an entire volume. */
657 if (params->add_flags & WIMLIB_ADD_FLAG_RPFIX &&
658 inode_is_symlink(inode))
659 inode->i_not_rpfixed = 0;
661 if (!(params->add_flags & WIMLIB_ADD_FLAG_NO_ACLS)) {
662 struct SECURITY_CONTEXT sec_ctx;
666 /* Get security descriptor */
667 memset(&sec_ctx, 0, sizeof(sec_ctx));
/* The descriptor is first requested into a buffer of sizeof(_sd) bytes.  When
 * the reported size is larger, the call is repeated with that size; the line
 * that provides the larger buffer (embedded line 674) is missing here.
 *
 * NOTE(review): if ret is a signed int (its declaration is not visible), a
 * negative error return is converted to a very large unsigned value in the
 * comparison with sizeof(_sd) and would take the retry branch instead of
 * reaching the ret < 0 handling further down.  Confirm the type of ret. */
672 ret = ntfs_get_ntfs_acl(&sec_ctx, ni, sd, sizeof(_sd));
673 if (ret > sizeof(_sd)) {
675 ret = ntfs_get_ntfs_acl(&sec_ctx, ni, sd, ret);
678 inode->i_security_id = sd_set_add_sd(params->sd_set,
680 if (inode->i_security_id == -1) {
681 ERROR("Out of memory");
682 ret = WIMLIB_ERR_NOMEM;
685 DEBUG("Added security ID = %u for `%s'",
686 inode->i_security_id, path);
688 } else if (ret < 0) {
689 ERROR_WITH_ERRNO("Failed to get security information from "
691 ret = WIMLIB_ERR_NTFS_3G;
693 inode->i_security_id = -1;
694 DEBUG("No security ID for `%s'", path);
/* Report the outcome for this path through the progress callback, either as
 * excluded or as captured.  On the failure path the partly built tree is
 * freed and the error goes through report_capture_error().  The conditions
 * that select between these calls are on lines missing from this extract. */
701 params->progress.scan.cur_path = path;
703 ret = do_capture_progress(params, WIMLIB_SCAN_DENTRY_EXCLUDED, NULL);
705 ret = do_capture_progress(params, WIMLIB_SCAN_DENTRY_OK, inode);
708 free_dentry_tree(root, params->lookup_table);
710 ret = report_capture_error(params, ret, path);
/* Unmount @vol with ntfs_umount(vol, FALSE).  A nonzero result from
 * ntfs_umount() is reported as WIMLIB_ERR_NTFS_3G; the success return is on a
 * line missing from this extract. */
718 do_ntfs_umount(struct _ntfs_volume *vol)
720 DEBUG("Unmounting NTFS volume");
721 if (ntfs_umount(vol, FALSE))
722 return WIMLIB_ERR_NTFS_3G;
/* Entry point for capturing a whole NTFS volume.
 *
 * Mounts the device read-only with ntfs_mount(), calls ntfs_open_secure(),
 * clears the show-system-files flag, opens the root inode (FILE_root),
 * allocates a 32768-byte path buffer and runs
 * build_dentry_tree_ntfs_recursive() on the root with a path length of 1 and
 * name type FILE_NAME_POSIX.
 *
 * Afterwards the root inode and the security index contexts are closed.  The
 * volume is then either unmounted or stored through params->extra_arg so that
 * it stays mounted for the later write step.
 *
 * Visible error results are WIMLIB_ERR_NTFS_3G and WIMLIB_ERR_NOMEM.
 *
 * NOTE(review): part of the parameter list, the local declarations, the
 * cleanup labels and the condition choosing between unmounting and keeping
 * the volume fall on lines missing from this extract. */
728 build_dentry_tree_ntfs(struct wim_dentry **root_p,
730 struct capture_params *params)
736 DEBUG("Mounting NTFS volume `%s' read-only", device);
738 /* NTFS-3g 2013 renamed the "read-only" mount flag from MS_RDONLY to
741 * Unfortunately we can't check for defined(NTFS_MNT_RDONLY) because
742 * NTFS_MNT_RDONLY is an enumerated constant. Also, the NTFS-3g headers don't
743 * seem to contain any explicit version information. So we have to rely on a
744 * test done at configure time to detect whether NTFS_MNT_RDONLY should be used.
746 #ifdef HAVE_NTFS_MNT_RDONLY
748 vol = ntfs_mount(device, NTFS_MNT_RDONLY);
749 #elif defined(MS_RDONLY)
750 /* NTFS-3g 2011, 2012 */
751 vol = ntfs_mount(device, MS_RDONLY);
753 #error "Can't find NTFS_MNT_RDONLY or MS_RDONLY flags"
756 ERROR_WITH_ERRNO("Failed to mount NTFS volume `%s' read-only",
758 return WIMLIB_ERR_NTFS_3G;
/* NOTE(review): the result of ntfs_open_secure() is not checked here. */
760 ntfs_open_secure(vol);
762 /* We don't want to capture the special NTFS files such as $Bitmap. Not
763 * to be confused with "hidden" or "system" files which are real files
764 * that we do need to capture. */
765 NVolClearShowSysFiles(vol);
767 DEBUG("Opening root NTFS dentry");
768 root_ni = ntfs_inode_open(vol, FILE_root);
770 ERROR_WITH_ERRNO("Failed to open root inode of NTFS volume "
772 ret = WIMLIB_ERR_NTFS_3G;
776 /* Currently we assume that all the paths fit into this length and there
777 * is no check for overflow. */
778 char *path = MALLOC(32768);
780 ERROR("Could not allocate memory for NTFS pathname");
781 ret = WIMLIB_ERR_NOMEM;
787 ret = build_dentry_tree_ntfs_recursive(root_p, root_ni, path, 1,
788 FILE_NAME_POSIX, vol, params);
791 ntfs_inode_close(root_ni);
/* Release the security index contexts and the inode held in vol->secure_*
 * (presumably set up by ntfs_open_secure() above). */
793 ntfs_index_ctx_put(vol->secure_xsii);
794 ntfs_index_ctx_put(vol->secure_xsdh);
795 ntfs_inode_close(vol->secure_ni);
798 if (do_ntfs_umount(vol)) {
799 ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%s'",
803 /* We need to leave the NTFS volume mounted so that we can read
804 * the NTFS files again when we are actually writing the WIM */
805 *(ntfs_volume**)params->extra_arg = vol;
809 #endif /* WITH_NTFS_3G */