wimlib.net Git - wimlib/blobdiff - src/ntfs-capture.c
NTFS capture updates
[wimlib] / src / ntfs-capture.c
index f49a7eeaa95332d914d0859133e8dbabbfbf3c62..751ce50f759e9f3e70b23666df451a02b236cdec 100644 (file)
@@ -48,12 +48,14 @@ extern int ntfs_inode_get_security(ntfs_inode *ni, u32 selection, char *buf,
 
 extern int ntfs_inode_get_attributes(ntfs_inode *ni);
 
-struct sd_tree {
-       u32 num_sds;
+/* Structure that allows searching the security descriptors by SHA1 message
+ * digest. */
+struct sd_set {
        struct wim_security_data *sd;
        struct sd_node *root;
 };
 
+/* Binary tree node of security descriptors, indexed by the @hash field. */
 struct sd_node {
        int security_id;
        u8 hash[SHA1_HASH_SIZE];
@@ -61,18 +63,20 @@ struct sd_node {
        struct sd_node *right;
 };
 
-static void free_sd_tree(struct sd_node *root)
+/* Frees a security descriptor index tree. */
+static void free_sd_set(struct sd_node *root)
 {
        if (root) {
-               free_sd_tree(root->left);
-               free_sd_tree(root->right);
+               free_sd_set(root->left);
+               free_sd_set(root->right);
                FREE(root);
        }
 }
 
+/* Inserts a new node into the security descriptor index tree. */
 static void insert_sd_node(struct sd_node *new, struct sd_node *root)
 {
-       int cmp = hashes_cmp(root->hash, new->hash);
+       int cmp = hashes_cmp(new->hash, root->hash);
        if (cmp < 0) {
                if (root->left)
                        insert_sd_node(new, root->left);
@@ -88,22 +92,33 @@ static void insert_sd_node(struct sd_node *new, struct sd_node *root)
        }
 }
 
-static int lookup_sd(const u8 hash[SHA1_HASH_SIZE], struct sd_node *node)
+/* Returns the security ID of the security descriptor whose SHA1 message digest
+ * is @hash in the security descriptor index tree rooted at @root.
+ *
+ * If not found, returns -1. */
+static int lookup_sd(const u8 hash[SHA1_HASH_SIZE], struct sd_node *root)
 {
        int cmp;
-       if (!node)
+       if (!root)
                return -1;
-       cmp = hashes_cmp(hash, node->hash);
+       cmp = hashes_cmp(hash, root->hash);
        if (cmp < 0)
-               return lookup_sd(hash, node->left);
+               return lookup_sd(hash, root->left);
        else if (cmp > 0)
-               return lookup_sd(hash, node->right);
+               return lookup_sd(hash, root->right);
        else
-               return node->security_id;
+               return root->security_id;
 }
 
-static int tree_add_sd(struct sd_tree *tree, const u8 *descriptor,
-                      size_t size)
+/*
+ * Adds a security descriptor to the indexed security descriptor set as well as
+ * the corresponding `struct wim_security_data', and returns the new security
+ * ID; or, if an identical security descriptor already exists, returns the
+ * security ID for it.  If a new security descriptor cannot be allocated,
+ * returns -1.
+ */
+static int sd_set_add_sd(struct sd_set *sd_set, const u8 *descriptor,
+                        size_t size)
 {
        u8 hash[SHA1_HASH_SIZE];
        int security_id;
@@ -111,25 +126,29 @@ static int tree_add_sd(struct sd_tree *tree, const u8 *descriptor,
        u8 **descriptors;
        u64 *sizes;
        u8 *descr_copy;
-       struct wim_security_data *sd = tree->sd;
-       sha1_buffer(descriptor, size, hash);
+       struct wim_security_data *sd;
 
-       security_id = lookup_sd(hash, tree->root);
+       sha1_buffer(descriptor, size, hash);
+       security_id = lookup_sd(hash, sd_set->root);
        if (security_id >= 0)
                return security_id;
 
-       new = MALLOC(sizeof(struct sd_node));
+       new = MALLOC(sizeof(*new));
        if (!new)
-               return -1;
+               goto out;
        descr_copy = MALLOC(size);
        if (!descr_copy)
                goto out_free_node;
+
+       sd = sd_set->sd;
+
        memcpy(descr_copy, descriptor, size);
-       new->security_id = tree->num_sds++;
+       new->security_id = sd->num_entries;
        new->left = NULL;
        new->right = NULL;
        copy_hash(new->hash, hash);
 
+
        descriptors = REALLOC(sd->descriptors,
                              (sd->num_entries + 1) * sizeof(sd->descriptors[0]));
        if (!descriptors)
@@ -143,45 +162,34 @@ static int tree_add_sd(struct sd_tree *tree, const u8 *descriptor,
        sd->descriptors[sd->num_entries] = descr_copy;
        sd->sizes[sd->num_entries] = size;
        sd->num_entries++;
-       sd->total_length += size + 8;
+       sd->total_length += size + sizeof(sd->sizes[0]);
 
-       if (tree->root)
-               insert_sd_node(tree->root, new);
+       if (sd_set->root)
+               insert_sd_node(sd_set->root, new);
        else
-               tree->root = new;
+               sd_set->root = new;
        return new->security_id;
 out_free_descr:
        FREE(descr_copy);
 out_free_node:
        FREE(new);
+out:
        return -1;
 }
 
-#if 0
-static int build_sd_tree(struct wim_security_data *sd, struct sd_tree *tree)
+static inline ntfschar *attr_record_name(ATTR_RECORD *ar)
 {
-       int ret;
-       u32 orig_num_entries = sd->num_entries;
-       u32 orig_total_length = sd->total_length;
-
-       tree->num_sds = 0;
-       tree->sd = sd;
-       tree->root = NULL;
-
-       for (u32 i = 0; i < sd->num_entries; i++) {
-               ret = tree_add_sd(tree, sd->descriptors[i], sd->sizes[i]);
-               if (ret < 0)
-                       goto out_revert;
-       }
-       return 0;
-out_revert:
-       sd->num_entries = orig_num_entries;
-       sd->total_length = orig_total_length;
-       free_sd_tree(tree->root);
-       return ret;
+       return (ntfschar*)((u8*)ar + le16_to_cpu(ar->name_offset));
 }
-#endif
 
+/* Calculates the SHA1 message digest of an NTFS attribute.
+ *
+ * @ni:  The NTFS inode containing the attribute.
+ * @ar:         The ATTR_RECORD describing the attribute.
+ * @md:  If successful, the returned SHA1 message digest.
+ *
+ * Return 0 on success or nonzero on error.
+ */
 static int ntfs_attr_sha1sum(ntfs_inode *ni, ATTR_RECORD *ar,
                             u8 md[SHA1_HASH_SIZE])
 {
@@ -191,8 +199,7 @@ static int ntfs_attr_sha1sum(ntfs_inode *ni, ATTR_RECORD *ar,
        ntfs_attr *na;
        SHA_CTX ctx;
 
-       na = ntfs_attr_open(ni, ar->type,
-                           (ntfschar*)((u8*)ar + le16_to_cpu(ar->name_offset)),
+       na = ntfs_attr_open(ni, ar->type, attr_record_name(ar),
                            ar->name_length);
        if (!na) {
                ERROR_WITH_ERRNO("Failed to open NTFS attribute");
@@ -202,6 +209,9 @@ static int ntfs_attr_sha1sum(ntfs_inode *ni, ATTR_RECORD *ar,
        bytes_remaining = na->data_size;
        sha1_init(&ctx);
 
+       DEBUG("Calculating SHA1 message digest (%"PRIu64" bytes)",
+                       bytes_remaining);
+
        while (bytes_remaining) {
                s64 to_read = min(bytes_remaining, sizeof(buf));
                if (ntfs_attr_pread(na, pos, to_read, buf) != to_read) {
@@ -217,11 +227,13 @@ static int ntfs_attr_sha1sum(ntfs_inode *ni, ATTR_RECORD *ar,
        return 0;
 }
 
-/* Load a normal file in the NTFS volume into the WIM lookup table */
-static int capture_normal_ntfs_file(struct dentry *dentry, ntfs_inode *ni,
-                                   char path[], size_t path_len,
-                                   struct lookup_table *lookup_table,
-                                   ntfs_volume **ntfs_vol_p)
+/* Load the streams from a file or reparse point in the NTFS volume into the
+ * WIM lookup table */
+static int capture_ntfs_streams(struct dentry *dentry, ntfs_inode *ni,
+                               char path[], size_t path_len,
+                               struct lookup_table *lookup_table,
+                               ntfs_volume **ntfs_vol_p,
+                               ATTR_TYPES type)
 {
 
        ntfs_attr_search_ctx *actx;
@@ -230,56 +242,94 @@ static int capture_normal_ntfs_file(struct dentry *dentry, ntfs_inode *ni,
        struct lookup_table_entry *lte;
        int ret = 0;
 
+       DEBUG("Capturing NTFS data streams from `%s'", path);
+
+       /* Get context to search the streams of the NTFS file. */
        actx = ntfs_attr_get_search_ctx(ni, NULL);
        if (!actx) {
                ERROR_WITH_ERRNO("Cannot get attribute search "
                                 "context");
                return WIMLIB_ERR_NTFS_3G;
        }
-       while (!ntfs_attr_lookup(AT_DATA, NULL, 0,
+
+       /* Capture each data stream or reparse data stream. */
+       while (!ntfs_attr_lookup(type, NULL, 0,
                                 CASE_SENSITIVE, 0, NULL, 0, actx))
        {
+               char *stream_name_utf8;
+               size_t stream_name_utf16_len;
+
+               /* Checksum the stream. */
                ret = ntfs_attr_sha1sum(ni, actx->attr, attr_hash);
                if (ret != 0)
                        goto out_put_actx;
+
+               /* Make a lookup table entry for the stream, or use an existing
+                * one if there's already an identical stream. */
                lte = __lookup_resource(lookup_table, attr_hash);
+               ret = WIMLIB_ERR_NOMEM;
                if (lte) {
                        lte->refcnt++;
                } else {
                        struct ntfs_location *ntfs_loc;
 
-                       ret = WIMLIB_ERR_NOMEM;
-
                        ntfs_loc = CALLOC(1, sizeof(*ntfs_loc));
-                       if (!ntfs_loc) {
+                       if (!ntfs_loc)
                                goto out_put_actx;
-                       }
+                       ntfs_loc->ntfs_vol_p = ntfs_vol_p;
                        ntfs_loc->path_utf8 = MALLOC(path_len + 1);
                        if (!ntfs_loc->path_utf8)
-                               goto out_put_actx;
+                               goto out_free_ntfs_loc;
                        memcpy(ntfs_loc->path_utf8, path, path_len + 1);
                        ntfs_loc->stream_name_utf16 = MALLOC(actx->attr->name_length * 2);
                        if (!ntfs_loc->stream_name_utf16)
-                               goto out_put_actx;
+                               goto out_free_ntfs_loc;
                        memcpy(ntfs_loc->stream_name_utf16,
-                              (u8*)actx->attr +
-                                       le16_to_cpu(actx->attr->name_offset),
+                              attr_record_name(actx->attr),
                               actx->attr->name_length * 2);
 
                        ntfs_loc->stream_name_utf16_num_chars = actx->attr->name_length;
+                       ntfs_loc->is_reparse_point = (type == AT_REPARSE_POINT);
                        lte = new_lookup_table_entry();
                        if (!lte)
-                               goto out_put_actx;
+                               goto out_free_ntfs_loc;
                        lte->ntfs_loc = ntfs_loc;
                        lte->resource_location = RESOURCE_IN_NTFS_VOLUME;
                        lte->resource_entry.original_size = actx->attr->data_size;
                        lte->resource_entry.size = actx->attr->data_size;
+                       DEBUG("Add resource for `%s' (size = %zu)",
+                               dentry->file_name_utf8,
+                               lte->resource_entry.original_size);
                        copy_hash(lte->hash, attr_hash);
                        lookup_table_insert(lookup_table, lte);
                }
-               dentry->lte = lte;
+               if (actx->attr->name_length == 0) {
+                       if (dentry->lte) {
+                               ERROR("Found two un-named data streams for "
+                                     "`%s'", path);
+                               ret = WIMLIB_ERR_NTFS_3G;
+                               goto out_free_lte;
+                       }
+                       dentry->lte = lte;
+               } else {
+                       struct ads_entry *new_ads_entry;
+                       stream_name_utf8 = utf16_to_utf8((const u8*)attr_record_name(actx->attr),
+                                                        actx->attr->name_length,
+                                                        &stream_name_utf16_len);
+                       if (!stream_name_utf8)
+                               goto out_free_lte;
+                       new_ads_entry = dentry_add_ads(dentry, stream_name_utf8);
+                       FREE(stream_name_utf8);
+                       if (!new_ads_entry)
+                               goto out_free_lte;
+                               
+                       new_ads_entry->lte = lte;
+               }
        }
+       ret = 0;
        goto out_put_actx;
+out_free_lte:
+       free_lookup_table_entry(lte);
 out_free_ntfs_loc:
        if (ntfs_loc) {
                FREE(ntfs_loc->path_utf8);
@@ -288,38 +338,167 @@ out_free_ntfs_loc:
        }
 out_put_actx:
        ntfs_attr_put_search_ctx(actx);
+       if (ret == 0)
+               DEBUG("Successfully captured NTFS streams from `%s'", path);
+       else
+               DEBUG("Failed to capture NTFS streams from `%s", path);
        return ret;
 }
 
-static int __build_dentry_tree_ntfs(struct dentry *dentry, ntfs_inode *ni,
+struct readdir_ctx {
+       struct dentry       *parent;
+       ntfs_inode          *dir_ni;
+       char                *path;
+       size_t               path_len;
+       struct lookup_table *lookup_table;
+       struct sd_set       *sd_set;
+       const struct capture_config *config;
+       ntfs_volume        **ntfs_vol_p;
+};
+
+static int __build_dentry_tree_ntfs(struct dentry **root_p, ntfs_inode *ni,
                                    char path[], size_t path_len,
                                    struct lookup_table *lookup_table,
-                                   struct sd_tree *tree,
+                                   struct sd_set *sd_set,
+                                   const struct capture_config *config,
+                                   ntfs_volume **ntfs_vol_p);
+
+
+static int wim_ntfs_capture_filldir(void *dirent, const ntfschar *name,
+                                   const int name_len, const int name_type,
+                                   const s64 pos, const MFT_REF mref,
+                                   const unsigned dt_type)
+{
+       struct readdir_ctx *ctx;
+       size_t utf8_name_len;
+       char *utf8_name;
+       struct dentry *child = NULL;
+       int ret;
+       size_t path_len;
+
+       if (name_type == FILE_NAME_DOS)
+               return 0;
+
+       ret = -1;
+
+       utf8_name = utf16_to_utf8((const u8*)name, name_len * 2,
+                                 &utf8_name_len);
+       if (!utf8_name)
+               goto out;
+
+       if (utf8_name[0] == '.' &&
+            (utf8_name[1] == '\0' ||
+             (utf8_name[1] == '.' && utf8_name[2] == '\0'))) {
+               DEBUG("Skipping dentry `%s'", utf8_name);
+               ret = 0;
+               goto out_free_utf8_name;
+       }
+
+       DEBUG("Opening inode for `%s'", utf8_name);
+
+       ctx = dirent;
+
+       ntfs_inode *ni = ntfs_inode_open(ctx->dir_ni->vol, mref);
+       if (!ni) {
+               ERROR_WITH_ERRNO("Failed to open NTFS inode");
+               ret = 1;
+       }
+       path_len = ctx->path_len;
+       if (path_len != 1)
+               ctx->path[path_len++] = '/';
+       memcpy(ctx->path + path_len, utf8_name, utf8_name_len + 1);
+       path_len += utf8_name_len;
+       ret = __build_dentry_tree_ntfs(&child, ni, ctx->path, path_len,
+                                      ctx->lookup_table, ctx->sd_set,
+                                      ctx->config, ctx->ntfs_vol_p);
+
+       if (child) {
+               DEBUG("Linking dentry `%s' with parent `%s'",
+                     child->file_name_utf8, ctx->parent->file_name_utf8);
+               link_dentry(child, ctx->parent);
+       }
+out_close_ni:
+       ntfs_inode_close(ni);
+out_free_utf8_name:
+       FREE(utf8_name);
+out:
+       return ret;
+}
+
+/* Recursively build a WIM dentry tree corresponding to an NTFS volume.
+ * At the same time, update the WIM lookup table with lookup table entries for
+ * the NTFS streams, and build an array of security descriptors.
+ */
+static int __build_dentry_tree_ntfs(struct dentry **root_p, ntfs_inode *ni,
+                                   char path[], size_t path_len,
+                                   struct lookup_table *lookup_table,
+                                   struct sd_set *sd_set,
+                                   const struct capture_config *config,
                                    ntfs_volume **ntfs_vol_p)
 {
-       u32 attributes = ntfs_inode_get_attributes(ni);
-       int mrec_flags = ni->mrec->flags;
+       u32 attributes;
+       int mrec_flags;
        u32 sd_size;
        int ret = 0;
+       struct dentry *root;
+
+       if (exclude_path(path, config, false)) {
+               DEBUG("Excluding `%s' from capture", path);
+               return 0;
+       }
+
+       DEBUG("Starting recursive capture at path = `%s'", path);
+       mrec_flags = ni->mrec->flags;
+       attributes = ntfs_inode_get_attributes(ni);
+
+       root = new_dentry(path_basename(path));
+       if (!root)
+               return WIMLIB_ERR_NOMEM;
 
-       dentry->creation_time    = le64_to_cpu(ni->creation_time);
-       dentry->last_write_time  = le64_to_cpu(ni->last_data_change_time);
-       dentry->last_access_time = le64_to_cpu(ni->last_access_time);
-       dentry->security_id      = le32_to_cpu(ni->security_id);
-       dentry->attributes       = le32_to_cpu(attributes);
-       dentry->resolved = true;
+       root->creation_time    = le64_to_cpu(ni->creation_time);
+       root->last_write_time  = le64_to_cpu(ni->last_data_change_time);
+       root->last_access_time = le64_to_cpu(ni->last_access_time);
+       root->security_id      = le32_to_cpu(ni->security_id);
+       root->attributes       = le32_to_cpu(attributes);
+       root->hard_link  = ni->mft_no;
+       root->resolved = true;
 
        if (attributes & FILE_ATTR_REPARSE_POINT) {
+               DEBUG("Reparse point `%s'", path);
                /* Junction point, symbolic link, or other reparse point */
+               ret = capture_ntfs_streams(root, ni, path, path_len,
+                                          lookup_table, ntfs_vol_p,
+                                          AT_REPARSE_POINT);
        } else if (mrec_flags & MFT_RECORD_IS_DIRECTORY) {
+               DEBUG("Directory `%s'", path);
+
                /* Normal directory */
+               s64 pos = 0;
+               struct readdir_ctx ctx = {
+                       .parent       = root,
+                       .dir_ni       = ni,
+                       .path         = path,
+                       .path_len     = path_len,
+                       .lookup_table = lookup_table,
+                       .sd_set       = sd_set,
+                       .config       = config,
+                       .ntfs_vol_p   = ntfs_vol_p,
+               };
+               ret = ntfs_readdir(ni, &pos, &ctx, wim_ntfs_capture_filldir);
+               if (ret != 0) {
+                       ERROR_WITH_ERRNO("ntfs_readdir()");
+                       ret = WIMLIB_ERR_NTFS_3G;
+               }
        } else {
+               DEBUG("Normal file `%s'", path);
                /* Normal file */
-               ret = capture_normal_ntfs_file(dentry, ni, path, path_len,
-                                              lookup_table, ntfs_vol_p);
+               ret = capture_ntfs_streams(root, ni, path, path_len,
+                                          lookup_table, ntfs_vol_p,
+                                          AT_DATA);
        }
        if (ret != 0)
                return ret;
+
        ret = ntfs_inode_get_security(ni,
                                      OWNER_SECURITY_INFORMATION |
                                      GROUP_SECURITY_INFORMATION |
@@ -333,24 +512,43 @@ static int __build_dentry_tree_ntfs(struct dentry *dentry, ntfs_inode *ni,
                                      DACL_SECURITY_INFORMATION  |
                                      SACL_SECURITY_INFORMATION,
                                      sd, sd_size, &sd_size);
-       dentry->security_id = tree_add_sd(tree, sd, sd_size);
-       return 0;
+       if (ret == 0) {
+               ERROR_WITH_ERRNO("Failed to get security information from "
+                                "`%s'", path);
+               ret = WIMLIB_ERR_NTFS_3G;
+       } else {
+               if (ret > 0) {
+                       /*print_security_descriptor(sd, sd_size);*/
+                       root->security_id = sd_set_add_sd(sd_set, sd, sd_size);
+                       DEBUG("Added security ID = %u for `%s'",
+                             root->security_id, path);
+               } else { 
+                       root->security_id = -1;
+                       DEBUG("No security ID for `%s'", path);
+               }
+               ret = 0;
+       }
+       *root_p = root;
+       return ret;
 }
 
-static int build_dentry_tree_ntfs(struct dentry *root_dentry,
+static int build_dentry_tree_ntfs(struct dentry **root_p,
                                  const char *device,
                                  struct lookup_table *lookup_table,
                                  struct wim_security_data *sd,
+                                 const struct capture_config *config,
                                  int flags,
                                  void *extra_arg)
 {
        ntfs_volume *vol;
        ntfs_inode *root_ni;
        int ret = 0;
-       struct sd_tree tree;
+       struct sd_set tree;
        tree.sd = sd;
        tree.root = NULL;
        ntfs_volume **ntfs_vol_p = extra_arg;
+
+       DEBUG("Mounting NTFS volume `%s' read-only", device);
        
        vol = ntfs_mount(device, MS_RDONLY);
        if (!vol) {
@@ -358,6 +556,10 @@ static int build_dentry_tree_ntfs(struct dentry *root_dentry,
                                 device);
                return WIMLIB_ERR_NTFS_3G;
        }
+
+       NVolClearShowSysFiles(vol);
+
+       DEBUG("Opening root NTFS dentry");
        root_ni = ntfs_inode_open(vol, FILE_root);
        if (!root_ni) {
                ERROR_WITH_ERRNO("Failed to open root inode of NTFS volume "
@@ -368,33 +570,40 @@ static int build_dentry_tree_ntfs(struct dentry *root_dentry,
        char path[4096];
        path[0] = '/';
        path[1] = '\0';
-       ret = __build_dentry_tree_ntfs(root_dentry, root_ni, path, 1,
-                                      lookup_table, &tree, ntfs_vol_p);
+       ret = __build_dentry_tree_ntfs(root_p, root_ni, path, 1,
+                                      lookup_table, &tree, config,
+                                      ntfs_vol_p);
        ntfs_inode_close(root_ni);
 
 out:
-       if (ntfs_umount(vol, FALSE) != 0) {
-               ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%s'", device);
-               if (ret == 0)
-                       ret = WIMLIB_ERR_NTFS_3G;
+       if (ret) {
+               if (ntfs_umount(vol, FALSE) != 0) {
+                       ERROR_WITH_ERRNO("Failed to unmount NTFS volume `%s'",
+                                        device);
+                       if (ret == 0)
+                               ret = WIMLIB_ERR_NTFS_3G;
+               }
+       } else {
+               *ntfs_vol_p = vol;
        }
        return ret;
 }
 
+
+
 WIMLIBAPI int wimlib_add_image_from_ntfs_volume(WIMStruct *w,
                                                const char *device,
                                                const char *name,
-                                               const char *description,
-                                               const char *flags_element,
+                                               const char *config_str,
+                                               size_t config_len,
                                                int flags)
 {
        if (flags & (WIMLIB_ADD_IMAGE_FLAG_DEREFERENCE)) {
                ERROR("Cannot dereference files when capturing directly from NTFS");
                return WIMLIB_ERR_INVALID_PARAM;
        }
-       return do_add_image(w, device, name, description, flags_element, flags,
-                           build_dentry_tree_ntfs,
-                           &w->ntfs_vol);
+       return do_add_image(w, device, name, config_str, config_len, flags,
+                           build_dentry_tree_ntfs, &w->ntfs_vol);
 }
 
 #else /* WITH_NTFS_3G */
@@ -403,7 +612,9 @@ WIMLIBAPI int wimlib_add_image_from_ntfs_volume(WIMStruct *w,
                                                const char *name,
                                                const char *description,
                                                const char *flags_element,
-                                               int flags)
+                                               int flags,
+                                               const char *config_str,
+                                               size_t config_len)
 {
        ERROR("wimlib was compiled without support for NTFS-3g, so");
        ERROR("we cannot capture a WIM image directly from a NTFS volume");