From 491079f27a268fb9e5bc7f54a7cbe3fbb9fa2d94 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 26 Aug 2012 12:50:35 -0500 Subject: [PATCH] NTFS capture updates (IN PROGRESS) Also, add preapply_dentry_with_dos_name() function --- programs/imagex.c | 66 +++++++++++++++++++---- src/hardlink.c | 61 ++++++++++++++++++--- src/ntfs-apply.c | 100 ++++++++++++++++++++++++++++------ src/ntfs-capture.c | 132 ++++++++++++++++++++++++--------------------- 4 files changed, 262 insertions(+), 97 deletions(-) diff --git a/programs/imagex.c b/programs/imagex.c index 1fce776b..134ff29d 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -80,14 +80,16 @@ static const char *path_basename(const char *path) static const char *usage_strings[] = { [APPEND] = -" imagex append DIRECTORY WIMFILE [\"IMAGE_NAME\"] [\"DESCRIPTION\"] [--boot]\n" -" [--check] [--flags EDITIONID] [--dereference]\n", +" imagex append (DIRECTORY | NTFS_VOLUME) WIMFILE [\"IMAGE_NAME\"]\n" +" [\"DESCRIPTION\"] [--boot] [--check] [--flags EDITIONID]\n" +" [--dereference]\n", [APPLY] = -" imagex apply WIMFILE [IMAGE_NUM | IMAGE_NAME | all] DIRECTORY [--check]\n" -" [--hardlink] [--symlink] [--verbose]\n", +" imagex apply WIMFILE [IMAGE_NUM | IMAGE_NAME | all]\n" +" (DIRECTORY | NTFS_VOLUME) [--check] [--hardlink]\n" +" [--symlink] [--verbose]\n", [CAPTURE] = -" imagex capture DIRECTORY WIMFILE [\"IMAGE_NAME\"] [\"DESCRIPTION\"]\n" -" l [--boot] [--check] [--compress[=TYPE]]\n" +" imagex capture (DIRECTORY | NTFS_VOLUME) WIMFILE [\"IMAGE_NAME\"]\n" +" [\"DESCRIPTION\"] [--boot] [--check] [--compress[=TYPE]]\n" " [--flags \"EditionID\"] [--verbose] [--dereference]\n", [DELETE] = " imagex delete WIMFILE (IMAGE_NUM | IMAGE_NAME | all) [--check]\n", @@ -341,12 +343,34 @@ static int imagex_append(int argc, const char **argv) if (ret != 0) return ret; +#ifdef WITH_NTFS_3G + struct stat stbuf; + + ret = stat(dir, &stbuf); + if (ret == 0) { + if (S_ISBLK(stbuf.st_mode) || S_ISREG(stbuf.st_mode)) { + const char 
*ntfs_device = dir; + printf("Capturing WIM image NTFS filesystem on `%s'\n", + ntfs_device); + ret = wimlib_add_image_from_ntfs_volume(w, ntfs_device, + name, desc, + flags_element, + add_image_flags); + goto out_write; + } + } else { + if (errno != -ENOENT) + imagex_error_with_errno("Failed to stat `%s'", dir); + } +#endif ret = wimlib_add_image(w, dir, name, desc, flags_element, add_image_flags); + +out_write: if (ret != 0) - goto done; + goto out; ret = wimlib_overwrite(w, write_flags); -done: +out: wimlib_free(w); return ret; } @@ -504,17 +528,39 @@ static int imagex_capture(int argc, const char **argv) if (ret != 0) return ret; +#ifdef WITH_NTFS_3G + struct stat stbuf; + + ret = stat(dir, &stbuf); + if (ret == 0) { + if (S_ISBLK(stbuf.st_mode) || S_ISREG(stbuf.st_mode)) { + const char *ntfs_device = dir; + printf("Capturing WIM image NTFS filesystem on `%s'\n", + ntfs_device); + ret = wimlib_add_image_from_ntfs_volume(w, ntfs_device, + name, desc, + flags_element, + add_image_flags); + goto out_write; + } + } else { + if (errno != -ENOENT) + imagex_error_with_errno("Failed to stat `%s'", dir); + } +#endif ret = wimlib_add_image(w, dir, name, desc, flags_element, add_image_flags); + +out_write: if (ret != 0) { imagex_error("Failed to add the image `%s'", dir); - goto done; + goto out; } ret = wimlib_write(w, wimfile, WIM_ALL_IMAGES, write_flags); if (ret != 0) imagex_error("Failed to write the WIM file `%s'", wimfile); -done: +out: wimlib_free(w); return ret; } diff --git a/src/hardlink.c b/src/hardlink.c index c5324e74..f5d3e1c5 100644 --- a/src/hardlink.c +++ b/src/hardlink.c @@ -235,10 +235,32 @@ static bool dentries_have_same_ads(const struct dentry *d1, return true; } -/* Share the alternate stream entries between hard-linked dentries. */ +/* + * Share the alternate stream entries between hard-linked dentries. 
+ * + * Notes: + * - If you use 'imagex.exe' (version 6.1.7600.16385) to create a WIM containing + * hard-linked files, only one dentry in the hard link set will refer to data + * streams, including all alternate data streams. The rest of the dentries in + * the hard link set will be marked as having 0 alternate data streams and + * will not refer to any main file stream (the SHA1 message digest will be all + * 0's). + * + * - However, if you look at the WIMs that Microsoft actually distributes (e.g. + * Windows 7/8 boot.wim, install.wim), they are not laid out as described above. The + * dentries in hard link sets will have stream information duplicated. I + * can't say anything about the alternate data streams because these WIMs do + * not contain alternate data streams. + * + * - Windows 7 'install.wim' contains hard link sets containing dentries with + * inconsistent streams and other inconsistent information such as security + * ID. The only way I can think to handle these is to treat the hard link + * grouping as erroneous and split up the hard link group. 
+ */ static int share_dentry_ads(struct dentry *owner, struct dentry *user) { const char *mismatch_type; + bool data_streams_shared = true; wimlib_assert(owner->num_ads == 0 || owner->ads_entries != user->ads_entries); if (owner->attributes != user->attributes) { @@ -255,12 +277,19 @@ static int share_dentry_ads(struct dentry *owner, struct dentry *user) goto mismatch; } if (!hashes_equal(owner->hash, user->hash)) { - mismatch_type = "main file resource"; - goto mismatch; + if (is_zero_hash(user->hash)) { + data_streams_shared = false; + copy_hash(user->hash, owner->hash); + } else { + mismatch_type = "main file resource"; + goto mismatch; + } } - if (!dentries_have_same_ads(owner, user)) { - mismatch_type = "Alternate Stream Entries"; - goto mismatch; + if (data_streams_shared) { + if (!dentries_have_same_ads(owner, user)) { + mismatch_type = "Alternate Stream Entries"; + goto mismatch; + } } dentry_free_ads_entries(user); user->ads_entries = owner->ads_entries; @@ -279,11 +308,27 @@ static int link_group_free_duplicate_data(struct link_group *group, { struct dentry *owner, *user, *tmp; + /* Find a dentry with non-zero hash to use as a possible link group + * owner (see comments above the share_dentry_ads() function */ owner = container_of(group->dentry_list, struct dentry, link_group_list); - owner->ads_entries_status = ADS_ENTRIES_OWNER; + do { + /* imagex.exe may move the un-named data stream from the dentry + * itself to the first alternate data stream, if there are + * other alternate data streams */ + if (!is_zero_hash(owner->hash) || + (owner->num_ads && !is_zero_hash(owner->ads_entries[0].hash))) + goto found_owner; + owner = container_of(owner->link_group_list.next, + struct dentry, + link_group_list); + } while (&owner->link_group_list != group->dentry_list); - list_for_each_entry_safe(user, tmp, group->dentry_list, + ERROR("Could not find owner of data streams in hard link group"); + return WIMLIB_ERR_INVALID_DENTRY; +found_owner: + 
owner->ads_entries_status = ADS_ENTRIES_OWNER; + list_for_each_entry_safe(user, tmp, &owner->link_group_list, link_group_list) { /* I would like it to be an error if two dentries are in the diff --git a/src/ntfs-apply.c b/src/ntfs-apply.c index 47d86523..3ac62291 100644 --- a/src/ntfs-apply.c +++ b/src/ntfs-apply.c @@ -262,6 +262,74 @@ static int apply_reparse_data(ntfs_inode *ni, const struct dentry *dentry, return 0; } +static int do_wim_apply_dentry_ntfs(struct dentry *dentry, ntfs_inode *dir_ni, + WIMStruct *w); + +/* + * If @dentry is part of a hard link group, search for hard-linked dentries in + * the same directory that have a nonempty DOS (short) filename. There should + * be exactly 0 or 1 such dentries. If there is 1, extract that dentry first, + * so that the DOS name is correctly associated with the corresponding long name + * in the Win32 namespace, and not any of the additional names in the POSIX + * namespace created from hard links. + */ +static int preapply_dentry_with_dos_name(struct dentry *dentry, + ntfs_inode **dir_ni_p, + WIMStruct *w) +{ + int ret; + struct dentry *other; + struct dentry *dentry_with_dos_name; + + if (dentry->link_group_list.next == &dentry->link_group_list) + return 0; + + dentry_with_dos_name = NULL; + list_for_each_entry(other, &dentry->link_group_list, + link_group_list) + { + if (dentry->parent == other->parent && other->short_name_len) { + if (dentry_with_dos_name) { + ERROR("Found multiple DOS names for file `%s' " + "in the same directory", + dentry_with_dos_name->full_path_utf8); + return WIMLIB_ERR_INVALID_DENTRY; + } + dentry_with_dos_name = other; + } + } + /* If there's a dentry with a DOS name, extract it first */ + if (dentry_with_dos_name && !dentry_with_dos_name->extracted_file) { + char *p; + const char *dir_name; + char orig; + ntfs_volume *vol = (*dir_ni_p)->vol; + + DEBUG("pre-applying DOS name `%s'", dentry_with_dos_name); + ret = do_wim_apply_dentry_ntfs(dentry_with_dos_name, + *dir_ni_p, w); + if (ret 
!= 0) + return ret; + p = dentry->full_path_utf8 + dentry->full_path_utf8_len; + do { + p--; + } while (*p != '/'); + + orig = *p; + *p = '\0'; + dir_name = dentry->full_path_utf8; + + *dir_ni_p = ntfs_pathname_to_inode(vol, NULL, dir_name); + *p = orig; + if (!*dir_ni_p) { + ERROR_WITH_ERRNO("Could not find NTFS inode for `%s'", + dir_name); + return WIMLIB_ERR_NTFS_3G; + } + } + return 0; +} + /* * Applies a WIM dentry to a NTFS filesystem. * @@ -285,28 +353,22 @@ static int do_wim_apply_dentry_ntfs(struct dentry *dentry, ntfs_inode *dir_ni, } else { struct dentry *other; + ret = preapply_dentry_with_dos_name(dentry, &dir_ni, w); + if (ret != 0) + return ret; + type = S_IFREG; - /* If this dentry is one of a hard link set of at least 2 - * dentries. If one of the other dentries has already - * been extracted, make a hard link to it. Otherwise, - * extract the file, and set the dentry->extracted_file - * field so that other dentries in the hard link group - * can link to it. */ + /* See if we can make a hard link */ list_for_each_entry(other, &dentry->link_group_list, - link_group_list) - { + link_group_list) { if (other->extracted_file) { - is_hardlink = true; - ret = wim_apply_hardlink_ntfs(dentry, - other, - dir_ni, - &ni); + ret = wim_apply_hardlink_ntfs(dentry, other, + dir_ni, &ni); if (ret != 0) - goto out_close_dir_ni; - else - goto out_set_dos_name; + return ret; } } + /* Can't make a hard link */ FREE(dentry->extracted_file); dentry->extracted_file = STRDUP(dentry->full_path_utf8); if (!dentry->extracted_file) { @@ -475,6 +537,9 @@ static int wim_apply_dentry_ntfs(struct dentry *dentry, void *arg) ntfs_inode *close_after_dir; const char *dir_name; + if (dentry->extracted_file) + return 0; + wimlib_assert(dentry->full_path_utf8); DEBUG("Applying dentry `%s' to NTFS", dentry->full_path_utf8); @@ -495,13 +560,14 @@ static int wim_apply_dentry_ntfs(struct dentry *dentry, void *arg) dir_name = dentry->full_path_utf8; dir_ni = ntfs_pathname_to_inode(vol, NULL, 
dir_name); + if (dir_ni) + DEBUG("Found NTFS inode for `%s'", dir_name); *p = orig; if (!dir_ni) { ERROR_WITH_ERRNO("Could not find NTFS inode for `%s'", dir_name); return WIMLIB_ERR_NTFS_3G; } - DEBUG("Found NTFS inode for `%s'", dir_name); return do_wim_apply_dentry_ntfs(dentry, dir_ni, w); } diff --git a/src/ntfs-capture.c b/src/ntfs-capture.c index 0a27353c..08868419 100644 --- a/src/ntfs-capture.c +++ b/src/ntfs-capture.c @@ -48,12 +48,14 @@ extern int ntfs_inode_get_security(ntfs_inode *ni, u32 selection, char *buf, extern int ntfs_inode_get_attributes(ntfs_inode *ni); -struct sd_tree { - u32 num_sds; +/* Structure that allows searching the security descriptors by SHA1 message + * digest. */ +struct sd_set { struct wim_security_data *sd; struct sd_node *root; }; +/* Binary tree node of security descriptors, indexed by the @hash field. */ struct sd_node { int security_id; u8 hash[SHA1_HASH_SIZE]; @@ -61,18 +63,20 @@ struct sd_node { struct sd_node *right; }; -static void free_sd_tree(struct sd_node *root) +/* Frees a security descriptor index tree. */ +static void free_sd_set(struct sd_node *root) { if (root) { - free_sd_tree(root->left); - free_sd_tree(root->right); + free_sd_set(root->left); + free_sd_set(root->right); FREE(root); } } +/* Inserts a a new node into the security descriptor index tree. */ static void insert_sd_node(struct sd_node *new, struct sd_node *root) { - int cmp = hashes_cmp(root->hash, new->hash); + int cmp = hashes_cmp(new->hash, root->hash); if (cmp < 0) { if (root->left) insert_sd_node(new, root->left); @@ -88,22 +92,33 @@ static void insert_sd_node(struct sd_node *new, struct sd_node *root) } } -static int lookup_sd(const u8 hash[SHA1_HASH_SIZE], struct sd_node *node) +/* Returns the security ID of the security data having a SHA1 message digest of + * @hash in the security descriptor index tree rooted at @root. + * + * If not found, return -1. 
*/ +static int lookup_sd(const u8 hash[SHA1_HASH_SIZE], struct sd_node *root) { int cmp; - if (!node) + if (!root) return -1; - cmp = hashes_cmp(hash, node->hash); + cmp = hashes_cmp(hash, root->hash); if (cmp < 0) - return lookup_sd(hash, node->left); + return lookup_sd(hash, root->left); else if (cmp > 0) - return lookup_sd(hash, node->right); + return lookup_sd(hash, root->right); else - return node->security_id; + return root->security_id; } -static int tree_add_sd(struct sd_tree *tree, const u8 *descriptor, - size_t size) +/* + * Adds a security descriptor to the indexed security descriptor set as well as + * the corresponding `struct wim_security_data', and returns the new security + * ID; or, if there is an existing security descriptor that is the same, return + * the security ID for it. If a new security descriptor cannot be allocated, + * return -1. + */ +static int sd_set_add_sd(struct sd_set *sd_set, const u8 *descriptor, + size_t size) { u8 hash[SHA1_HASH_SIZE]; int security_id; @@ -111,25 +126,29 @@ static int tree_add_sd(struct sd_tree *tree, const u8 *descriptor, u8 **descriptors; u64 *sizes; u8 *descr_copy; - struct wim_security_data *sd = tree->sd; + struct wim_security_data *sd; sha1_buffer(descriptor, size, hash); - security_id = lookup_sd(hash, tree->root); + security_id = lookup_sd(hash, sd_set->root); if (security_id >= 0) return security_id; - new = MALLOC(sizeof(struct sd_node)); + new = MALLOC(sizeof(*new)); if (!new) - return -1; + goto out; descr_copy = MALLOC(size); if (!descr_copy) goto out_free_node; + + sd = sd_set->sd; + memcpy(descr_copy, descriptor, size); - new->security_id = tree->num_sds++; + new->security_id = sd->num_entries; new->left = NULL; new->right = NULL; copy_hash(new->hash, hash); + descriptors = REALLOC(sd->descriptors, (sd->num_entries + 1) * sizeof(sd->descriptors[0])); if (!descriptors) @@ -143,45 +162,34 @@ static int tree_add_sd(struct sd_tree *tree, const u8 *descriptor, sd->descriptors[sd->num_entries] = 
descr_copy; sd->sizes[sd->num_entries] = size; sd->num_entries++; - sd->total_length += size + 8; + sd->total_length += size + sizeof(sd->sizes[0]); - if (tree->root) - insert_sd_node(tree->root, new); + if (sd_set->root) + insert_sd_node(sd_set->root, new); else - tree->root = new; + sd_set->root = new; return new->security_id; out_free_descr: FREE(descr_copy); out_free_node: FREE(new); +out: return -1; } -#if 0 -static int build_sd_tree(struct wim_security_data *sd, struct sd_tree *tree) +static inline ntfschar *attr_record_name(ATTR_RECORD *ar) { - int ret; - u32 orig_num_entries = sd->num_entries; - u32 orig_total_length = sd->total_length; - - tree->num_sds = 0; - tree->sd = sd; - tree->root = NULL; - - for (u32 i = 0; i < sd->num_entries; i++) { - ret = tree_add_sd(tree, sd->descriptors[i], sd->sizes[i]); - if (ret < 0) - goto out_revert; - } - return 0; -out_revert: - sd->num_entries = orig_num_entries; - sd->total_length = orig_total_length; - free_sd_tree(tree->root); - return ret; + return (ntfschar*)((u8*)ar + le16_to_cpu(ar->name_offset)); } -#endif +/* Calculates the SHA1 message digest of a NTFS attribute. + * + * @ni: The NTFS inode containing the attribute. + * @ar: The ATTR_RECORD describing the attribute. + * @md: If successful, the returned SHA1 message digest. + * + * Return 0 on success or nonzero on error. 
+ */ static int ntfs_attr_sha1sum(ntfs_inode *ni, ATTR_RECORD *ar, u8 md[SHA1_HASH_SIZE]) { @@ -191,8 +199,7 @@ static int ntfs_attr_sha1sum(ntfs_inode *ni, ATTR_RECORD *ar, ntfs_attr *na; SHA_CTX ctx; - na = ntfs_attr_open(ni, ar->type, - (ntfschar*)((u8*)ar + le16_to_cpu(ar->name_offset)), + na = ntfs_attr_open(ni, ar->type, attr_record_name(ar), ar->name_length); if (!na) { ERROR_WITH_ERRNO("Failed to open NTFS attribute"); @@ -263,20 +270,18 @@ static int capture_ntfs_streams(struct dentry *dentry, ntfs_inode *ni, ntfs_loc = CALLOC(1, sizeof(*ntfs_loc)); - if (!ntfs_loc) { + if (!ntfs_loc) goto out_put_actx; - } ntfs_loc->ntfs_vol_p = ntfs_vol_p; ntfs_loc->path_utf8 = MALLOC(path_len + 1); if (!ntfs_loc->path_utf8) - goto out_put_actx; + goto out_free_ntfs_loc; memcpy(ntfs_loc->path_utf8, path, path_len + 1); ntfs_loc->stream_name_utf16 = MALLOC(actx->attr->name_length * 2); if (!ntfs_loc->stream_name_utf16) goto out_free_ntfs_loc; memcpy(ntfs_loc->stream_name_utf16, - (u8*)actx->attr + - le16_to_cpu(actx->attr->name_offset), + attr_record_name(actx->attr), actx->attr->name_length * 2); ntfs_loc->stream_name_utf16_num_chars = actx->attr->name_length; @@ -295,8 +300,7 @@ static int capture_ntfs_streams(struct dentry *dentry, ntfs_inode *ni, dentry->lte = lte; } else { struct ads_entry *new_ads_entry; - stream_name_utf8 = utf16_to_utf8((u8*)actx->attr + - le16_to_cpu(actx->attr->name_offset), + stream_name_utf8 = utf16_to_utf8((const u8*)attr_record_name(actx->attr), actx->attr->name_length, &stream_name_utf16_len); if (!stream_name_utf8) @@ -330,14 +334,14 @@ struct readdir_ctx { char *path; size_t path_len; struct lookup_table *lookup_table; - struct sd_tree *tree; + struct sd_set *sd_set; ntfs_volume **ntfs_vol_p; }; static int __build_dentry_tree_ntfs(struct dentry *dentry, ntfs_inode *ni, char path[], size_t path_len, struct lookup_table *lookup_table, - struct sd_tree *tree, + struct sd_set *sd_set, ntfs_volume **ntfs_vol_p); @@ -373,7 +377,7 @@ static int 
filldir(void *dirent, const ntfschar *name, memcpy(ctx->path + ctx->path_len, utf8_name, utf8_name_len + 1); path_len = ctx->path_len + utf8_name_len; ret = __build_dentry_tree_ntfs(child, ni, ctx->path, path_len, - ctx->lookup_table, ctx->tree, + ctx->lookup_table, ctx->sd_set, ctx->ntfs_vol_p); link_dentry(child, ctx->dentry); out_close_ni: @@ -391,7 +395,7 @@ out: static int __build_dentry_tree_ntfs(struct dentry *dentry, ntfs_inode *ni, char path[], size_t path_len, struct lookup_table *lookup_table, - struct sd_tree *tree, + struct sd_set *sd_set, ntfs_volume **ntfs_vol_p) { u32 attributes = ntfs_inode_get_attributes(ni); @@ -421,7 +425,7 @@ static int __build_dentry_tree_ntfs(struct dentry *dentry, ntfs_inode *ni, .path = path, .path_len = path_len, .lookup_table = lookup_table, - .tree = tree, + .sd_set = sd_set, .ntfs_vol_p = ntfs_vol_p, }; ret = ntfs_readdir(ni, &pos, &ctx, filldir); @@ -448,8 +452,12 @@ static int __build_dentry_tree_ntfs(struct dentry *dentry, ntfs_inode *ni, DACL_SECURITY_INFORMATION | SACL_SECURITY_INFORMATION, sd, sd_size, &sd_size); - dentry->security_id = tree_add_sd(tree, sd, sd_size); - return 0; + dentry->security_id = sd_set_add_sd(sd_set, sd, sd_size); + if (dentry->security_id == -1) { + ERROR("Could not allocate security ID"); + ret = WIMLIB_ERR_NOMEM; + } + return ret; } static int build_dentry_tree_ntfs(struct dentry *root_dentry, @@ -462,7 +470,7 @@ static int build_dentry_tree_ntfs(struct dentry *root_dentry, ntfs_volume *vol; ntfs_inode *root_ni; int ret = 0; - struct sd_tree tree; + struct sd_set tree; tree.sd = sd; tree.root = NULL; ntfs_volume **ntfs_vol_p = extra_arg; -- 2.43.0