From 00d7680491529e7fd95f5721bd01795a730fc440 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sun, 18 Oct 2015 19:39:30 -0500 Subject: [PATCH] Don't unnecessarily rebuild exported metadata resources - When image metadata is modified, release the descriptor for the backing resource (if any), since it can no longer be used - In write_metadata_resources(), allow copying metadata resources from a WIM file other than the one being written - Remove the 'modified' and 'dont_check_metadata_hash' fields which are no longer needed --- include/wimlib/blob_table.h | 10 +++---- include/wimlib/metadata.h | 40 +++++++++++++++++++++++----- src/add_image.c | 2 -- src/blob_table.c | 5 +++- src/export_image.c | 5 ---- src/extract.c | 1 - src/metadata_resource.c | 20 +++++--------- src/mount_image.c | 13 +++------ src/template.c | 2 +- src/update_image.c | 2 +- src/wim.c | 6 ++--- src/write.c | 53 +++++++++++++++++++++++-------------- 12 files changed, 90 insertions(+), 69 deletions(-) diff --git a/include/wimlib/blob_table.h b/include/wimlib/blob_table.h index c5295820..abc0c10f 100644 --- a/include/wimlib/blob_table.h +++ b/include/wimlib/blob_table.h @@ -152,13 +152,6 @@ struct blob_descriptor { u16 unique_size : 1; u16 will_be_in_output_wim : 1; - /* Set to 1 if this blob represents a metadata resource that has been - * changed. In such cases, the hash cannot be used to verify the data - * if the metadata resource is read again. (This could be avoided if we - * used separate fields for input/output checksum, but most blobs - * wouldn't need this.) */ - u16 dont_check_metadata_hash : 1; - u16 may_send_done_with_file : 1; /* Only used by wimlib_export_image() */ @@ -313,6 +306,9 @@ extern void blob_decrement_num_opened_fds(struct blob_descriptor *blob); #endif +extern void +blob_release_location(struct blob_descriptor *blob); + extern void free_blob_descriptor(struct blob_descriptor *blob); diff --git a/include/wimlib/metadata.h b/include/wimlib/metadata.h index 10cd414c..845d3954 100644 --- a/include/wimlib/metadata.h +++ b/include/wimlib/metadata.h @@ -1,6 +1,7 @@ #ifndef _WIMLIB_METADATA_H #define _WIMLIB_METADATA_H +#include "wimlib/blob_table.h" #include "wimlib/list.h" #include "wimlib/types.h" #include "wimlib/wim.h" @@ -18,7 +19,12 @@ struct wim_image_metadata { /* Pointer to the security data of the image. */ struct wim_security_data *security_data; - /* Pointer to the blob descriptor for this image's metadata resource */ + /* Pointer to the blob descriptor for this image's metadata resource. + * If this image metadata is sourced from a WIM file (as opposed to + * being created from scratch) and hasn't been modified from the version + * in that WIM file, then this blob descriptor's data corresponds to the + * WIM backing source. Otherwise, this blob descriptor is a dummy entry + * with blob_location==BLOB_NONEXISTENT. */ struct blob_descriptor *metadata_blob; /* Linked list of 'struct wim_inode's for this image. */ @@ -30,11 +36,6 @@ struct wim_image_metadata { * into the WIMStruct's blob table. This list is appended to when files * are scanned for inclusion in this WIM image. */ struct list_head unhashed_blobs; - - /* 1 iff the dentry tree has been modified from the original stored in - * the WIM file. If this is the case, the memory for the dentry tree - * should not be freed when switching to a different WIM image. */ - u8 modified : 1; }; /* Retrieve the metadata of the image in @wim currently selected with @@ -61,6 +62,33 @@ wim_get_current_security_data(WIMStruct *wim) return wim_get_current_image_metadata(wim)->security_data; } +/* Return true iff the specified image has been changed since being read from + * its backing file or has been created from scratch. */ +static inline bool +is_image_dirty(const struct wim_image_metadata *imd) +{ + /* The only possible values here are BLOB_NONEXISTENT and BLOB_IN_WIM */ + return imd->metadata_blob->blob_location == BLOB_NONEXISTENT; +} + +/* Return true iff the specified image is unchanged since being read from the + * specified backing WIM file. */ +static inline bool +is_image_unchanged_from_wim(const struct wim_image_metadata *imd, + const WIMStruct *wim) +{ + return !is_image_dirty(imd) && imd->metadata_blob->rdesc->wim == wim; +} + +/* Mark the metadata for the specified WIM image "dirty" following changes to + * the image's directory tree. This records that the metadata no longer matches + * the version in the WIM file (if any). */ +static inline void +mark_image_dirty(struct wim_image_metadata *imd) +{ + blob_release_location(imd->metadata_blob); +} + /* Iterate over each inode in a WIM image */ #define image_for_each_inode(inode, imd) \ hlist_for_each_entry(inode, &(imd)->inode_list, i_hlist_node) diff --git a/src/add_image.c b/src/add_image.c index 2a4a6f09..b4eb0a9e 100644 --- a/src/add_image.c +++ b/src/add_image.c @@ -49,7 +49,6 @@ add_empty_image_metadata(WIMStruct *wim) goto out; metadata_blob->refcnt = 1; - metadata_blob->unhashed = 1; metadata_blob->is_metadata = 1; /* Create empty security data (no security descriptors). */ @@ -66,7 +65,6 @@ add_empty_image_metadata(WIMStruct *wim) imd->root_dentry = NULL; imd->metadata_blob = metadata_blob; imd->security_data = sd; - imd->modified = 1; /* Append as next image index. */ ret = append_image_metadata(wim, imd); diff --git a/src/blob_table.c b/src/blob_table.c index 96290e8d..39193b42 100644 --- a/src/blob_table.c +++ b/src/blob_table.c @@ -162,7 +162,9 @@ out_free: return NULL; } -static void +/* Release a blob descriptor from its location, if any, and set its new location + * to BLOB_NONEXISTENT. */ +void blob_release_location(struct blob_descriptor *blob) { switch (blob->blob_location) { @@ -193,6 +195,7 @@ blob_release_location(struct blob_descriptor *blob) break; #endif } + blob->blob_location = BLOB_NONEXISTENT; } void diff --git a/src/export_image.c b/src/export_image.c index b7f4511e..ff20a73f 100644 --- a/src/export_image.c +++ b/src/export_image.c @@ -229,11 +229,6 @@ wimlib_export_image(WIMStruct *src_wim, if (ret) goto out_rollback; src_imd->refcnt++; - - /* Lock the metadata into memory. XXX: need better solution for - * this. */ - src_imd->modified = 1; - } /* Image export complete. Finish by setting any needed special metadata diff --git a/src/extract.c b/src/extract.c index 04d484f4..0d7ec687 100644 --- a/src/extract.c +++ b/src/extract.c @@ -1936,7 +1936,6 @@ wimlib_extract_image_from_pipe_with_progress(int pipe_fd, ret = read_metadata_resource(imd); if (ret) goto out_wimlib_free; - imd->modified = 1; } else { /* Metadata resource is not for the image being * extracted. Skip over it. */ diff --git a/src/metadata_resource.c b/src/metadata_resource.c index c0467ef6..4cd53032 100644 --- a/src/metadata_resource.c +++ b/src/metadata_resource.c @@ -73,6 +73,7 @@ read_metadata_resource(struct wim_image_metadata *imd) const struct blob_descriptor *metadata_blob; void *buf; int ret; + u8 hash[SHA1_HASH_SIZE]; struct wim_security_data *sd; struct wim_dentry *root; @@ -84,16 +85,12 @@ read_metadata_resource(struct wim_image_metadata *imd) return ret; /* Checksum the metadata resource. */ - if (!metadata_blob->dont_check_metadata_hash) { - u8 hash[SHA1_HASH_SIZE]; - - sha1_buffer(buf, metadata_blob->size, hash); - if (!hashes_equal(metadata_blob->hash, hash)) { - ERROR("Metadata resource is corrupted " - "(invalid SHA-1 message digest)!"); - ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; - goto out_free_buf; - } + sha1_buffer(buf, metadata_blob->size, hash); + if (!hashes_equal(metadata_blob->hash, hash)) { + ERROR("Metadata resource is corrupted " + "(invalid SHA-1 message digest)!"); + ret = WIMLIB_ERR_INVALID_METADATA_RESOURCE; + goto out_free_buf; } /* Parse the metadata resource. @@ -245,9 +242,6 @@ write_metadata_resource(WIMStruct *wim, int image, int write_resource_flags) imd->metadata_blob->hash, write_resource_flags); - /* Original checksum was overridden; set a flag so it isn't used. */ - imd->metadata_blob->dont_check_metadata_hash = 1; - FREE(buf); return ret; } diff --git a/src/mount_image.c b/src/mount_image.c index 83f176a1..239184aa 100644 --- a/src/mount_image.c +++ b/src/mount_image.c @@ -1051,7 +1051,6 @@ renew_current_image(struct wimfs_context *ctx) goto err_put_replace_imd; new_blob->refcnt = 1; - new_blob->unhashed = 1; new_blob->is_metadata = 1; /* Make the image being moved available at a new index. Increments the @@ -1118,6 +1117,7 @@ commit_image(struct wimfs_context *ctx, int unmount_flags, mqd_t mq) INIT_LIST_HEAD(&ctx->orig_blob_list); delete_empty_blobs(ctx); xml_update_image_info(ctx->wim, ctx->wim->current_image); + mark_image_dirty(wim_get_current_image_metadata(ctx->wim)); write_flags = 0; @@ -2129,10 +2129,9 @@ wimlib_mount_image(WIMStruct *wim, int image, const char *dir, /* Get the metadata for the image to mount. */ imd = wim_get_current_image_metadata(wim); - if (imd->modified) { - /* To avoid complicating things, we don't support mounting - * images to which in-memory modifications have already been - * made. */ + /* To avoid complicating things, we don't support mounting images to + * which in-memory modifications have already been made. */ + if (is_image_dirty(imd)) { ERROR("Cannot mount a modified WIM image!"); return WIMLIB_ERR_INVALID_PARAM; } @@ -2208,10 +2207,6 @@ wimlib_mount_image(WIMStruct *wim, int image, const char *dir, * the file descriptor arrays */ prepare_inodes(&ctx); - /* If a read-write mount, mark the image as modified. */ - if (mount_flags & WIMLIB_MOUNT_FLAG_READWRITE) - imd->modified = 1; - /* Save the absolute path to the mountpoint directory. */ ctx.mountpoint_abspath = realpath(dir, NULL); if (ctx.mountpoint_abspath) diff --git a/src/template.c b/src/template.c index 1a9a2069..e5413e5f 100644 --- a/src/template.c +++ b/src/template.c @@ -172,7 +172,7 @@ wimlib_reference_template_image(WIMStruct *wim, int new_image, return WIMLIB_ERR_METADATA_NOT_FOUND; new_imd = wim->image_metadata[new_image - 1]; - if (!new_imd->modified) + if (!is_image_dirty(new_imd)) return WIMLIB_ERR_INVALID_PARAM; ret = select_wim_image(template_wim, template_image); diff --git a/src/update_image.c b/src/update_image.c index 2b2c107b..49492a3f 100644 --- a/src/update_image.c +++ b/src/update_image.c @@ -1425,7 +1425,7 @@ wimlib_update_image(WIMStruct *wim, if (ret) goto out_free_cmds_copy; - wim->image_metadata[image - 1]->modified = 1; + mark_image_dirty(wim->image_metadata[image - 1]); /* Statistics about the WIM image, such as the numbers of files and * directories, may have changed. Call xml_update_image_info() to diff --git a/src/wim.c b/src/wim.c index 1b66aaf7..e77d7170 100644 --- a/src/wim.c +++ b/src/wim.c @@ -325,11 +325,11 @@ select_wim_image(WIMStruct *wim, int image) return WIMLIB_ERR_METADATA_NOT_FOUND; /* If a valid image is currently selected, its metadata can be freed if - * it has not been modified. */ + * it is not dirty and no other WIMStructs may have it selected. */ deselect_current_wim_image(wim); wim->current_image = image; imd = wim_get_current_image_metadata(wim); - if (imd->root_dentry || imd->modified) { + if (imd->root_dentry || is_image_dirty(imd)) { ret = 0; } else { ret = read_metadata_resource(imd); @@ -346,7 +346,7 @@ deselect_current_wim_image(WIMStruct *wim) if (wim->current_image == WIMLIB_NO_IMAGE) return; imd = wim_get_current_image_metadata(wim); - if (!imd->modified) { + if (!is_image_dirty(imd) && imd->refcnt == 1) { wimlib_assert(list_empty(&imd->unhashed_blobs)); destroy_image_metadata(imd, NULL, false); } diff --git a/src/write.c b/src/write.c index a649dfa4..0a899945 100644 --- a/src/write.c +++ b/src/write.c @@ -2179,20 +2179,28 @@ write_metadata_resources(WIMStruct *wim, int image, int write_flags) struct wim_image_metadata *imd; imd = wim->image_metadata[i - 1]; - /* Build a new metadata resource only if image was modified from - * the original (or was newly added). Otherwise just copy the - * existing one. */ - if (imd->modified) { + if (is_image_dirty(imd)) { + /* The image was modified from the original, or was + * newly added, so we have to build and write a new + * metadata resource. */ ret = write_metadata_resource(wim, i, write_resource_flags); - } else if (write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT) { - /* For compactions, existing metadata resources are - * written along with the existing file resources. */ - ret = 0; - } else if (write_flags & WIMLIB_WRITE_FLAG_APPEND) { - blob_set_out_reshdr_for_reuse(imd->metadata_blob); + } else if (is_image_unchanged_from_wim(imd, wim) && + (write_flags & (WIMLIB_WRITE_FLAG_UNSAFE_COMPACT | + WIMLIB_WRITE_FLAG_APPEND))) + { + /* The metadata resource is already in the WIM file. + * For appends, we don't need to write it at all. For + * compactions, we re-write existing metadata resources + * along with the existing file resources, not here. */ + if (write_flags & WIMLIB_WRITE_FLAG_APPEND) + blob_set_out_reshdr_for_reuse(imd->metadata_blob); ret = 0; } else { + /* The metadata resource is in a WIM file other than the + * one being written to. We need to rewrite it, + * possibly compressed differently; but rebuilding the + * metadata itself isn't necessary. */ ret = write_wim_resource(imd->metadata_blob, &wim->out_fd, wim->out_compression_type, @@ -2880,11 +2888,16 @@ wimlib_write_to_fd(WIMStruct *wim, int fd, return write_standalone_wim(wim, &fd, image, write_flags, num_threads); } +/* Have there been any changes to images in the specified WIM, including updates + * as well as deletions and additions of entire images, but excluding changes to + * the XML document? */ static bool -any_images_modified(WIMStruct *wim) +any_images_changed(WIMStruct *wim) { + if (wim->image_deletion_occurred) + return true; for (int i = 0; i < wim->hdr.image_count; i++) - if (wim->image_metadata[i]->modified) + if (!is_image_unchanged_from_wim(wim->image_metadata[i], wim)) return true; return false; } @@ -3045,7 +3058,7 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) * with the file resources. */ for (int i = 0; i < wim->hdr.image_count; i++) { struct wim_image_metadata *imd = wim->image_metadata[i]; - if (!imd->modified) { + if (is_image_unchanged_from_wim(imd, wim)) { fully_reference_blob_for_write(imd->metadata_blob, &blob_list); } @@ -3083,13 +3096,13 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) * don't allow any file and metadata resources to appear without * returning WIMLIB_ERR_RESOURCE_ORDER (due to the fact that we * would otherwise overwrite these resources). */ - if (!wim->image_deletion_occurred && !any_images_modified(wim)) { - /* If no images have been modified and no images have - * been deleted, a new blob table does not need to be - * written. We shall write the new XML data and - * optional integrity table immediately after the blob - * table. Note that this may overwrite an existing - * integrity table. */ + if (!any_images_changed(wim)) { + /* If no images have been modified, added, or deleted, + * then a new blob table does not need to be written. + * We shall write the new XML data and optional + * integrity table immediately after the blob table. + * Note that this may overwrite an existing integrity + * table. */ old_wim_end = old_blob_table_end; write_flags |= WIMLIB_WRITE_FLAG_NO_NEW_BLOBS; } else if (wim_has_integrity_table(wim)) { -- 2.43.0