From: Eric Biggers Date: Wed, 25 Mar 2015 01:08:13 +0000 (-0500) Subject: Adjust handling of blob reference counts X-Git-Tag: v1.8.1~66 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=15ded30b9bcce7ac0388dec90dd4d6ef3d6d1596 Adjust handling of blob reference counts --- diff --git a/include/wimlib/blob_table.h b/include/wimlib/blob_table.h index c07acfad..eca035aa 100644 --- a/include/wimlib/blob_table.h +++ b/include/wimlib/blob_table.h @@ -279,12 +279,15 @@ extern struct blob_descriptor * new_blob_descriptor(void) _malloc_attribute; extern struct blob_descriptor * -clone_blob_descriptor(const struct blob_descriptor *blob) - _malloc_attribute; +clone_blob_descriptor(const struct blob_descriptor *blob) _malloc_attribute; extern void -blob_decrement_refcnt(struct blob_descriptor *blob, - struct blob_table *table); +blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table); + +extern void +blob_subtract_refcnt(struct blob_descriptor *blob, struct blob_table *table, + u32 count); + #ifdef WITH_FUSE extern void blob_decrement_num_opened_fds(struct blob_descriptor *blob); @@ -376,6 +379,11 @@ extern struct blob_descriptor * new_blob_from_data_buffer(const void *buffer, size_t size, struct blob_table *blob_table); +extern struct blob_descriptor * +after_blob_hashed(struct blob_descriptor *blob, + struct blob_descriptor **back_ptr, + struct blob_table *blob_table); + extern int hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table, diff --git a/include/wimlib/inode.h b/include/wimlib/inode.h index 1818d108..38f74a9f 100644 --- a/include/wimlib/inode.h +++ b/include/wimlib/inode.h @@ -350,7 +350,19 @@ extern struct wim_inode_stream * inode_add_stream(struct wim_inode *inode, int stream_type, const utf16lechar *stream_name, struct blob_descriptor *blob); -extern struct wim_inode_stream * +extern void +inode_replace_stream_blob(struct wim_inode *inode, + struct wim_inode_stream *strm, + struct blob_descriptor *new_blob, + struct blob_table *blob_table); + +extern bool +inode_replace_stream_data(struct wim_inode *inode, + struct wim_inode_stream *strm, + const void *data, size_t size, + struct blob_table *blob_table); + +extern bool inode_add_stream_with_data(struct wim_inode *inode, int stream_type, const utf16lechar *stream_name, const void *data, size_t size, @@ -367,13 +379,6 @@ stream_blob_resolved(const struct wim_inode_stream *strm) return strm->_stream_blob; } -static inline void -stream_set_blob(struct wim_inode_stream *strm, struct blob_descriptor *blob) -{ - strm->_stream_blob = blob; - strm->stream_resolved = 1; -} - static inline bool stream_is_named(const struct wim_inode_stream *strm) { diff --git a/src/add_image.c b/src/add_image.c index cff98203..d3778295 100644 --- a/src/add_image.c +++ b/src/add_image.c @@ -48,6 +48,7 @@ add_empty_image_metadata(WIMStruct *wim) if (!metadata_blob) goto out; + metadata_blob->refcnt = 1; metadata_blob->flags = WIM_RESHDR_FLAG_METADATA; metadata_blob->unhashed = 1; diff --git a/src/blob_table.c b/src/blob_table.c index 84835b5a..6cc0a98d 100644 --- a/src/blob_table.c +++ b/src/blob_table.c @@ -105,8 +105,7 @@ new_blob_descriptor(void) if (blob == NULL) return NULL; - blob->refcnt = 1; - + /* blob->refcnt = 0 */ /* blob->blob_location = BLOB_NONEXISTENT */ BUILD_BUG_ON(BLOB_NONEXISTENT != 0); @@ -263,10 +262,21 @@ finalize_blob(struct blob_descriptor *blob) void blob_decrement_refcnt(struct blob_descriptor *blob, struct blob_table *table) { - if (unlikely(blob->refcnt == 0)) /* See comment above */ + blob_subtract_refcnt(blob, table, 1); +} + +void +blob_subtract_refcnt(struct blob_descriptor *blob, struct blob_table *table, + u32 count) +{ + if (unlikely(blob->refcnt < count)) { + blob->refcnt = 0; /* See comment above */ return; + } + + blob->refcnt -= count; - if (--blob->refcnt != 0) + if (blob->refcnt != 0) return; if (blob->unhashed) { @@ -1255,7 +1265,6 @@ new_blob_from_data_buffer(const void *buffer, size_t size, if (existing_blob) { wimlib_assert(existing_blob->size == size); blob = existing_blob; - blob->refcnt++; } else { void *buffer_copy; blob = new_blob_descriptor(); @@ -1275,6 +1284,36 @@ new_blob_from_data_buffer(const void *buffer, size_t size, return blob; } +struct blob_descriptor * +after_blob_hashed(struct blob_descriptor *blob, + struct blob_descriptor **back_ptr, + struct blob_table *blob_table) +{ + struct blob_descriptor *duplicate_blob; + + list_del(&blob->unhashed_list); + blob->unhashed = 0; + + /* Look for a duplicate blob */ + duplicate_blob = lookup_blob(blob_table, blob->hash); + if (duplicate_blob) { + /* We have a duplicate blob. Transfer the reference counts from + * this blob to the duplicate and update the reference to this + * blob (from a stream) to point to the duplicate. The caller + * is responsible for freeing @blob if needed. */ + wimlib_assert(duplicate_blob->size == blob->size); + duplicate_blob->refcnt += blob->refcnt; + blob->refcnt = 0; + *back_ptr = duplicate_blob; + return duplicate_blob; + } else { + /* No duplicate blob, so we need to insert this blob into the + * blob table and treat it as a hashed blob. */ + blob_table_insert(blob_table, blob); + return blob; + } +} + /* * Calculate the SHA-1 message digest of a blob and move its descriptor from the * list of unhashed blobs to the blob table, possibly joining it with an @@ -1295,42 +1334,16 @@ int hash_unhashed_blob(struct blob_descriptor *blob, struct blob_table *blob_table, struct blob_descriptor **blob_ret) { - int ret; - struct blob_descriptor *duplicate_blob; struct blob_descriptor **back_ptr; + int ret; - wimlib_assert(blob->unhashed); - - /* back_ptr must be saved because @back_inode and @back_stream_id are in - * union with the SHA-1 message digest and will no longer be valid once - * the SHA-1 has been calculated. */ back_ptr = retrieve_pointer_to_unhashed_blob(blob); ret = sha1_blob(blob); if (ret) return ret; - list_del(&blob->unhashed_list); - blob->unhashed = 0; - - /* Look for a duplicate blob */ - duplicate_blob = lookup_blob(blob_table, blob->hash); - if (duplicate_blob) { - /* We have a duplicate blob. Transfer the reference counts from - * this blob to the duplicate and update the reference to this - * blob (from an stream) to point to the duplicate. The caller - * is responsible for freeing @blob if needed. */ - wimlib_assert(duplicate_blob->size == blob->size); - duplicate_blob->refcnt += blob->refcnt; - blob->refcnt = 0; - *back_ptr = duplicate_blob; - blob = duplicate_blob; - } else { - /* No duplicate blob, so we need to insert this blob into the - * blob table and treat it as a hashed blob. */ - blob_table_insert(blob_table, blob); - } - *blob_ret = blob; + *blob_ret = after_blob_hashed(blob, back_ptr, blob_table); return 0; } diff --git a/src/inode.c b/src/inode.c index e6da5095..62cab6ed 100644 --- a/src/inode.c +++ b/src/inode.c @@ -201,6 +201,52 @@ inode_get_unnamed_stream(const struct wim_inode *inode, int stream_type) return NULL; } + +static void +inode_set_stream_blob(struct wim_inode *inode, struct wim_inode_stream *strm, + struct blob_descriptor *new_blob) +{ + strm->_stream_blob = new_blob; + strm->stream_resolved = 1; + if (new_blob) + new_blob->refcnt += inode->i_nlink; +} + +static void +inode_unset_stream_blob(struct wim_inode *inode, struct wim_inode_stream *strm, + struct blob_table *blob_table) +{ + struct blob_descriptor *old_blob; + + old_blob = stream_blob(strm, blob_table); + if (old_blob) + blob_subtract_refcnt(old_blob, blob_table, inode->i_nlink); + strm->_stream_blob = NULL; + strm->stream_resolved = 1; +} + +/* + * Replace the blob associated with the specified stream. + * + * @inode + * The inode containing @strm + * @strm + * The stream whose data needs to be replaced + * @new_blob + * The new blob descriptor to assign + * @blob_table + * Pointer to the blob table in which data blobs are being indexed + */ +void +inode_replace_stream_blob(struct wim_inode *inode, + struct wim_inode_stream *strm, + struct blob_descriptor *new_blob, + struct blob_table *blob_table) +{ + inode_unset_stream_blob(inode, strm, blob_table); + inode_set_stream_blob(inode, strm, new_blob); +} + /* * Add a new stream to the specified inode. * @@ -266,9 +312,10 @@ inode_add_stream(struct wim_inode *inode, int stream_type, if (!new_strm->stream_name) return NULL; } + new_strm->stream_id = inode->i_next_stream_id++; - stream_set_blob(new_strm, blob); + inode_set_stream_blob(inode, new_strm, blob); inode->i_num_streams++; @@ -276,10 +323,39 @@ inode_add_stream(struct wim_inode *inode, int stream_type, } /* - * Create a new blob descriptor for the specified data buffer or use an existing - * blob descriptor in @blob_table for an identical blob, then add a stream of - * the specified type and name to the specified inode and set it to initially - * reference the blob. + * Replace the data of the specified stream. + * + * @inode + * The inode containing @strm + * @strm + * The stream whose data needs to be replaced + * @data + * The buffer of data to assign to the stream + * @size + * Size of the @data buffer, in bytes + * @blob_table + * Pointer to the blob table in which data blobs are being indexed + * + * Returns true if successful; false with errno set if unsuccessful. + */ +bool +inode_replace_stream_data(struct wim_inode *inode, + struct wim_inode_stream *strm, + const void *data, size_t size, + struct blob_table *blob_table) +{ + struct blob_descriptor *new_blob; + + new_blob = new_blob_from_data_buffer(data, size, blob_table); + if (!new_blob) + return false; + + inode_replace_stream_blob(inode, strm, new_blob, blob_table); + return true; +} + +/* + * Add a new stream to the specified inode and assign it the specified data. * * @inode * The inode to which to add the stream @@ -289,54 +365,55 @@ inode_add_stream(struct wim_inode *inode, int stream_type, * The name of the stream being added as a null-terminated UTF-16LE string, * or NO_STREAM_NAME if the stream is unnamed * @data - * The uncompressed data of the blob + * The buffer of data to assign to the new stream * @size - * The size, in bytes, of the blob data + * Size of the @data buffer, in bytes * @blob_table - * Pointer to the blob table in which the blob needs to be indexed. + * Pointer to the blob table in which data blobs are being indexed * - * Returns a pointer to the new stream if successfully added, otherwise NULL - * with errno set. + * Returns true if successful; false with errno set if unsuccessful. */ -struct wim_inode_stream * +bool inode_add_stream_with_data(struct wim_inode *inode, int stream_type, const utf16lechar *stream_name, const void *data, size_t size, struct blob_table *blob_table) { - struct blob_descriptor *blob; struct wim_inode_stream *strm; + struct blob_descriptor *blob; - blob = new_blob_from_data_buffer(data, size, blob_table); - if (!blob) - return NULL; - strm = inode_add_stream(inode, stream_type, stream_name, blob); + strm = inode_add_stream(inode, stream_type, stream_name, NULL); if (!strm) - blob_decrement_refcnt(blob, blob_table); - return strm; + return false; + + blob = new_blob_from_data_buffer(data, size, blob_table); + if (!blob) { + inode_remove_stream(inode, strm, blob_table); + return false; + } + + inode_set_stream_blob(inode, strm, blob); + return true; } /* - * Remove a stream from the specified inode and release the reference to the - * blob descriptor, if any. + * Remove a stream from the specified inode. + * + * This handles releasing the references to the blob descriptor, if any. */ void inode_remove_stream(struct wim_inode *inode, struct wim_inode_stream *strm, struct blob_table *blob_table) { - struct blob_descriptor *blob; unsigned idx = strm - inode->i_streams; wimlib_assert(idx < inode->i_num_streams); - blob = stream_blob(strm, blob_table); - if (blob) - blob_decrement_refcnt(blob, blob_table); + inode_unset_stream_blob(inode, strm, blob_table); destroy_stream(strm); - memmove(&inode->i_streams[idx], - &inode->i_streams[idx + 1], + memmove(strm, strm + 1, (inode->i_num_streams - idx - 1) * sizeof(inode->i_streams[0])); inode->i_num_streams--; } @@ -400,9 +477,12 @@ inode_resolve_streams(struct wim_inode *inode, struct blob_table *table, blobs[i] = blob; } - for (unsigned i = 0; i < inode->i_num_streams; i++) - if (!inode->i_streams[i].stream_resolved) - stream_set_blob(&inode->i_streams[i], blobs[i]); + for (unsigned i = 0; i < inode->i_num_streams; i++) { + if (!inode->i_streams[i].stream_resolved) { + inode->i_streams[i]._stream_blob = blobs[i]; + inode->i_streams[i].stream_resolved = 1; + } + } return 0; } diff --git a/src/mount_image.c b/src/mount_image.c index 124295ce..7fe8bb53 100644 --- a/src/mount_image.c +++ b/src/mount_image.c @@ -757,15 +757,9 @@ extract_blob_to_staging_dir(struct wim_inode *inode, filedes_init(&fd->f_staging_fd, raw_fd); } - /* Remove the appropriate count of file descriptors and stream - * references from the old blob. */ - if (old_blob) { + if (old_blob) old_blob->num_opened_fds -= new_blob->num_opened_fds; - for (u32 i = 0; i < inode->i_nlink; i++) - blob_decrement_refcnt(old_blob, ctx->wim->blob_table); - } - new_blob->refcnt = inode->i_nlink; new_blob->blob_location = BLOB_IN_STAGING_FILE; new_blob->staging_file_name = staging_file_name; new_blob->staging_dir_fd = ctx->staging_dir_fd; @@ -773,7 +767,7 @@ extract_blob_to_staging_dir(struct wim_inode *inode, prepare_unhashed_blob(new_blob, inode, strm->stream_id, &wim_get_current_image_metadata(ctx->wim)->unhashed_blobs); - stream_set_blob(strm, new_blob); + inode_replace_stream_blob(inode, strm, new_blob, ctx->wim->blob_table); return 0; out_revert_fd_changes: @@ -947,11 +941,8 @@ release_extra_refcnts(struct wimfs_context *ctx) struct blob_table *blob_table = ctx->wim->blob_table; struct blob_descriptor *blob, *tmp; - list_for_each_entry_safe(blob, tmp, list, orig_blob_list) { - u32 n = blob->out_refcnt; - while (n--) - blob_decrement_refcnt(blob, blob_table); - } + list_for_each_entry_safe(blob, tmp, list, orig_blob_list) + blob_subtract_refcnt(blob, blob_table, blob->out_refcnt); } /* Delete the 'struct blob_descriptor' for any stream that was modified @@ -1027,6 +1018,7 @@ renew_current_image(struct wimfs_context *ctx) new_blob = new_blob_descriptor(); if (!new_blob) goto err_put_replace_imd; + new_blob->refcnt = 1; new_blob->flags = WIM_RESHDR_FLAG_METADATA; new_blob->unhashed = 1; @@ -1808,7 +1800,7 @@ wimfs_setxattr(const char *path, const char *name, { struct wimfs_context *ctx = wimfs_get_context(); struct wim_inode *inode; - struct wim_inode_stream *existing_strm; + struct wim_inode_stream *strm; const utf16lechar *uname; int ret; @@ -1850,8 +1842,8 @@ wimfs_setxattr(const char *path, const char *name, if (ret) return -errno; - existing_strm = inode_get_stream(inode, STREAM_TYPE_DATA, uname); - if (existing_strm) { + strm = inode_get_stream(inode, STREAM_TYPE_DATA, uname); + if (strm) { ret = -EEXIST; if (flags & XATTR_CREATE) goto out_put_uname; @@ -1861,14 +1853,22 @@ wimfs_setxattr(const char *path, const char *name, goto out_put_uname; } - if (!inode_add_stream_with_data(inode, STREAM_TYPE_DATA, uname, - value, size, ctx->wim->blob_table)) - { - ret = -errno; - goto out_put_uname; + if (strm) { + if (!inode_replace_stream_data(inode, strm, value, size, + ctx->wim->blob_table)) + { + ret = -errno; + goto out_put_uname; + } + } else { + if (!inode_add_stream_with_data(inode, STREAM_TYPE_DATA, uname, + value, size, ctx->wim->blob_table)) + { + ret = -errno; + goto out_put_uname; + } } - if (existing_strm) - inode_remove_stream(inode, existing_strm, ctx->wim->blob_table); + ret = 0; out_put_uname: tstr_put_utf16le(uname); diff --git a/src/template.c b/src/template.c index 57eaca71..1fd6d93d 100644 --- a/src/template.c +++ b/src/template.c @@ -33,11 +33,24 @@ #include "wimlib/metadata.h" #include "wimlib/util.h" +static u64 +stream_size(const struct wim_inode_stream *strm, + const struct blob_table *blob_table) +{ + const struct blob_descriptor *blob; + + blob = stream_blob(strm, blob_table); + if (!blob) + return 0; + return blob->size; +} + /* Returns %true iff the metadata of @inode and @template_inode are reasonably * consistent with them being the same, unmodified file. */ static bool inode_metadata_consistent(const struct wim_inode *inode, const struct wim_inode *template_inode, + const struct blob_table *blob_table, const struct blob_table *template_blob_table) { /* Must have exact same creation time and last write time. */ @@ -50,106 +63,60 @@ inode_metadata_consistent(const struct wim_inode *inode, if (inode->i_last_access_time < template_inode->i_last_access_time) return false; - /* Must have same number of streams. */ - if (inode->i_num_streams != template_inode->i_num_streams) - return false; - + /* All stream sizes must match. */ for (unsigned i = 0; i < inode->i_num_streams; i++) { - const struct blob_descriptor *blob, *template_blob; + const struct wim_inode_stream *strm, *template_strm; - /* If the streams for the inode are for some reason not - * resolved, then the hashes are already available and the point - * of this function is defeated. */ - if (!inode->i_streams[i].stream_resolved) + strm = &inode->i_streams[i]; + template_strm = inode_get_stream(template_inode, + strm->stream_type, + strm->stream_name); + if (!template_strm) return false; - blob = stream_blob_resolved(&inode->i_streams[i]); - template_blob = stream_blob(&template_inode->i_streams[i], - template_blob_table); - - /* Compare blob sizes. */ - if (blob && template_blob) { - if (blob->size != template_blob->size) - return false; - - /* If hash happens to be available, compare with template. */ - if (!blob->unhashed && !template_blob->unhashed && - !hashes_equal(blob->hash, template_blob->hash)) - return false; - - } else if (blob && blob->size) { + if (stream_size(strm, blob_table) != + stream_size(template_strm, template_blob_table)) return false; - } else if (template_blob && template_blob->size) { - return false; - } } - /* All right, barring a full checksum and given that the inodes share a - * path and the user isn't trying to trick us, these inodes most likely - * refer to the same file. */ return true; } /** * Given an inode @inode that has been determined to be "the same" as another - * inode @template_inode in either the same WIM or another WIM, retrieve some - * useful information (e.g. checksums) from @template_inode. - * - * This assumes that the streams for @inode have been resolved (to point - * directly to the appropriate `struct blob_descriptor's) but do not necessarily - * have checksum information filled in. + * inode @template_inode in either the same WIM or another WIM, copy stream + * checksums from @template_inode to @inode. */ -static int +static void inode_copy_checksums(struct wim_inode *inode, struct wim_inode *template_inode, - WIMStruct *wim, - WIMStruct *template_wim) + struct blob_table *blob_table, + struct blob_table *template_blob_table) { for (unsigned i = 0; i < inode->i_num_streams; i++) { - struct blob_descriptor *blob, *template_blob; - struct blob_descriptor *replace_blob; + const struct wim_inode_stream *strm, *template_strm; + struct blob_descriptor *blob, *template_blob, **back_ptr; - blob = stream_blob_resolved(&inode->i_streams[i]); - template_blob = stream_blob(&template_inode->i_streams[i], - template_wim->blob_table); + strm = &inode->i_streams[i]; + template_strm = inode_get_stream(template_inode, + strm->stream_type, + strm->stream_name); - /* Only take action if both entries exist, the entry for @inode - * has no checksum calculated, but the entry for @template_inode - * does. */ - if (blob == NULL || template_blob == NULL || + blob = stream_blob(strm, blob_table); + template_blob = stream_blob(template_strm, template_blob_table); + + /* To copy hashes: both blobs must exist, the blob for @inode + * must be unhashed, and the blob for @template_inode must be + * hashed. */ + if (!blob || !template_blob || !blob->unhashed || template_blob->unhashed) continue; - wimlib_assert(blob->refcnt == inode->i_nlink); - - /* If the WIM of the template image is the same as the WIM of - * the new image, then @template_blob can be used directly. - * - * Otherwise, look for a blob with the same hash in the WIM of - * the new image. If found, use it; otherwise re-use the - * blob descriptor being discarded, filling in the hash. */ - - if (wim == template_wim) - replace_blob = template_blob; - else - replace_blob = lookup_blob(wim->blob_table, - template_blob->hash); - - list_del(&blob->unhashed_list); - if (replace_blob) { + back_ptr = retrieve_pointer_to_unhashed_blob(blob); + copy_hash(blob->hash, template_blob->hash); + if (after_blob_hashed(blob, back_ptr, blob_table) != blob) free_blob_descriptor(blob); - } else { - copy_hash(blob->hash, template_blob->hash); - blob->unhashed = 0; - blob_table_insert(wim->blob_table, blob); - blob->refcnt = 0; - replace_blob = blob; - } - - stream_set_blob(&inode->i_streams[i], replace_blob); - replace_blob->refcnt += inode->i_nlink; } - return 0; } struct reference_template_args { @@ -184,17 +151,17 @@ dentry_reference_template(struct wim_dentry *dentry, void *_args) inode = dentry->d_inode; template_inode = template_dentry->d_inode; - if (inode_metadata_consistent(inode, template_inode, - template_wim->blob_table)) { - /*DEBUG("\"%"TS"\": No change detected", dentry->_full_path);*/ - ret = inode_copy_checksums(inode, template_inode, - wim, template_wim); + if (inode_metadata_consistent(inode, template_inode, wim->blob_table, + template_wim->blob_table)) + { + DEBUG("\"%"TS"\": No change detected", dentry->_full_path); + inode_copy_checksums(inode, template_inode, wim->blob_table, + template_wim->blob_table); inode->i_visited = 1; } else { DEBUG("\"%"TS"\": change detected!", dentry->_full_path); - ret = 0; } - return ret; + return 0; } /* API function documented in wimlib.h */