From: Eric Biggers Date: Mon, 19 Aug 2013 05:14:29 +0000 (-0500) Subject: join.c, write.c: Optimize WIMLIB_ALL_IMAGES writes (and joins) X-Git-Tag: v1.5.0~30 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=0e8ba68f2bd5e3f844611b93b6335e0a24f5c327 join.c, write.c: Optimize WIMLIB_ALL_IMAGES writes (and joins) --- diff --git a/include/wimlib.h b/include/wimlib.h index 3d8b3177..40d062aa 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -2267,8 +2267,9 @@ wimlib_iterate_lookup_table(WIMStruct *wim, int flags, * parts have the same GUID and compression type. * * Note: wimlib_export_image() can provide similar functionality to - * wimlib_join(), since it is possible to export all images from a split WIM. - * Actually, wimlib_join() currently calls wimlib_export_image internally. + * wimlib_join(), since it is possible to export all images from a split WIM + * into a new ::WIMStruct, then write it. However, wimlib_join() may have + * better performance than this method. */ extern int wimlib_join(const wimlib_tchar * const *swms, @@ -3170,8 +3171,6 @@ wimlib_update_image(WIMStruct *wim, * with @p wim, or some file resources in @p wim refer to files in the * outside filesystem, and a read error occurred when reading one of these * files. - * @retval ::WIMLIB_ERR_SPLIT_UNSUPPORTED - * @p wim is part of a split WIM, not a standalone WIM. * @retval ::WIMLIB_ERR_WRITE * An error occurred when trying to write data to the new WIM file. 
*/ diff --git a/src/join.c b/src/join.c index d606b7be..2157697d 100644 --- a/src/join.c +++ b/src/join.c @@ -29,6 +29,7 @@ #include "wimlib.h" #include "wimlib/types.h" +#include "wimlib/swm.h" #include "wimlib/util.h" #include "wimlib/wim.h" @@ -45,9 +46,8 @@ wimlib_join(const tchar * const *swm_names, unsigned i; unsigned j; WIMStruct *swm0; - WIMStruct **swms; + WIMStruct **additional_swms; unsigned num_additional_swms; - WIMStruct *wim; swm_open_flags |= WIMLIB_OPEN_FLAG_SPLIT_OK; @@ -55,8 +55,8 @@ wimlib_join(const tchar * const *swm_names, return WIMLIB_ERR_INVALID_PARAM; num_additional_swms = num_swms - 1; - swms = CALLOC(num_additional_swms, sizeof(swms[0])); - if (!swms) + additional_swms = CALLOC(num_additional_swms, sizeof(additional_swms[0])); + if (!additional_swms) return WIMLIB_ERR_NOMEM; swm0 = NULL; @@ -70,7 +70,7 @@ wimlib_join(const tchar * const *swm_names, if (swm->hdr.part_number == 1 && swm0 == NULL) swm0 = swm; else - swms[j++] = swm; + additional_swms[j++] = swm; } if (!swm0) { @@ -78,39 +78,18 @@ wimlib_join(const tchar * const *swm_names, goto out_free_swms; } - ret = wimlib_create_new_wim(swm0->compression_type, &wim); + ret = verify_swm_set(swm0, additional_swms, num_additional_swms); if (ret) goto out_free_swms; - ret = wimlib_export_image(swm0, WIMLIB_ALL_IMAGES, wim, NULL, NULL, 0, - swms, num_additional_swms, progress_func); - if (ret) - goto out_free_wim; - - wim->hdr.flags |= swm0->hdr.flags & (WIM_HDR_FLAG_RP_FIX | - WIM_HDR_FLAG_READONLY); - if (!(wim_write_flags & (WIMLIB_WRITE_FLAG_CHECK_INTEGRITY | - WIMLIB_WRITE_FLAG_NO_CHECK_INTEGRITY))) - { - if (wim_has_integrity_table(swm0)) - wim_write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY; - } - if (!(wim_write_flags & (WIMLIB_WRITE_FLAG_PIPABLE | - WIMLIB_WRITE_FLAG_NOT_PIPABLE))) - { - if (wim_is_pipable(swm0)) - wim_write_flags |= WIMLIB_WRITE_FLAG_PIPABLE; - } - + merge_lookup_tables(swm0, additional_swms, num_additional_swms); - ret = wimlib_write(wim, output_path, 
WIMLIB_ALL_IMAGES, + ret = wimlib_write(swm0, output_path, WIMLIB_ALL_IMAGES, wim_write_flags, 1, progress_func); -out_free_wim: - wimlib_free(wim); out_free_swms: for (i = 0; i < num_additional_swms; i++) - wimlib_free(swms[i]); - FREE(swms); + wimlib_free(additional_swms[i]); + FREE(additional_swms); wimlib_free(swm0); return ret; } diff --git a/src/write.c b/src/write.c index e4a0f3ad..939fe459 100644 --- a/src/write.c +++ b/src/write.c @@ -1817,23 +1817,40 @@ struct find_streams_ctx { struct stream_size_table stream_size_tab; }; +static void +lte_reference_for_write(struct wim_lookup_table_entry *lte, + struct find_streams_ctx *ctx, + unsigned nref) +{ + if (lte->out_refcnt == 0) { + if (lte->unhashed) + stream_size_table_insert(lte, &ctx->stream_size_tab); + list_add_tail(&lte->write_streams_list, &ctx->stream_list); + } + lte->out_refcnt += nref; +} + +static int +do_lte_reference_for_write(struct wim_lookup_table_entry *lte, void *_ctx) +{ + struct find_streams_ctx *ctx = _ctx; + lte->out_refcnt = 0; + lte_reference_for_write(lte, ctx, lte->refcnt); + return 0; +} + static void inode_find_streams_to_write(struct wim_inode *inode, struct wim_lookup_table *table, - struct list_head *stream_list, - struct stream_size_table *tab) + struct find_streams_ctx *ctx) { struct wim_lookup_table_entry *lte; - for (unsigned i = 0; i <= inode->i_num_ads; i++) { + unsigned i; + + for (i = 0; i <= inode->i_num_ads; i++) { lte = inode_stream_lte(inode, i, table); - if (lte) { - if (lte->out_refcnt == 0) { - if (lte->unhashed) - stream_size_table_insert(lte, tab); - list_add_tail(&lte->write_streams_list, stream_list); - } - lte->out_refcnt += inode->i_nlink; - } + if (lte) + lte_reference_for_write(lte, ctx, inode->i_nlink); } } @@ -1853,11 +1870,8 @@ image_find_streams_to_write(WIMStruct *wim) /* Go through this image's inodes to find any streams that have not been * found yet. 
*/ - image_for_each_inode(inode, imd) { - inode_find_streams_to_write(inode, wim->lookup_table, - &ctx->stream_list, - &ctx->stream_size_tab); - } + image_for_each_inode(inode, imd) + inode_find_streams_to_write(inode, wim->lookup_table, ctx); return 0; } @@ -1891,7 +1905,32 @@ prepare_stream_list(WIMStruct *wim, int image, struct list_head *stream_list) &ctx.stream_size_tab); INIT_LIST_HEAD(&ctx.stream_list); wim->private = &ctx; - ret = for_image(wim, image, image_find_streams_to_write); + +#if 1 + /* Optimization enabled by default: if we're writing all the images, + * it's not strictly necessary to decompress, parse, and go through the + * dentry tree in each image's metadata resource. Instead, include all + * the hashed streams referenced from the lookup table as well as all + * unhashed streams referenced in the per-image list. For 'out_refcnt' + * for each stream, just copy the value from 'refcnt', which is the + * reference count of that stream in the entire WIM. */ + if (image == WIMLIB_ALL_IMAGES) { + struct wim_lookup_table_entry *lte; + struct wim_image_metadata *imd; + unsigned i; + + for_lookup_table_entry(wim->lookup_table, + do_lte_reference_for_write, &ctx); + for (i = 0; i < wim->hdr.image_count; i++) { + imd = wim->image_metadata[i]; + image_for_each_unhashed_stream(lte, imd) + do_lte_reference_for_write(lte, &ctx); + } + ret = 0; + } else +#endif + ret = for_image(wim, image, image_find_streams_to_write); + destroy_stream_size_table(&ctx.stream_size_tab); if (ret) return ret; @@ -1971,6 +2010,9 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags, struct wim_image_metadata *imd; imd = wim->image_metadata[i - 1]; + /* Build a new metadata resource only if image was modified from + * the original (or was newly added). Otherwise just copy the + * existing one. 
*/ if (imd->modified) { ret = write_metadata_resource(wim, i, write_resource_flags); @@ -2374,8 +2416,9 @@ write_wim_part(WIMStruct *wim, (image < 1 || image > wim->hdr.image_count)) return WIMLIB_ERR_INVALID_IMAGE; - /* @wim must specify a standalone WIM. */ - if (wim->hdr.total_parts != 1) + /* @wim must specify a standalone WIM, or at least the first part of a + * split WIM. */ + if (wim->hdr.part_number != 1) return WIMLIB_ERR_SPLIT_UNSUPPORTED; /* Check for contradictory flags. */