join.c, write.c: Optimize WIMLIB_ALL_IMAGES writes (and joins)
author Eric Biggers <ebiggers3@gmail.com>
Mon, 19 Aug 2013 05:14:29 +0000 (00:14 -0500)
committer Eric Biggers <ebiggers3@gmail.com>
Mon, 19 Aug 2013 05:14:29 +0000 (00:14 -0500)
include/wimlib.h
src/join.c
src/write.c

diff --git a/include/wimlib.h b/include/wimlib.h
index 3d8b317..40d062a 100644
--- a/include/wimlib.h
+++ b/include/wimlib.h
@@ -2267,8 +2267,9 @@ wimlib_iterate_lookup_table(WIMStruct *wim, int flags,
  *     parts have the same GUID and compression type.
  *
  * Note: wimlib_export_image() can provide similar functionality to
- * wimlib_join(), since it is possible to export all images from a split WIM.
- * Actually, wimlib_join() currently calls wimlib_export_image internally.
+ * wimlib_join(), since it is possible to export all images from a split WIM
+ * into a new ::WIMStruct, then write it.  However, wimlib_join() may have
+ * better performance than this method.
  */
 extern int
 wimlib_join(const wimlib_tchar * const *swms,
@@ -3170,8 +3171,6 @@ wimlib_update_image(WIMStruct *wim,
  *     with @p wim, or some file resources in @p wim refer to files in the
  *     outside filesystem, and a read error occurred when reading one of these
  *     files.
- * @retval ::WIMLIB_ERR_SPLIT_UNSUPPORTED
- *     @p wim is part of a split WIM, not a standalone WIM.
  * @retval ::WIMLIB_ERR_WRITE
  *     An error occurred when trying to write data to the new WIM file.
  */
diff --git a/src/join.c b/src/join.c
index d606b7b..2157697 100644
--- a/src/join.c
+++ b/src/join.c
@@ -29,6 +29,7 @@
 
 #include "wimlib.h"
 #include "wimlib/types.h"
+#include "wimlib/swm.h"
 #include "wimlib/util.h"
 #include "wimlib/wim.h"
 
@@ -45,9 +46,8 @@ wimlib_join(const tchar * const *swm_names,
        unsigned i;
        unsigned j;
        WIMStruct *swm0;
-       WIMStruct **swms;
+       WIMStruct **additional_swms;
        unsigned num_additional_swms;
-       WIMStruct *wim;
 
        swm_open_flags |= WIMLIB_OPEN_FLAG_SPLIT_OK;
 
@@ -55,8 +55,8 @@ wimlib_join(const tchar * const *swm_names,
                return WIMLIB_ERR_INVALID_PARAM;
        num_additional_swms = num_swms - 1;
 
-       swms = CALLOC(num_additional_swms, sizeof(swms[0]));
-       if (!swms)
+       additional_swms = CALLOC(num_additional_swms, sizeof(additional_swms[0]));
+       if (!additional_swms)
                return WIMLIB_ERR_NOMEM;
 
        swm0 = NULL;
@@ -70,7 +70,7 @@ wimlib_join(const tchar * const *swm_names,
                if (swm->hdr.part_number == 1 && swm0 == NULL)
                        swm0 = swm;
                else
-                       swms[j++] = swm;
+                       additional_swms[j++] = swm;
        }
 
        if (!swm0) {
@@ -78,39 +78,18 @@ wimlib_join(const tchar * const *swm_names,
                goto out_free_swms;
        }
 
-       ret = wimlib_create_new_wim(swm0->compression_type, &wim);
+       ret = verify_swm_set(swm0, additional_swms, num_additional_swms);
        if (ret)
                goto out_free_swms;
 
-       ret = wimlib_export_image(swm0, WIMLIB_ALL_IMAGES, wim, NULL, NULL, 0,
-                                 swms, num_additional_swms, progress_func);
-       if (ret)
-               goto out_free_wim;
-
-       wim->hdr.flags |= swm0->hdr.flags & (WIM_HDR_FLAG_RP_FIX |
-                                            WIM_HDR_FLAG_READONLY);
-       if (!(wim_write_flags & (WIMLIB_WRITE_FLAG_CHECK_INTEGRITY |
-                                WIMLIB_WRITE_FLAG_NO_CHECK_INTEGRITY)))
-       {
-               if (wim_has_integrity_table(swm0))
-                       wim_write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY;
-       }
-       if (!(wim_write_flags & (WIMLIB_WRITE_FLAG_PIPABLE |
-                                WIMLIB_WRITE_FLAG_NOT_PIPABLE)))
-       {
-               if (wim_is_pipable(swm0))
-                       wim_write_flags |= WIMLIB_WRITE_FLAG_PIPABLE;
-       }
-
+       merge_lookup_tables(swm0, additional_swms, num_additional_swms);
 
-       ret = wimlib_write(wim, output_path, WIMLIB_ALL_IMAGES,
+       ret = wimlib_write(swm0, output_path, WIMLIB_ALL_IMAGES,
                           wim_write_flags, 1, progress_func);
-out_free_wim:
-       wimlib_free(wim);
 out_free_swms:
        for (i = 0; i < num_additional_swms; i++)
-               wimlib_free(swms[i]);
-       FREE(swms);
+               wimlib_free(additional_swms[i]);
+       FREE(additional_swms);
        wimlib_free(swm0);
        return ret;
 }
diff --git a/src/write.c b/src/write.c
index e4a0f3a..939fe45 100644
--- a/src/write.c
+++ b/src/write.c
@@ -1818,22 +1818,39 @@ struct find_streams_ctx {
 };
 
 static void
+lte_reference_for_write(struct wim_lookup_table_entry *lte,
+                       struct find_streams_ctx *ctx,
+                       unsigned nref)
+{
+       if (lte->out_refcnt == 0) {
+               if (lte->unhashed)
+                       stream_size_table_insert(lte, &ctx->stream_size_tab);
+               list_add_tail(&lte->write_streams_list, &ctx->stream_list);
+       }
+       lte->out_refcnt += nref;
+}
+
+static int
+do_lte_reference_for_write(struct wim_lookup_table_entry *lte, void *_ctx)
+{
+       struct find_streams_ctx *ctx = _ctx;
+       lte->out_refcnt = 0;
+       lte_reference_for_write(lte, ctx, lte->refcnt);
+       return 0;
+}
+
+static void
 inode_find_streams_to_write(struct wim_inode *inode,
                            struct wim_lookup_table *table,
-                           struct list_head *stream_list,
-                           struct stream_size_table *tab)
+                           struct find_streams_ctx *ctx)
 {
        struct wim_lookup_table_entry *lte;
-       for (unsigned i = 0; i <= inode->i_num_ads; i++) {
+       unsigned i;
+
+       for (i = 0; i <= inode->i_num_ads; i++) {
                lte = inode_stream_lte(inode, i, table);
-               if (lte) {
-                       if (lte->out_refcnt == 0) {
-                               if (lte->unhashed)
-                                       stream_size_table_insert(lte, tab);
-                               list_add_tail(&lte->write_streams_list, stream_list);
-                       }
-                       lte->out_refcnt += inode->i_nlink;
-               }
+               if (lte)
+                       lte_reference_for_write(lte, ctx, inode->i_nlink);
        }
 }
 
@@ -1853,11 +1870,8 @@ image_find_streams_to_write(WIMStruct *wim)
 
        /* Go through this image's inodes to find any streams that have not been
         * found yet. */
-       image_for_each_inode(inode, imd) {
-               inode_find_streams_to_write(inode, wim->lookup_table,
-                                           &ctx->stream_list,
-                                           &ctx->stream_size_tab);
-       }
+       image_for_each_inode(inode, imd)
+               inode_find_streams_to_write(inode, wim->lookup_table, ctx);
        return 0;
 }
 
@@ -1891,7 +1905,32 @@ prepare_stream_list(WIMStruct *wim, int image, struct list_head *stream_list)
                               &ctx.stream_size_tab);
        INIT_LIST_HEAD(&ctx.stream_list);
        wim->private = &ctx;
-       ret = for_image(wim, image, image_find_streams_to_write);
+
+#if 1
+       /* Optimization enabled by default:  if we're writing all the images,
+        * it's not strictly necessary to decompress, parse, and go through the
+        * dentry tree in each image's metadata resource.  Instead, include all
+        * the hashed streams referenced from the lookup table as well as all
+        * unhashed streams referenced in the per-image list.  For 'out_refcnt'
+        * for each stream, just copy the value from 'refcnt', which is the
+        * reference count of that stream in the entire WIM.  */
+       if (image == WIMLIB_ALL_IMAGES) {
+               struct wim_lookup_table_entry *lte;
+               struct wim_image_metadata *imd;
+               unsigned i;
+
+               for_lookup_table_entry(wim->lookup_table,
+                                      do_lte_reference_for_write, &ctx);
+               for (i = 0; i < wim->hdr.image_count; i++) {
+                       imd = wim->image_metadata[i];
+                       image_for_each_unhashed_stream(lte, imd)
+                               do_lte_reference_for_write(lte, &ctx);
+               }
+               ret = 0;
+       } else
+#endif
+               ret = for_image(wim, image, image_find_streams_to_write);
+
        destroy_stream_size_table(&ctx.stream_size_tab);
        if (ret)
                return ret;
@@ -1971,6 +2010,9 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags,
                struct wim_image_metadata *imd;
 
                imd = wim->image_metadata[i - 1];
+               /* Build a new metadata resource only if image was modified from
+                * the original (or was newly added).  Otherwise just copy the
+                * existing one.  */
                if (imd->modified) {
                        ret = write_metadata_resource(wim, i,
                                                      write_resource_flags);
@@ -2374,8 +2416,9 @@ write_wim_part(WIMStruct *wim,
             (image < 1 || image > wim->hdr.image_count))
                return WIMLIB_ERR_INVALID_IMAGE;
 
-       /* @wim must specify a standalone WIM.  */
-       if (wim->hdr.total_parts != 1)
+       /* @wim must specify a standalone WIM, or at least the first part of a
+        * split WIM.  */
+       if (wim->hdr.part_number != 1)
                return WIMLIB_ERR_SPLIT_UNSUPPORTED;
 
        /* Check for contradictory flags.  */