]> wimlib.net Git - wimlib/blobdiff - src/write.c
Add experimental support for Windows VSS
[wimlib] / src / write.c
index b595c875e08e08e80313aa3bd2e363e8577df2d3..9f48d3f1ded5973d1a215c5cd4b23f9f391d5d12 100644 (file)
@@ -28,7 +28,7 @@
 
 #if defined(HAVE_SYS_FILE_H) && defined(HAVE_FLOCK)
 /* On BSD, this should be included before "wimlib/list.h" so that "wimlib/list.h" can
- * overwrite the LIST_HEAD macro. */
+ * override the LIST_HEAD macro. */
 #  include <sys/file.h>
 #endif
 
@@ -118,7 +118,7 @@ blob_filtered(const struct blob_descriptor *blob,
        write_flags = ctx->write_flags;
        wim = ctx->wim;
 
-       if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE &&
+       if (write_flags & WIMLIB_WRITE_FLAG_APPEND &&
            blob->blob_location == BLOB_IN_WIM &&
            blob->rdesc->wim == wim)
                return 1;
@@ -138,60 +138,77 @@ blob_hard_filtered(const struct blob_descriptor *blob,
        return blob_filtered(blob, ctx) < 0;
 }
 
-static inline int
+static inline bool
 may_soft_filter_blobs(const struct filter_context *ctx)
 {
-       if (ctx == NULL)
-               return 0;
-       return ctx->write_flags & WIMLIB_WRITE_FLAG_OVERWRITE;
+       return ctx && (ctx->write_flags & WIMLIB_WRITE_FLAG_APPEND);
 }
 
-static inline int
+static inline bool
 may_hard_filter_blobs(const struct filter_context *ctx)
 {
-       if (ctx == NULL)
-               return 0;
-       return ctx->write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS;
+       return ctx && (ctx->write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS);
 }
 
-static inline int
+static inline bool
 may_filter_blobs(const struct filter_context *ctx)
 {
        return (may_soft_filter_blobs(ctx) || may_hard_filter_blobs(ctx));
 }
 
-/* Return true if the specified resource is compressed and the compressed data
- * can be reused with the specified output parameters.  */
+/* Return true if the specified blob is located in a WIM resource which can be
+ * reused in the output WIM file, without being recompressed.  */
 static bool
-can_raw_copy(const struct blob_descriptor *blob,
-            int write_resource_flags, int out_ctype, u32 out_chunk_size)
+can_raw_copy(const struct blob_descriptor *blob, int write_resource_flags,
+            int out_ctype, u32 out_chunk_size)
 {
        const struct wim_resource_descriptor *rdesc;
 
+       /* Recompress everything if requested.  */
        if (write_resource_flags & WRITE_RESOURCE_FLAG_RECOMPRESS)
                return false;
 
-       if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
-               return false;
-
+       /* A blob not located in a WIM resource cannot be reused.  */
        if (blob->blob_location != BLOB_IN_WIM)
                return false;
 
        rdesc = blob->rdesc;
 
-       if (rdesc->is_pipable != !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE))
+       /* In the case of an in-place compaction, always reuse resources located
+        * in the WIM being compacted.  */
+       if (rdesc->wim->being_compacted)
+               return true;
+
+       /* Otherwise, only reuse compressed resources.  */
+       if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE ||
+           !(rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED |
+                             WIM_RESHDR_FLAG_SOLID)))
+               return false;
+
+       /* When writing a pipable WIM, we can only reuse pipable resources; and
+        * when writing a non-pipable WIM, we can only reuse non-pipable
+        * resources.  */
+       if (rdesc->is_pipable !=
+           !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE))
                return false;
 
+       /* When writing a solid WIM, we can only reuse solid resources; and when
+        * writing a non-solid WIM, we can only reuse non-solid resources.  */
+       if (!!(rdesc->flags & WIM_RESHDR_FLAG_SOLID) !=
+           !!(write_resource_flags & WRITE_RESOURCE_FLAG_SOLID))
+               return false;
+
+       /* Note: it is theoretically possible to copy chunks of compressed data
+        * between non-solid, solid, and pipable resources.  However, we don't
+        * currently implement this optimization because it would be complex and
+        * would usually go unused.  */
+
        if (rdesc->flags & WIM_RESHDR_FLAG_COMPRESSED) {
-               /* Normal compressed resource: Must use same compression type
-                * and chunk size.  */
+               /* To re-use a non-solid resource, it must use the desired
+                * compression type and chunk size.  */
                return (rdesc->compression_type == out_ctype &&
                        rdesc->chunk_size == out_chunk_size);
-       }
-
-       if ((rdesc->flags & WIM_RESHDR_FLAG_SOLID) &&
-           (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID))
-       {
+       } else {
                /* Solid resource: Such resources may contain multiple blobs,
                 * and in general only a subset of them need to be written.  As
                 * a heuristic, re-use the raw data if more than two-thirds the
@@ -202,6 +219,10 @@ can_raw_copy(const struct blob_descriptor *blob,
                 * check if they are compatible with @out_ctype and
                 * @out_chunk_size.  */
 
+               /* Did we already decide to reuse the resource?  */
+               if (rdesc->raw_copy_ok)
+                       return true;
+
                struct blob_descriptor *res_blob;
                u64 write_size = 0;
 
@@ -211,8 +232,6 @@ can_raw_copy(const struct blob_descriptor *blob,
 
                return (write_size > rdesc->uncompressed_size * 2 / 3);
        }
-
-       return false;
 }
 
 static u32
@@ -338,10 +357,6 @@ struct write_blobs_ctx {
 
        struct filter_context *filter_ctx;
 
-       /* Upper bound on the total number of bytes that need to be compressed.
-        * */
-       u64 num_bytes_to_compress;
-
        /* Pointer to the chunk_compressor implementation being used for
         * compressing chunks of data, or NULL if chunks are being written
         * uncompressed.  */
@@ -624,6 +639,7 @@ do_done_with_blob(struct blob_descriptor *blob,
 {
        int ret;
        struct wim_inode *inode;
+       const tchar *path;
        tchar *cookie1;
        tchar *cookie2;
 
@@ -637,10 +653,12 @@ do_done_with_blob(struct blob_descriptor *blob,
        if (--inode->i_num_remaining_streams > 0)
                return 0;
 
-       cookie1 = progress_get_streamless_path(blob->file_on_disk);
-       cookie2 = progress_get_win32_path(blob->file_on_disk);
+       path = blob_file_path(blob);
+
+       cookie1 = progress_get_streamless_path(path);
+       cookie2 = progress_get_win32_path(path);
 
-       ret = done_with_file(blob->file_on_disk, progfunc, progctx);
+       ret = done_with_file(path, progfunc, progctx);
 
        progress_put_win32_path(cookie2);
        progress_put_streamless_path(cookie1);
@@ -1094,11 +1112,16 @@ write_blob_end_read(struct blob_descriptor *blob, int status, void *_ctx)
        return status;
 }
 
-/* Compute statistics about a list of blobs that will be written.
+/*
+ * Compute statistics about a list of blobs that will be written.
  *
  * Assumes the blobs are sorted such that all blobs located in each distinct WIM
- * (specified by WIMStruct) are together.  */
-static void
+ * (specified by WIMStruct) are together.
+ *
+ * For compactions, also verify that there are no overlapping resources.  This
+ * really should be checked earlier, but for now it's easiest to check here.
+ */
+static int
 compute_blob_list_stats(struct list_head *blob_list,
                        struct write_blobs_ctx *ctx)
 {
@@ -1107,15 +1130,32 @@ compute_blob_list_stats(struct list_head *blob_list,
        u64 num_blobs = 0;
        u64 total_parts = 0;
        WIMStruct *prev_wim_part = NULL;
+       const struct wim_resource_descriptor *prev_rdesc = NULL;
 
        list_for_each_entry(blob, blob_list, write_blobs_list) {
                num_blobs++;
                total_bytes += blob->size;
                if (blob->blob_location == BLOB_IN_WIM) {
-                       if (prev_wim_part != blob->rdesc->wim) {
-                               prev_wim_part = blob->rdesc->wim;
+                       const struct wim_resource_descriptor *rdesc = blob->rdesc;
+                       WIMStruct *wim = rdesc->wim;
+
+                       if (prev_wim_part != wim) {
+                               prev_wim_part = wim;
                                total_parts++;
                        }
+                       if (unlikely(wim->being_compacted) && rdesc != prev_rdesc) {
+                               if (prev_rdesc != NULL &&
+                                   rdesc->offset_in_wim <
+                                               prev_rdesc->offset_in_wim +
+                                               prev_rdesc->size_in_wim)
+                               {
+                                       WARNING("WIM file contains overlapping "
+                                               "resources!  Compaction is not "
+                                               "possible.");
+                                       return WIMLIB_ERR_RESOURCE_ORDER;
+                               }
+                               prev_rdesc = rdesc;
+                       }
                }
        }
        ctx->progress_data.progress.write_streams.total_bytes       = total_bytes;
@@ -1126,6 +1166,7 @@ compute_blob_list_stats(struct list_head *blob_list,
        ctx->progress_data.progress.write_streams.total_parts       = total_parts;
        ctx->progress_data.progress.write_streams.completed_parts   = 0;
        ctx->progress_data.next_progress = 0;
+       return 0;
 }
 
 /* Find blobs in @blob_list that can be copied to the output WIM in raw form
@@ -1133,14 +1174,12 @@ compute_blob_list_stats(struct list_head *blob_list,
  * @raw_copy_blobs.  Return the total uncompressed size of the blobs that need
  * to be compressed.  */
 static u64
-find_raw_copy_blobs(struct list_head *blob_list,
-                   int write_resource_flags,
-                   int out_ctype,
-                   u32 out_chunk_size,
+find_raw_copy_blobs(struct list_head *blob_list, int write_resource_flags,
+                   int out_ctype, u32 out_chunk_size,
                    struct list_head *raw_copy_blobs)
 {
        struct blob_descriptor *blob, *tmp;
-       u64 num_bytes_to_compress = 0;
+       u64 num_nonraw_bytes = 0;
 
        INIT_LIST_HEAD(raw_copy_blobs);
 
@@ -1150,23 +1189,17 @@ find_raw_copy_blobs(struct list_head *blob_list,
                        blob->rdesc->raw_copy_ok = 0;
 
        list_for_each_entry_safe(blob, tmp, blob_list, write_blobs_list) {
-               if (blob->blob_location == BLOB_IN_WIM &&
-                   blob->rdesc->raw_copy_ok)
-               {
-                       list_move_tail(&blob->write_blobs_list,
-                                      raw_copy_blobs);
-               } else if (can_raw_copy(blob, write_resource_flags,
-                                       out_ctype, out_chunk_size))
+               if (can_raw_copy(blob, write_resource_flags,
+                                out_ctype, out_chunk_size))
                {
                        blob->rdesc->raw_copy_ok = 1;
-                       list_move_tail(&blob->write_blobs_list,
-                                      raw_copy_blobs);
+                       list_move_tail(&blob->write_blobs_list, raw_copy_blobs);
                } else {
-                       num_bytes_to_compress += blob->size;
+                       num_nonraw_bytes += blob->size;
                }
        }
 
-       return num_bytes_to_compress;
+       return num_nonraw_bytes;
 }
 
 /* Copy a raw compressed resource located in another WIM file to the WIM file
@@ -1198,21 +1231,37 @@ write_raw_copy_resource(struct wim_resource_descriptor *in_rdesc,
        }
        in_fd = &in_rdesc->wim->in_fd;
        wimlib_assert(cur_read_offset != end_read_offset);
-       do {
 
-               bytes_to_read = min(sizeof(buf), end_read_offset - cur_read_offset);
+       if (likely(!in_rdesc->wim->being_compacted) ||
+           in_rdesc->offset_in_wim > out_fd->offset) {
+               do {
+                       bytes_to_read = min(sizeof(buf),
+                                           end_read_offset - cur_read_offset);
 
-               ret = full_pread(in_fd, buf, bytes_to_read, cur_read_offset);
-               if (ret)
-                       return ret;
+                       ret = full_pread(in_fd, buf, bytes_to_read,
+                                        cur_read_offset);
+                       if (ret)
+                               return ret;
 
-               ret = full_write(out_fd, buf, bytes_to_read);
-               if (ret)
-                       return ret;
+                       ret = full_write(out_fd, buf, bytes_to_read);
+                       if (ret)
+                               return ret;
+
+                       cur_read_offset += bytes_to_read;
+
+               } while (cur_read_offset != end_read_offset);
+       } else {
+               /* Optimization: the WIM file is being compacted and the
+                * resource being written is already in the desired location.
+                * Skip over the data instead of re-writing it.  */
 
-               cur_read_offset += bytes_to_read;
+               /* Due to the earlier check for overlapping resources, it should
+                * never be the case that we already overwrote the resource.  */
+               wimlib_assert(!(in_rdesc->offset_in_wim < out_fd->offset));
 
-       } while (cur_read_offset != end_read_offset);
+               if (-1 == filedes_seek(out_fd, out_fd->offset + in_rdesc->size_in_wim))
+                       return WIMLIB_ERR_WRITE;
+       }
 
        list_for_each_entry(blob, &in_rdesc->blob_list, rdesc_node) {
                if (blob->will_be_in_output_wim) {
@@ -1294,17 +1343,6 @@ validate_blob_list(struct list_head *blob_list)
        }
 }
 
-static inline bool
-blob_is_in_file(const struct blob_descriptor *blob)
-{
-       return blob->blob_location == BLOB_IN_FILE_ON_DISK
-#ifdef __WIN32__
-           || blob->blob_location == BLOB_IN_WINNT_FILE_ON_DISK
-           || blob->blob_location == BLOB_WIN32_ENCRYPTED
-#endif
-          ;
-}
-
 static void
 init_done_with_file_info(struct list_head *blob_list)
 {
@@ -1418,9 +1456,9 @@ init_done_with_file_info(struct list_head *blob_list)
  * identical to another blob already being written or one that would be filtered
  * out of the output WIM using blob_filtered() with the context @filter_ctx.
  * Each such duplicate blob will be removed from @blob_list, its reference count
- * transfered to the pre-existing duplicate blob, its memory freed, and will not
- * be written.  Alternatively, if a blob in @blob_list is a duplicate with any
- * blob in @blob_table that has not been marked for writing or would not be
+ * transferred to the pre-existing duplicate blob, its memory freed, and will
+ * not be written.  Alternatively, if a blob in @blob_list is a duplicate with
+ * any blob in @blob_table that has not been marked for writing or would not be
  * hard-filtered, it is freed and the pre-existing duplicate is written instead,
  * taking ownership of the reference count and slot in the @blob_table_list.
  *
@@ -1442,6 +1480,7 @@ write_blob_list(struct list_head *blob_list,
        int ret;
        struct write_blobs_ctx ctx;
        struct list_head raw_copy_blobs;
+       u64 num_nonraw_bytes;
 
        wimlib_assert((write_resource_flags &
                       (WRITE_RESOURCE_FLAG_SOLID |
@@ -1487,7 +1526,9 @@ write_blob_list(struct list_head *blob_list,
        if (ret)
                return ret;
 
-       compute_blob_list_stats(blob_list, &ctx);
+       ret = compute_blob_list_stats(blob_list, &ctx);
+       if (ret)
+               return ret;
 
        if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID_SORT) {
                ret = sort_blob_list_for_solid_compression(blob_list);
@@ -1498,24 +1539,18 @@ write_blob_list(struct list_head *blob_list,
        ctx.progress_data.progfunc = progfunc;
        ctx.progress_data.progctx = progctx;
 
-       ctx.num_bytes_to_compress = find_raw_copy_blobs(blob_list,
-                                                       write_resource_flags,
-                                                       out_ctype,
-                                                       out_chunk_size,
-                                                       &raw_copy_blobs);
-
-       if (ctx.num_bytes_to_compress == 0)
-               goto out_write_raw_copy_resources;
+       num_nonraw_bytes = find_raw_copy_blobs(blob_list, write_resource_flags,
+                                              out_ctype, out_chunk_size,
+                                              &raw_copy_blobs);
 
-       /* Unless uncompressed output was required, allocate a chunk_compressor
-        * to do compression.  There are serial and parallel implementations of
-        * the chunk_compressor interface.  We default to parallel using the
+       /* Unless no data needs to be compressed, allocate a chunk_compressor to
+        * do compression.  There are serial and parallel implementations of the
+        * chunk_compressor interface.  We default to parallel using the
         * specified number of threads, unless the upper bound on the number
         * bytes needing to be compressed is less than a heuristic value.  */
-       if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
-
+       if (num_nonraw_bytes != 0 && out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
        #ifdef ENABLE_MULTITHREADED_COMPRESSION
-               if (ctx.num_bytes_to_compress > max(2000000, out_chunk_size)) {
+               if (num_nonraw_bytes > max(2000000, out_chunk_size)) {
                        ret = new_parallel_chunk_compressor(out_ctype,
                                                            out_chunk_size,
                                                            num_threads, 0,
@@ -1541,9 +1576,6 @@ write_blob_list(struct list_head *blob_list,
        else
                ctx.progress_data.progress.write_streams.num_threads = 1;
 
-       INIT_LIST_HEAD(&ctx.blobs_being_compressed);
-       INIT_LIST_HEAD(&ctx.blobs_in_solid_resource);
-
        ret = call_progress(ctx.progress_data.progfunc,
                            WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
                            &ctx.progress_data.progress,
@@ -1551,8 +1583,21 @@ write_blob_list(struct list_head *blob_list,
        if (ret)
                goto out_destroy_context;
 
+       /* Copy any compressed resources for which the raw data can be reused
+        * without decompression.  */
+       ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd,
+                                      &ctx.progress_data);
+
+       if (ret || num_nonraw_bytes == 0)
+               goto out_destroy_context;
+
+       INIT_LIST_HEAD(&ctx.blobs_being_compressed);
+
        if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
-               ret = begin_write_resource(&ctx, ctx.num_bytes_to_compress);
+
+               INIT_LIST_HEAD(&ctx.blobs_in_solid_resource);
+
+               ret = begin_write_resource(&ctx, num_nonraw_bytes);
                if (ret)
                        goto out_destroy_context;
        }
@@ -1605,12 +1650,6 @@ write_blob_list(struct list_head *blob_list,
                wimlib_assert(offset_in_res == reshdr.uncompressed_size);
        }
 
-out_write_raw_copy_resources:
-       /* Copy any compressed resources for which the raw data can be reused
-        * without decompression.  */
-       ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd,
-                                      &ctx.progress_data);
-
 out_destroy_context:
        FREE(ctx.chunk_csizes);
        if (ctx.compressor)
@@ -1971,9 +2010,9 @@ filter_blob_list_for_write(struct list_head *blob_list,
  *     STREAMS_OK:  For writes of all images, assume that all blobs in the blob
  *     table of @wim and the per-image lists of unhashed blobs should be taken
  *     as-is, and image metadata should not be searched for references.  This
- *     does not exclude filtering with OVERWRITE and SKIP_EXTERNAL_WIMS, below.
+ *     does not exclude filtering with APPEND and SKIP_EXTERNAL_WIMS, below.
  *
- *     OVERWRITE:  Blobs already present in @wim shall not be returned in
+ *     APPEND:  Blobs already present in @wim shall not be returned in
  *     @blob_list_ret.
  *
  *     SKIP_EXTERNAL_WIMS:  Blobs already present in a WIM file, but not @wim,
@@ -1993,9 +2032,9 @@ filter_blob_list_for_write(struct list_head *blob_list,
  *     the blobs in @blob_list_ret.
  *
  *     This list will be a proper superset of @blob_list_ret if and only if
- *     WIMLIB_WRITE_FLAG_OVERWRITE was specified in @write_flags and some of
- *     the blobs that would otherwise need to be written were already located
- *     in the WIM file.
+ *     WIMLIB_WRITE_FLAG_APPEND was specified in @write_flags and some of the
+ *     blobs that would otherwise need to be written were already located in
+ *     the WIM file.
  *
  *     All blobs in this list will have @out_refcnt set to the number of
  *     references to the blob in the output WIM.  If
@@ -2133,16 +2172,28 @@ write_metadata_resources(WIMStruct *wim, int image, int write_flags)
                struct wim_image_metadata *imd;
 
                imd = wim->image_metadata[i - 1];
-               /* Build a new metadata resource only if image was modified from
-                * the original (or was newly added).  Otherwise just copy the
-                * existing one.  */
-               if (imd->modified) {
+               if (is_image_dirty(imd)) {
+                       /* The image was modified from the original, or was
+                        * newly added, so we have to build and write a new
+                        * metadata resource.  */
                        ret = write_metadata_resource(wim, i,
                                                      write_resource_flags);
-               } else if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE) {
-                       blob_set_out_reshdr_for_reuse(imd->metadata_blob);
+               } else if (is_image_unchanged_from_wim(imd, wim) &&
+                          (write_flags & (WIMLIB_WRITE_FLAG_UNSAFE_COMPACT |
+                                          WIMLIB_WRITE_FLAG_APPEND)))
+               {
+                       /* The metadata resource is already in the WIM file.
+                        * For appends, we don't need to write it at all.  For
+                        * compactions, we re-write existing metadata resources
+                        * along with the existing file resources, not here.  */
+                       if (write_flags & WIMLIB_WRITE_FLAG_APPEND)
+                               blob_set_out_reshdr_for_reuse(imd->metadata_blob);
                        ret = 0;
                } else {
+                       /* The metadata resource is in a WIM file other than the
+                        * one being written to.  We need to rewrite it,
+                        * possibly compressed differently; but rebuilding the
+                        * metadata itself isn't necessary.  */
                        ret = write_wim_resource(imd->metadata_blob,
                                                 &wim->out_fd,
                                                 wim->out_compression_type,
@@ -2214,7 +2265,7 @@ write_blob_table(WIMStruct *wim, int image, int write_flags,
        int ret;
 
        /* Set output resource metadata for blobs already present in WIM.  */
-       if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE) {
+       if (write_flags & WIMLIB_WRITE_FLAG_APPEND) {
                struct blob_descriptor *blob;
                list_for_each_entry(blob, blob_table_list, blob_table_list) {
                        if (blob->blob_location == BLOB_IN_WIM &&
@@ -2297,14 +2348,13 @@ finish_write(WIMStruct *wim, int image, int write_flags,
                                wim->out_hdr.boot_idx - 1]->metadata_blob->out_reshdr);
        }
 
-       /* If overwriting the WIM file containing an integrity table in-place,
-        * we'd like to re-use the information in the old integrity table
-        * instead of recalculating it.  But we might overwrite the old
-        * integrity table when we expand the XML data.  Read it into memory
-        * just in case.  */
-       if ((write_flags & (WIMLIB_WRITE_FLAG_OVERWRITE |
+       /* If appending to a WIM file containing an integrity table, we'd like
+        * to re-use the information in the old integrity table instead of
+        * recalculating it.  But we might overwrite the old integrity table
+        * when we expand the XML data.  Read it into memory just in case.  */
+       if ((write_flags & (WIMLIB_WRITE_FLAG_APPEND |
                            WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)) ==
-               (WIMLIB_WRITE_FLAG_OVERWRITE |
+               (WIMLIB_WRITE_FLAG_APPEND |
                 WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
            && wim_has_integrity_table(wim))
        {
@@ -2322,10 +2372,8 @@ finish_write(WIMStruct *wim, int image, int write_flags,
        if (!(write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS)) {
                ret = write_blob_table(wim, image, write_flags,
                                       blob_table_list);
-               if (ret) {
-                       free_integrity_table(old_integrity_table);
-                       return ret;
-               }
+               if (ret)
+                       goto out;
        }
 
        /* Write XML data.  */
@@ -2335,13 +2383,13 @@ finish_write(WIMStruct *wim, int image, int write_flags,
        ret = write_wim_xml_data(wim, image, xml_totalbytes,
                                 &wim->out_hdr.xml_data_reshdr,
                                 write_resource_flags);
-       if (ret) {
-               free_integrity_table(old_integrity_table);
-               return ret;
-       }
+       if (ret)
+               goto out;
 
        /* Write integrity table if needed.  */
-       if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) {
+       if ((write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) &&
+           wim->out_hdr.blob_table_reshdr.offset_in_wim != 0)
+       {
                if (write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS) {
                        /* The XML data we wrote may have overwritten part of
                         * the old integrity table, so while calculating the new
@@ -2352,10 +2400,8 @@ finish_write(WIMStruct *wim, int image, int write_flags,
                        zero_reshdr(&checkpoint_hdr.integrity_table_reshdr);
                        checkpoint_hdr.flags |= WIM_HDR_FLAG_WRITE_IN_PROGRESS;
                        ret = write_wim_header(&checkpoint_hdr, &wim->out_fd, 0);
-                       if (ret) {
-                               free_integrity_table(old_integrity_table);
-                               return ret;
-                       }
+                       if (ret)
+                               goto out;
                }
 
                new_blob_table_end = wim->out_hdr.blob_table_reshdr.offset_in_wim +
@@ -2365,9 +2411,8 @@ finish_write(WIMStruct *wim, int image, int write_flags,
                                            new_blob_table_end,
                                            old_blob_table_end,
                                            old_integrity_table);
-               free_integrity_table(old_integrity_table);
                if (ret)
-                       return ret;
+                       goto out;
        } else {
                /* No integrity table.  */
                zero_reshdr(&wim->out_hdr.integrity_table_reshdr);
@@ -2383,7 +2428,19 @@ finish_write(WIMStruct *wim, int image, int write_flags,
        else
                ret = write_wim_header(&wim->out_hdr, &wim->out_fd, 0);
        if (ret)
-               return ret;
+               goto out;
+
+       ret = WIMLIB_ERR_WRITE;
+       if (unlikely(write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)) {
+               /* Truncate any data the compaction freed up.  */
+               if (ftruncate(wim->out_fd.fd, wim->out_fd.offset) &&
+                   errno != EINVAL) /* allow compaction on untruncatable files,
+                                       e.g. block devices  */
+               {
+                       ERROR_WITH_ERRNO("Failed to truncate the output WIM file");
+                       goto out;
+               }
+       }
 
        /* Possibly sync file data to disk before closing.  On POSIX systems, it
         * is necessary to do this before using rename() to overwrite an
@@ -2391,19 +2448,24 @@ finish_write(WIMStruct *wim, int image, int write_flags,
         * the system is abruptly terminated when the metadata for the rename
         * operation has been written to disk, but the new file data has not.
         */
+       ret = WIMLIB_ERR_WRITE;
        if (write_flags & WIMLIB_WRITE_FLAG_FSYNC) {
                if (fsync(wim->out_fd.fd)) {
                        ERROR_WITH_ERRNO("Error syncing data to WIM file");
-                       return WIMLIB_ERR_WRITE;
+                       goto out;
                }
        }
 
+       ret = WIMLIB_ERR_WRITE;
        if (close_wim_writable(wim, write_flags)) {
                ERROR_WITH_ERRNO("Failed to close the output WIM file");
-               return WIMLIB_ERR_WRITE;
+               goto out;
        }
 
-       return 0;
+       ret = 0;
+out:
+       free_integrity_table(old_integrity_table);
+       return ret;
 }
 
 #if defined(HAVE_SYS_FILE_H) && defined(HAVE_FLOCK)
@@ -2574,6 +2636,25 @@ should_default_to_solid_compression(WIMStruct *wim, int write_flags)
                wim_has_solid_resources(wim);
 }
 
+/* Update the images' filecount/bytecount stats (in the XML info) to take into
+ * account any recent modifications.  */
+static int
+update_image_stats(WIMStruct *wim)
+{
+       if (!wim_has_metadata(wim))
+               return 0;
+       for (int i = 0; i < wim->hdr.image_count; i++) {
+               struct wim_image_metadata *imd = wim->image_metadata[i];
+               if (imd->stats_outdated) {
+                       int ret = xml_update_image_info(wim, i + 1);
+                       if (ret)
+                               return ret;
+                       imd->stats_outdated = false;
+               }
+       }
+       return 0;
+}
+
 /* Write a standalone WIM or split WIM (SWM) part to a new file or to a file
  * descriptor.  */
 int
@@ -2620,6 +2701,10 @@ write_wim_part(WIMStruct *wim,
                                    WIMLIB_WRITE_FLAG_NOT_PIPABLE))
                return WIMLIB_ERR_INVALID_PARAM;
 
+       /* Only wimlib_overwrite() accepts UNSAFE_COMPACT.  */
+       if (write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)
+               return WIMLIB_ERR_INVALID_PARAM;
+
        /* Include an integrity table by default if no preference was given and
         * the WIM already had an integrity table.  */
        if (!(write_flags & (WIMLIB_WRITE_FLAG_CHECK_INTEGRITY |
@@ -2717,6 +2802,11 @@ write_wim_part(WIMStruct *wim,
                        wim->out_hdr.boot_idx = 1;
        }
 
+       /* Update image stats if needed.  */
+       ret = update_image_stats(wim);
+       if (ret)
+               return ret;
+
        /* Set up the output file descriptor.  */
        if (write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR) {
                /* File descriptor was explicitly provided.  */
@@ -2819,11 +2909,16 @@ wimlib_write_to_fd(WIMStruct *wim, int fd,
        return write_standalone_wim(wim, &fd, image, write_flags, num_threads);
 }
 
+/* Have there been any changes to images in the specified WIM, including updates
+ * as well as deletions and additions of entire images, but excluding changes to
+ * the XML document?  */
 static bool
-any_images_modified(WIMStruct *wim)
+any_images_changed(WIMStruct *wim)
 {
+       if (wim->image_deletion_occurred)
+               return true;
        for (int i = 0; i < wim->hdr.image_count; i++)
-               if (wim->image_metadata[i]->modified)
+               if (!is_image_unchanged_from_wim(wim->image_metadata[i], wim))
                        return true;
        return false;
 }
@@ -2842,8 +2937,8 @@ check_resource_offset(struct blob_descriptor *blob, void *_wim)
 }
 
 /* Make sure no file or metadata resources are located after the XML data (or
- * integrity table if present)--- otherwise we can't safely overwrite the WIM in
- * place and we return WIMLIB_ERR_RESOURCE_ORDER.  */
+ * integrity table if present)--- otherwise we can't safely append to the WIM
+ * file and we return WIMLIB_ERR_RESOURCE_ORDER.  */
 static int
 check_resource_offsets(WIMStruct *wim, off_t end_offset)
 {
@@ -2863,6 +2958,20 @@ check_resource_offsets(WIMStruct *wim, off_t end_offset)
        return 0;
 }
 
+static int
+free_blob_if_invalidated(struct blob_descriptor *blob, void *_wim)
+{
+       const WIMStruct *wim = _wim;
+
+       if (!blob->will_be_in_output_wim &&
+           blob->blob_location == BLOB_IN_WIM && blob->rdesc->wim == wim)
+       {
+               blob_table_unlink(wim->blob_table, blob);
+               free_blob_descriptor(blob);
+       }
+       return 0;
+}
+
 /*
  * Overwrite a WIM, possibly appending new resources to it.
  *
@@ -2913,18 +3022,30 @@ check_resource_offsets(WIMStruct *wim, off_t end_offset)
  *                   XML data (variable size)
  *                   Integrity table (optional) (variable size)
  *
- * This method allows an image to be appended to a large WIM very quickly, and
+ * This function allows an image to be appended to a large WIM very quickly, and
  * is crash-safe except in the case of write re-ordering, but the disadvantage
  * is that a small hole is left in the WIM where the old blob table, xml data,
  * and integrity table were.  (These usually only take up a small amount of
  * space compared to the blobs, however.)
+ *
+ * Finally, this function also supports "compaction" overwrites as an
+ * alternative to the normal "append" overwrites described above.  In a
+ * compaction, data is written starting immediately from the end of the header.
+ * All existing resources are written first, in order by file offset.  New
+ * resources are written afterwards, and at the end any extra data is truncated
+ * from the file.  The advantage of this approach is that the WIM file
+ * ends up fully optimized, without any holes remaining.  The main disadvantage
+ * is that this operation is fundamentally unsafe and cannot be interrupted
+ * without data corruption.  Consequently, compactions are only ever done when
+ * explicitly requested by the library user with the flag
+ * WIMLIB_WRITE_FLAG_UNSAFE_COMPACT.  (Another disadvantage is that a compaction
+ * can be much slower than an append.)
  */
 static int
 overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads)
 {
        int ret;
        off_t old_wim_end;
-       u64 old_blob_table_end, old_xml_begin, old_xml_end;
        struct list_head blob_list;
        struct list_head blob_table_list;
        struct filter_context filter_ctx;
@@ -2950,66 +3071,113 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads)
        if (should_default_to_solid_compression(wim, write_flags))
                write_flags |= WIMLIB_WRITE_FLAG_SOLID;
 
-       /* Set additional flags for overwrite.  */
-       write_flags |= WIMLIB_WRITE_FLAG_OVERWRITE |
-                      WIMLIB_WRITE_FLAG_STREAMS_OK;
-
-       /* Make sure there is no data after the XML data, except possibily an
-        * integrity table.  If this were the case, then this data would be
-        * overwritten.  */
-       old_xml_begin = wim->hdr.xml_data_reshdr.offset_in_wim;
-       old_xml_end = old_xml_begin + wim->hdr.xml_data_reshdr.size_in_wim;
-       old_blob_table_end = wim->hdr.blob_table_reshdr.offset_in_wim +
-                            wim->hdr.blob_table_reshdr.size_in_wim;
-       if (wim_has_integrity_table(wim) &&
-           wim->hdr.integrity_table_reshdr.offset_in_wim < old_xml_end) {
-               WARNING("Didn't expect the integrity table to be before the XML data");
-               ret = WIMLIB_ERR_RESOURCE_ORDER;
-               goto out;
-       }
+       if (unlikely(write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)) {
 
-       if (old_blob_table_end > old_xml_begin) {
-               WARNING("Didn't expect the blob table to be after the XML data");
-               ret = WIMLIB_ERR_RESOURCE_ORDER;
-               goto out;
-       }
+               /* In-place compaction  */
+
+               WARNING("The WIM file \"%"TS"\" is being compacted in place.\n"
+                       "          Do *not* interrupt the operation, or else "
+                       "the WIM file will be\n"
+                       "          corrupted!", wim->filename);
+               wim->being_compacted = 1;
+               old_wim_end = WIM_HEADER_DISK_SIZE;
 
-       /* Set @old_wim_end, which indicates the point beyond which we don't
-        * allow any file and metadata resources to appear without returning
-        * WIMLIB_ERR_RESOURCE_ORDER (due to the fact that we would otherwise
-        * overwrite these resources). */
-       if (!wim->image_deletion_occurred && !any_images_modified(wim)) {
-               /* If no images have been modified and no images have been
-                * deleted, a new blob table does not need to be written.  We
-                * shall write the new XML data and optional integrity table
-                * immediately after the blob table.  Note that this may
-                * overwrite an existing integrity table. */
-               old_wim_end = old_blob_table_end;
-               write_flags |= WIMLIB_WRITE_FLAG_NO_NEW_BLOBS;
-       } else if (wim_has_integrity_table(wim)) {
-               /* Old WIM has an integrity table; begin writing new blobs after
-                * it. */
-               old_wim_end = wim->hdr.integrity_table_reshdr.offset_in_wim +
-                             wim->hdr.integrity_table_reshdr.size_in_wim;
+               ret = prepare_blob_list_for_write(wim, WIMLIB_ALL_IMAGES,
+                                                 write_flags, &blob_list,
+                                                 &blob_table_list, &filter_ctx);
+               if (ret)
+                       goto out;
+
+               /* Prevent new files from being deduplicated with existing blobs
+                * in the WIM that we haven't decided to write.  Such blobs will
+                * be overwritten during the compaction.  */
+               for_blob_in_table(wim->blob_table, free_blob_if_invalidated, wim);
+
+               if (wim_has_metadata(wim)) {
+                       /* Add existing metadata resources to be compacted along
+                        * with the file resources.  */
+                       for (int i = 0; i < wim->hdr.image_count; i++) {
+                               struct wim_image_metadata *imd = wim->image_metadata[i];
+                               if (is_image_unchanged_from_wim(imd, wim)) {
+                                       fully_reference_blob_for_write(imd->metadata_blob,
+                                                                      &blob_list);
+                               }
+                       }
+               }
        } else {
-               /* No existing integrity table; begin writing new blobs after
-                * the old XML data. */
-               old_wim_end = old_xml_end;
-       }
+               u64 old_blob_table_end, old_xml_begin, old_xml_end;
+
+               /* Set additional flags for append.  */
+               write_flags |= WIMLIB_WRITE_FLAG_APPEND |
+                              WIMLIB_WRITE_FLAG_STREAMS_OK;
+
+               /* Make sure there is no data after the XML data, except
+                * possibly an integrity table.  If this were the case, then
+                * this data would be overwritten.  */
+               old_xml_begin = wim->hdr.xml_data_reshdr.offset_in_wim;
+               old_xml_end = old_xml_begin + wim->hdr.xml_data_reshdr.size_in_wim;
+               if (wim->hdr.blob_table_reshdr.offset_in_wim == 0)
+                       old_blob_table_end = WIM_HEADER_DISK_SIZE;
+               else
+                       old_blob_table_end = wim->hdr.blob_table_reshdr.offset_in_wim +
+                                            wim->hdr.blob_table_reshdr.size_in_wim;
+               if (wim_has_integrity_table(wim) &&
+                   wim->hdr.integrity_table_reshdr.offset_in_wim < old_xml_end) {
+                       WARNING("Didn't expect the integrity table to be "
+                               "before the XML data");
+                       ret = WIMLIB_ERR_RESOURCE_ORDER;
+                       goto out;
+               }
 
-       ret = check_resource_offsets(wim, old_wim_end);
-       if (ret)
-               goto out;
+               if (old_blob_table_end > old_xml_begin) {
+                       WARNING("Didn't expect the blob table to be after "
+                               "the XML data");
+                       ret = WIMLIB_ERR_RESOURCE_ORDER;
+                       goto out;
+               }
+               /* Set @old_wim_end, which indicates the point beyond which we
+                * don't allow any file and metadata resources to appear without
+                * returning WIMLIB_ERR_RESOURCE_ORDER (due to the fact that we
+                * would otherwise overwrite these resources). */
+               if (!any_images_changed(wim)) {
+                       /* If no images have been modified, added, or deleted,
+                        * then a new blob table does not need to be written.
+                        * We shall write the new XML data and optional
+                        * integrity table immediately after the blob table.
+                        * Note that this may overwrite an existing integrity
+                        * table.  */
+                       old_wim_end = old_blob_table_end;
+                       write_flags |= WIMLIB_WRITE_FLAG_NO_NEW_BLOBS;
+               } else if (wim_has_integrity_table(wim)) {
+                       /* Old WIM has an integrity table; begin writing new
+                        * blobs after it. */
+                       old_wim_end = wim->hdr.integrity_table_reshdr.offset_in_wim +
+                                     wim->hdr.integrity_table_reshdr.size_in_wim;
+               } else {
+                       /* No existing integrity table; begin writing new blobs
+                        * after the old XML data. */
+                       old_wim_end = old_xml_end;
+               }
+
+               ret = check_resource_offsets(wim, old_wim_end);
+               if (ret)
+                       goto out;
 
-       ret = prepare_blob_list_for_write(wim, WIMLIB_ALL_IMAGES, write_flags,
-                                         &blob_list, &blob_table_list,
-                                         &filter_ctx);
+               ret = prepare_blob_list_for_write(wim, WIMLIB_ALL_IMAGES,
+                                                 write_flags, &blob_list,
+                                                 &blob_table_list, &filter_ctx);
+               if (ret)
+                       goto out;
+
+               if (write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS)
+                       wimlib_assert(list_empty(&blob_list));
+       }
+
+       /* Update image stats if needed.  */
+       ret = update_image_stats(wim);
        if (ret)
                goto out;
 
-       if (write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS)
-               wimlib_assert(list_empty(&blob_list));
-
        ret = open_wim_writable(wim, wim->filename, O_RDWR);
        if (ret)
                goto out;
@@ -3051,7 +3219,8 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads)
        return 0;
 
 out_truncate:
-       if (!(write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS)) {
+       if (!(write_flags & (WIMLIB_WRITE_FLAG_NO_NEW_BLOBS |
+                            WIMLIB_WRITE_FLAG_UNSAFE_COMPACT))) {
                WARNING("Truncating \"%"TS"\" to its original size "
                        "(%"PRIu64" bytes)", wim->filename, old_wim_end);
                /* Return value of ftruncate() is ignored because this is
@@ -3065,6 +3234,7 @@ out_unlock_wim:
 out_close_wim:
        (void)close_wim_writable(wim, write_flags);
 out:
+       wim->being_compacted = 0;
        return ret;
 }
 
@@ -3120,8 +3290,8 @@ overwrite_wim_via_tmpfile(WIMStruct *wim, int write_flags, unsigned num_threads)
                             &progress, wim->progctx);
 }
 
-/* Determine if the specified WIM file may be updated by appending in-place
- * rather than writing and replacing it with an entirely new file.  */
+/* Determine if the specified WIM file may be updated in-place rather than by
+ * writing and replacing it with an entirely new file.  */
 static bool
 can_overwrite_wim_inplace(const WIMStruct *wim, int write_flags)
 {
@@ -3163,6 +3333,20 @@ wimlib_overwrite(WIMStruct *wim, int write_flags, unsigned num_threads)
        if (!wim->filename)
                return WIMLIB_ERR_NO_FILENAME;
 
+       if (unlikely(write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)) {
+               /*
+                * In UNSAFE_COMPACT mode:
+                *      - RECOMPRESS is forbidden
+                *      - REBUILD is ignored
+                *      - SOFT_DELETE and NO_SOLID_SORT are implied
+                */
+               if (write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS)
+                       return WIMLIB_ERR_COMPACTION_NOT_POSSIBLE;
+               write_flags &= ~WIMLIB_WRITE_FLAG_REBUILD;
+               write_flags |= WIMLIB_WRITE_FLAG_SOFT_DELETE;
+               write_flags |= WIMLIB_WRITE_FLAG_NO_SOLID_SORT;
+       }
+
        orig_hdr_flags = wim->hdr.flags;
        if (write_flags & WIMLIB_WRITE_FLAG_IGNORE_READONLY_FLAG)
                wim->hdr.flags &= ~WIM_HDR_FLAG_READONLY;
@@ -3177,5 +3361,7 @@ wimlib_overwrite(WIMStruct *wim, int write_flags, unsigned num_threads)
                        return ret;
                WARNING("Falling back to re-building entire WIM");
        }
+       if (write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)
+               return WIMLIB_ERR_COMPACTION_NOT_POSSIBLE;
        return overwrite_wim_via_tmpfile(wim, write_flags, num_threads);
 }