X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fwrite.c;h=4805913de376fab95c1982eae45f0253a6dd13ec;hp=aa456e07fda54e470f3de09836be3ed154799ccf;hb=HEAD;hpb=fc938fc3886f0c8cacdbc83148484b55a32ca8e7 diff --git a/src/write.c b/src/write.c index aa456e07..4bf1b2be 100644 --- a/src/write.c +++ b/src/write.c @@ -6,7 +6,7 @@ */ /* - * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers + * Copyright (C) 2012-2016 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -19,7 +19,7 @@ * details. * * You should have received a copy of the GNU Lesser General Public License - * along with this file; if not, see http://www.gnu.org/licenses/. + * along with this file; if not, see https://www.gnu.org/licenses/. */ #ifdef HAVE_CONFIG_H @@ -174,7 +174,12 @@ can_raw_copy(const struct blob_descriptor *blob, int write_resource_flags, rdesc = blob->rdesc; - /* Only reuse compressed resources. */ + /* In the case of an in-place compaction, always reuse resources located + * in the WIM being compacted. */ + if (rdesc->wim->being_compacted) + return true; + + /* Otherwise, only reuse compressed resources. */ if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE || !(rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_SOLID))) @@ -283,7 +288,7 @@ write_pwm_blob_header(const struct blob_descriptor *blob, blob_hdr.flags = cpu_to_le32(reshdr_flags); ret = full_write(out_fd, &blob_hdr, sizeof(blob_hdr)); if (ret) - ERROR_WITH_ERRNO("Write error"); + ERROR_WITH_ERRNO("Error writing blob header to WIM file"); return ret; } @@ -296,7 +301,8 @@ struct write_blobs_progress_data { static int do_write_blobs_progress(struct write_blobs_progress_data *progress_data, - u64 complete_size, u32 complete_count, bool discarded) + u64 complete_size, u64 complete_compressed_size, + u32 complete_count, bool discarded) { union wimlib_progress_info *progress = &progress_data->progress; int ret; @@ -311,6 +317,8 @@ do_write_blobs_progress(struct write_blobs_progress_data *progress_data, } } else { progress->write_streams.completed_bytes += complete_size; + progress->write_streams.completed_compressed_bytes += + complete_compressed_size; progress->write_streams.completed_streams += complete_count; } @@ -372,12 +380,6 @@ struct write_blobs_ctx { * @blobs_being_compressed only when writing a solid resource. */ struct list_head blobs_in_solid_resource; - /* Current uncompressed offset in the blob being read. */ - u64 cur_read_blob_offset; - - /* Uncompressed size of the blob currently being read. */ - u64 cur_read_blob_size; - /* Current uncompressed offset in the blob being written. 
*/ u64 cur_write_blob_offset; @@ -456,8 +458,11 @@ begin_chunk_table(struct write_blobs_ctx *ctx, u64 res_expected_size) reserve_size += sizeof(struct alt_chunk_table_header_disk); memset(ctx->chunk_csizes, 0, reserve_size); ret = full_write(ctx->out_fd, ctx->chunk_csizes, reserve_size); - if (ret) + if (ret) { + ERROR_WITH_ERRNO("Error reserving space for chunk " + "table in WIM file"); return ret; + } } return 0; } @@ -501,8 +506,8 @@ end_chunk_table(struct write_blobs_ctx *ctx, u64 res_actual_size, 0 != (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)); - typedef le64 _may_alias_attribute aliased_le64_t; - typedef le32 _may_alias_attribute aliased_le32_t; + typedef le64 __attribute__((may_alias)) aliased_le64_t; + typedef le32 __attribute__((may_alias)) aliased_le32_t; if (chunk_entry_size == 4) { aliased_le32_t *entries = (aliased_le32_t*)ctx->chunk_csizes; @@ -583,7 +588,7 @@ end_chunk_table(struct write_blobs_ctx *ctx, u64 res_actual_size, return 0; write_error: - ERROR_WITH_ERRNO("Write error"); + ERROR_WITH_ERRNO("Error writing chunk table to WIM file"); return ret; } @@ -634,6 +639,7 @@ do_done_with_blob(struct blob_descriptor *blob, { int ret; struct wim_inode *inode; + const tchar *path; tchar *cookie1; tchar *cookie2; @@ -647,10 +653,12 @@ do_done_with_blob(struct blob_descriptor *blob, if (--inode->i_num_remaining_streams > 0) return 0; - cookie1 = progress_get_streamless_path(blob->file_on_disk); - cookie2 = progress_get_win32_path(blob->file_on_disk); + path = blob_file_path(blob); + + cookie1 = progress_get_streamless_path(path); + cookie2 = progress_get_win32_path(path); - ret = done_with_file(blob->file_on_disk, progfunc, progctx); + ret = done_with_file(path, progfunc, progctx); progress_put_win32_path(cookie2); progress_put_streamless_path(cookie1); @@ -678,9 +686,6 @@ write_blob_begin_read(struct blob_descriptor *blob, void *_ctx) wimlib_assert(blob->size > 0); - ctx->cur_read_blob_offset = 0; - ctx->cur_read_blob_size = blob->size; - /* As an optimization, we allow some blobs to be "unhashed", meaning * their SHA-1 message digests are unknown. This is the case with blobs * that are added by scanning a directory tree with wimlib_add_image(), @@ -711,7 +716,9 @@ write_blob_begin_read(struct blob_descriptor *blob, void *_ctx) * output reference count to the duplicate blob * in the former case. */ ret = do_write_blobs_progress(&ctx->progress_data, - blob->size, 1, true); + blob->size, + blob->size, + 1, true); list_del(&blob->write_blobs_list); list_del(&blob->blob_table_list); if (new_blob->will_be_in_output_wim) @@ -760,7 +767,7 @@ write_blob_uncompressed(struct blob_descriptor *blob, struct filedes *out_fd) if (filedes_seek(out_fd, begin_offset) == -1) return 0; - ret = extract_blob_to_fd(blob, out_fd); + ret = extract_blob_to_fd(blob, out_fd, false); if (ret) { /* Error reading the uncompressed data. */ if (out_fd->offset == begin_offset && @@ -818,7 +825,7 @@ should_rewrite_blob_uncompressed(const struct write_blobs_ctx *ctx, * Exception: if the compressed size happens to be *exactly* the same as * the uncompressed size, then the blob *must* be written uncompressed * in order to remain compatible with the Windows Overlay Filesystem - * Filter Driver (WOF). + * filter driver (WOF). 
* * TODO: we are currently assuming that the optimization for * single-chunk resources in maybe_rewrite_blob_uncompressed() prevents @@ -865,8 +872,7 @@ write_chunk(struct write_blobs_ctx *ctx, const void *cchunk, { int ret; struct blob_descriptor *blob; - u32 completed_blob_count; - u32 completed_size; + u32 completed_blob_count = 0; blob = list_entry(ctx->blobs_being_compressed.next, struct blob_descriptor, write_blobs_list); @@ -913,8 +919,6 @@ write_chunk(struct write_blobs_ctx *ctx, const void *cchunk, ctx->cur_write_blob_offset += usize; - completed_size = usize; - completed_blob_count = 0; if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { /* Wrote chunk in solid mode. It may have finished multiple * blobs. */ @@ -971,11 +975,11 @@ write_chunk(struct write_blobs_ctx *ctx, const void *cchunk, } } - return do_write_blobs_progress(&ctx->progress_data, completed_size, + return do_write_blobs_progress(&ctx->progress_data, usize, csize, completed_blob_count, false); write_error: - ERROR_WITH_ERRNO("Write error"); + ERROR_WITH_ERRNO("Error writing chunk data to WIM file"); return ret; } @@ -1008,7 +1012,8 @@ prepare_chunk_buffer(struct write_blobs_ctx *ctx) /* Process the next chunk of data to be written to a WIM resource. */ static int -write_blob_process_chunk(const void *chunk, size_t size, void *_ctx) +write_blob_process_chunk(const struct blob_descriptor *blob, u64 offset, + const void *chunk, size_t size, void *_ctx) { struct write_blobs_ctx *ctx = _ctx; int ret; @@ -1021,7 +1026,6 @@ write_blob_process_chunk(const void *chunk, size_t size, void *_ctx) ret = write_chunk(ctx, chunk, size, size); if (ret) return ret; - ctx->cur_read_blob_offset += size; return 0; } @@ -1045,8 +1049,7 @@ write_blob_process_chunk(const void *chunk, size_t size, void *_ctx) } else { needed_chunk_size = min(ctx->out_chunk_size, ctx->cur_chunk_buf_filled + - (ctx->cur_read_blob_size - - ctx->cur_read_blob_offset)); + (blob->size - offset)); } bytes_consumed = min(chunkend - chunkptr, @@ -1056,7 +1059,7 @@ write_blob_process_chunk(const void *chunk, size_t size, void *_ctx) chunkptr, bytes_consumed); chunkptr += bytes_consumed; - ctx->cur_read_blob_offset += bytes_consumed; + offset += bytes_consumed; ctx->cur_chunk_buf_filled += bytes_consumed; if (ctx->cur_chunk_buf_filled == needed_chunk_size) { @@ -1077,8 +1080,6 @@ write_blob_end_read(struct blob_descriptor *blob, int status, void *_ctx) { struct write_blobs_ctx *ctx = _ctx; - wimlib_assert(ctx->cur_read_blob_offset == ctx->cur_read_blob_size || status); - if (!blob->will_be_in_output_wim) { /* The blob was a duplicate. Now that its data has finished * being read, it is being discarded in favor of the duplicate @@ -1104,11 +1105,16 @@ write_blob_end_read(struct blob_descriptor *blob, int status, void *_ctx) return status; } -/* Compute statistics about a list of blobs that will be written. +/* + * Compute statistics about a list of blobs that will be written. * * Assumes the blobs are sorted such that all blobs located in each distinct WIM - * (specified by WIMStruct) are together. */ -static void + * (specified by WIMStruct) are together. + * + * For compactions, also verify that there are no overlapping resources. This + * really should be checked earlier, but for now it's easiest to check here. 
+ */ +static int compute_blob_list_stats(struct list_head *blob_list, struct write_blobs_ctx *ctx) { @@ -1117,15 +1123,32 @@ compute_blob_list_stats(struct list_head *blob_list, u64 num_blobs = 0; u64 total_parts = 0; WIMStruct *prev_wim_part = NULL; + const struct wim_resource_descriptor *prev_rdesc = NULL; list_for_each_entry(blob, blob_list, write_blobs_list) { num_blobs++; total_bytes += blob->size; if (blob->blob_location == BLOB_IN_WIM) { - if (prev_wim_part != blob->rdesc->wim) { - prev_wim_part = blob->rdesc->wim; + const struct wim_resource_descriptor *rdesc = blob->rdesc; + WIMStruct *wim = rdesc->wim; + + if (prev_wim_part != wim) { + prev_wim_part = wim; total_parts++; } + if (unlikely(wim->being_compacted) && rdesc != prev_rdesc) { + if (prev_rdesc != NULL && + rdesc->offset_in_wim < + prev_rdesc->offset_in_wim + + prev_rdesc->size_in_wim) + { + WARNING("WIM file contains overlapping " + "resources! Compaction is not " + "possible."); + return WIMLIB_ERR_RESOURCE_ORDER; + } + prev_rdesc = rdesc; + } } } ctx->progress_data.progress.write_streams.total_bytes = total_bytes; @@ -1136,6 +1159,7 @@ compute_blob_list_stats(struct list_head *blob_list, ctx->progress_data.progress.write_streams.total_parts = total_parts; ctx->progress_data.progress.write_streams.completed_parts = 0; ctx->progress_data.next_progress = 0; + return 0; } /* Find blobs in @blob_list that can be copied to the output WIM in raw form @@ -1200,21 +1224,43 @@ write_raw_copy_resource(struct wim_resource_descriptor *in_rdesc, } in_fd = &in_rdesc->wim->in_fd; wimlib_assert(cur_read_offset != end_read_offset); - do { - bytes_to_read = min(sizeof(buf), end_read_offset - cur_read_offset); + if (likely(!in_rdesc->wim->being_compacted) || + in_rdesc->offset_in_wim > out_fd->offset) { + do { + bytes_to_read = min(sizeof(buf), + end_read_offset - cur_read_offset); - ret = full_pread(in_fd, buf, bytes_to_read, cur_read_offset); - if (ret) - return ret; + ret = full_pread(in_fd, buf, bytes_to_read, + cur_read_offset); + if (ret) { + ERROR_WITH_ERRNO("Error reading raw data " + "from WIM file"); + return ret; + } - ret = full_write(out_fd, buf, bytes_to_read); - if (ret) - return ret; + ret = full_write(out_fd, buf, bytes_to_read); + if (ret) { + ERROR_WITH_ERRNO("Error writing raw data " + "to WIM file"); + return ret; + } - cur_read_offset += bytes_to_read; + cur_read_offset += bytes_to_read; - } while (cur_read_offset != end_read_offset); + } while (cur_read_offset != end_read_offset); + } else { + /* Optimization: the WIM file is being compacted and the + * resource being written is already in the desired location. + * Skip over the data instead of re-writing it. */ + + /* Due the earlier check for overlapping resources, it should + * never be the case that we already overwrote the resource. */ + wimlib_assert(!(in_rdesc->offset_in_wim < out_fd->offset)); + + if (-1 == filedes_seek(out_fd, out_fd->offset + in_rdesc->size_in_wim)) + return WIMLIB_ERR_WRITE; + } list_for_each_entry(blob, &in_rdesc->blob_list, rdesc_node) { if (blob->will_be_in_output_wim) { @@ -1243,15 +1289,18 @@ write_raw_copy_resources(struct list_head *raw_copy_blobs, blob->rdesc->raw_copy_ok = 1; list_for_each_entry(blob, raw_copy_blobs, write_blobs_list) { + u64 compressed_size = 0; + if (blob->rdesc->raw_copy_ok) { /* Write each solid resource only one time. 
*/ ret = write_raw_copy_resource(blob->rdesc, out_fd); if (ret) return ret; blob->rdesc->raw_copy_ok = 0; + compressed_size = blob->rdesc->size_in_wim; } ret = do_write_blobs_progress(progress_data, blob->size, - 1, false); + compressed_size, 1, false); if (ret) return ret; } @@ -1296,17 +1345,6 @@ validate_blob_list(struct list_head *blob_list) } } -static inline bool -blob_is_in_file(const struct blob_descriptor *blob) -{ - return blob->blob_location == BLOB_IN_FILE_ON_DISK -#ifdef __WIN32__ - || blob->blob_location == BLOB_IN_WINNT_FILE_ON_DISK - || blob->blob_location == BLOB_WIN32_ENCRYPTED -#endif - ; -} - static void init_done_with_file_info(struct list_head *blob_list) { @@ -1420,9 +1458,9 @@ init_done_with_file_info(struct list_head *blob_list) * identical to another blob already being written or one that would be filtered * out of the output WIM using blob_filtered() with the context @filter_ctx. * Each such duplicate blob will be removed from @blob_list, its reference count - * transfered to the pre-existing duplicate blob, its memory freed, and will not - * be written. Alternatively, if a blob in @blob_list is a duplicate with any - * blob in @blob_table that has not been marked for writing or would not be + * transferred to the pre-existing duplicate blob, its memory freed, and will + * not be written. Alternatively, if a blob in @blob_list is a duplicate with + * any blob in @blob_table that has not been marked for writing or would not be * hard-filtered, it is freed and the pre-existing duplicate is written instead, * taking ownership of the reference count and slot in the @blob_table_list. * @@ -1490,7 +1528,9 @@ write_blob_list(struct list_head *blob_list, if (ret) return ret; - compute_blob_list_stats(blob_list, &ctx); + ret = compute_blob_list_stats(blob_list, &ctx); + if (ret) + return ret; if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID_SORT) { ret = sort_blob_list_for_solid_compression(blob_list); @@ -1505,22 +1545,12 @@ write_blob_list(struct list_head *blob_list, out_ctype, out_chunk_size, &raw_copy_blobs); - /* Copy any compressed resources for which the raw data can be reused - * without decompression. */ - ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd, - &ctx.progress_data); - - if (ret || num_nonraw_bytes == 0) - goto out_destroy_context; - - /* Unless uncompressed output was required, allocate a chunk_compressor - * to do compression. There are serial and parallel implementations of - * the chunk_compressor interface. We default to parallel using the + /* Unless no data needs to be compressed, allocate a chunk_compressor to + * do compression. There are serial and parallel implementations of the + * chunk_compressor interface. We default to parallel using the * specified number of threads, unless the upper bound on the number * bytes needing to be compressed is less than a heuristic value. 
*/ - if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) { - - #ifdef ENABLE_MULTITHREADED_COMPRESSION + if (num_nonraw_bytes != 0 && out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) { if (num_nonraw_bytes > max(2000000, out_chunk_size)) { ret = new_parallel_chunk_compressor(out_ctype, out_chunk_size, @@ -1532,7 +1562,6 @@ write_blob_list(struct list_head *blob_list, wimlib_get_error_string(ret)); } } - #endif if (ctx.compressor == NULL) { ret = new_serial_chunk_compressor(out_ctype, out_chunk_size, @@ -1547,9 +1576,6 @@ write_blob_list(struct list_head *blob_list, else ctx.progress_data.progress.write_streams.num_threads = 1; - INIT_LIST_HEAD(&ctx.blobs_being_compressed); - INIT_LIST_HEAD(&ctx.blobs_in_solid_resource); - ret = call_progress(ctx.progress_data.progfunc, WIMLIB_PROGRESS_MSG_WRITE_STREAMS, &ctx.progress_data.progress, @@ -1557,7 +1583,20 @@ write_blob_list(struct list_head *blob_list, if (ret) goto out_destroy_context; + /* Copy any compressed resources for which the raw data can be reused + * without decompression. */ + ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd, + &ctx.progress_data); + + if (ret || num_nonraw_bytes == 0) + goto out_destroy_context; + + INIT_LIST_HEAD(&ctx.blobs_being_compressed); + if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { + + INIT_LIST_HEAD(&ctx.blobs_in_solid_resource); + ret = begin_write_resource(&ctx, num_nonraw_bytes); if (ret) goto out_destroy_context; @@ -1568,7 +1607,7 @@ write_blob_list(struct list_head *blob_list, struct read_blob_callbacks cbs = { .begin_blob = write_blob_begin_read, - .consume_chunk = write_blob_process_chunk, + .continue_blob = write_blob_process_chunk, .end_blob = write_blob_end_read, .ctx = &ctx, }; @@ -1698,7 +1737,7 @@ write_wim_resource_from_buffer(const void *buf, } blob_set_is_located_in_attached_buffer(&blob, (void *)buf, buf_size); - sha1_buffer(buf, buf_size, blob.hash); + sha1(buf, buf_size, blob.hash); blob.unhashed = 0; blob.is_metadata = is_metadata; @@ -2133,16 +2172,28 @@ write_metadata_resources(WIMStruct *wim, int image, int write_flags) struct wim_image_metadata *imd; imd = wim->image_metadata[i - 1]; - /* Build a new metadata resource only if image was modified from - * the original (or was newly added). Otherwise just copy the - * existing one. */ - if (imd->modified) { + if (is_image_dirty(imd)) { + /* The image was modified from the original, or was + * newly added, so we have to build and write a new + * metadata resource. */ ret = write_metadata_resource(wim, i, write_resource_flags); - } else if (write_flags & WIMLIB_WRITE_FLAG_APPEND) { - blob_set_out_reshdr_for_reuse(imd->metadata_blob); + } else if (is_image_unchanged_from_wim(imd, wim) && + (write_flags & (WIMLIB_WRITE_FLAG_UNSAFE_COMPACT | + WIMLIB_WRITE_FLAG_APPEND))) + { + /* The metadata resource is already in the WIM file. + * For appends, we don't need to write it at all. For + * compactions, we re-write existing metadata resources + * along with the existing file resources, not here. */ + if (write_flags & WIMLIB_WRITE_FLAG_APPEND) + blob_set_out_reshdr_for_reuse(imd->metadata_blob); ret = 0; } else { + /* The metadata resource is in a WIM file other than the + * one being written to. We need to rewrite it, + * possibly compressed differently; but rebuilding the + * metadata itself isn't necessary. 
*/ ret = write_wim_resource(imd->metadata_blob, &wim->out_fd, wim->out_compression_type, @@ -2321,10 +2372,8 @@ finish_write(WIMStruct *wim, int image, int write_flags, if (!(write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS)) { ret = write_blob_table(wim, image, write_flags, blob_table_list); - if (ret) { - free_integrity_table(old_integrity_table); - return ret; - } + if (ret) + goto out; } /* Write XML data. */ @@ -2334,13 +2383,13 @@ finish_write(WIMStruct *wim, int image, int write_flags, ret = write_wim_xml_data(wim, image, xml_totalbytes, &wim->out_hdr.xml_data_reshdr, write_resource_flags); - if (ret) { - free_integrity_table(old_integrity_table); - return ret; - } + if (ret) + goto out; /* Write integrity table if needed. */ - if (write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) { + if ((write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) && + wim->out_hdr.blob_table_reshdr.offset_in_wim != 0) + { if (write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS) { /* The XML data we wrote may have overwritten part of * the old integrity table, so while calculating the new @@ -2351,10 +2400,8 @@ finish_write(WIMStruct *wim, int image, int write_flags, zero_reshdr(&checkpoint_hdr.integrity_table_reshdr); checkpoint_hdr.flags |= WIM_HDR_FLAG_WRITE_IN_PROGRESS; ret = write_wim_header(&checkpoint_hdr, &wim->out_fd, 0); - if (ret) { - free_integrity_table(old_integrity_table); - return ret; - } + if (ret) + goto out; } new_blob_table_end = wim->out_hdr.blob_table_reshdr.offset_in_wim + @@ -2364,9 +2411,8 @@ finish_write(WIMStruct *wim, int image, int write_flags, new_blob_table_end, old_blob_table_end, old_integrity_table); - free_integrity_table(old_integrity_table); if (ret) - return ret; + goto out; } else { /* No integrity table. */ zero_reshdr(&wim->out_hdr.integrity_table_reshdr); @@ -2382,7 +2428,19 @@ finish_write(WIMStruct *wim, int image, int write_flags, else ret = write_wim_header(&wim->out_hdr, &wim->out_fd, 0); if (ret) - return ret; + goto out; + + ret = WIMLIB_ERR_WRITE; + if (unlikely(write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)) { + /* Truncate any data the compaction freed up. */ + if (ftruncate(wim->out_fd.fd, wim->out_fd.offset) && + errno != EINVAL) /* allow compaction on untruncatable files, + e.g. block devices */ + { + ERROR_WITH_ERRNO("Failed to truncate the output WIM file"); + goto out; + } + } /* Possibly sync file data to disk before closing. On POSIX systems, it * is necessary to do this before using rename() to overwrite an @@ -2390,19 +2448,24 @@ finish_write(WIMStruct *wim, int image, int write_flags, * the system is abruptly terminated when the metadata for the rename * operation has been written to disk, but the new file data has not. */ + ret = WIMLIB_ERR_WRITE; if (write_flags & WIMLIB_WRITE_FLAG_FSYNC) { if (fsync(wim->out_fd.fd)) { ERROR_WITH_ERRNO("Error syncing data to WIM file"); - return WIMLIB_ERR_WRITE; + goto out; } } + ret = WIMLIB_ERR_WRITE; if (close_wim_writable(wim, write_flags)) { ERROR_WITH_ERRNO("Failed to close the output WIM file"); - return WIMLIB_ERR_WRITE; + goto out; } - return 0; + ret = 0; +out: + free_integrity_table(old_integrity_table); + return ret; } #if defined(HAVE_SYS_FILE_H) && defined(HAVE_FLOCK) @@ -2530,14 +2593,15 @@ write_pipable_wim(WIMStruct *wim, int image, int write_flags, /* At this point, the header at the beginning of the file has already * been written. 
*/ - /* For efficiency, when wimlib adds an image to the WIM with - * wimlib_add_image(), the SHA-1 message digests of files are not - * calculated; instead, they are calculated while the files are being - * written. However, this does not work when writing a pipable WIM, - * since when writing a blob to a pipable WIM, its SHA-1 message digest - * needs to be known before the blob data is written. Therefore, before - * getting much farther, we need to pre-calculate the SHA-1 message - * digests of all blobs that will be written. */ + /* + * For efficiency, wimlib normally delays calculating each newly added + * stream's hash until while that stream being written, or just before + * it is written. However, when writing a pipable WIM (potentially to a + * pipe), we first have to write the metadata resources, which contain + * all the hashes. Moreover each blob is prefixed with its hash (struct + * pwm_blob_hdr). Thus, we have to calculate all the hashes before + * writing anything. + */ ret = wim_checksum_unhashed_blobs(wim); if (ret) return ret; @@ -2573,6 +2637,25 @@ should_default_to_solid_compression(WIMStruct *wim, int write_flags) wim_has_solid_resources(wim); } +/* Update the images' filecount/bytecount stats (in the XML info) to take into + * account any recent modifications. */ +static int +update_image_stats(WIMStruct *wim) +{ + if (!wim_has_metadata(wim)) + return 0; + for (int i = 0; i < wim->hdr.image_count; i++) { + struct wim_image_metadata *imd = wim->image_metadata[i]; + if (imd->stats_outdated) { + int ret = xml_update_image_info(wim, i + 1); + if (ret) + return ret; + imd->stats_outdated = false; + } + } + return 0; +} + /* Write a standalone WIM or split WIM (SWM) part to a new file or to a file * descriptor. */ int @@ -2619,6 +2702,10 @@ write_wim_part(WIMStruct *wim, WIMLIB_WRITE_FLAG_NOT_PIPABLE)) return WIMLIB_ERR_INVALID_PARAM; + /* Only wimlib_overwrite() accepts UNSAFE_COMPACT. */ + if (write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT) + return WIMLIB_ERR_INVALID_PARAM; + /* Include an integrity table by default if no preference was given and * the WIM already had an integrity table. */ if (!(write_flags & (WIMLIB_WRITE_FLAG_CHECK_INTEGRITY | @@ -2716,6 +2803,11 @@ write_wim_part(WIMStruct *wim, wim->out_hdr.boot_idx = 1; } + /* Update image stats if needed. */ + ret = update_image_stats(wim); + if (ret) + return ret; + /* Set up the output file descriptor. */ if (write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR) { /* File descriptor was explicitly provided. */ @@ -2818,11 +2910,16 @@ wimlib_write_to_fd(WIMStruct *wim, int fd, return write_standalone_wim(wim, &fd, image, write_flags, num_threads); } +/* Have there been any changes to images in the specified WIM, including updates + * as well as deletions and additions of entire images, but excluding changes to + * the XML document? 
*/ static bool -any_images_modified(WIMStruct *wim) +any_images_changed(WIMStruct *wim) { + if (wim->image_deletion_occurred) + return true; for (int i = 0; i < wim->hdr.image_count; i++) - if (wim->image_metadata[i]->modified) + if (!is_image_unchanged_from_wim(wim->image_metadata[i], wim)) return true; return false; } @@ -2862,6 +2959,20 @@ check_resource_offsets(WIMStruct *wim, off_t end_offset) return 0; } +static int +free_blob_if_invalidated(struct blob_descriptor *blob, void *_wim) +{ + const WIMStruct *wim = _wim; + + if (!blob->will_be_in_output_wim && + blob->blob_location == BLOB_IN_WIM && blob->rdesc->wim == wim) + { + blob_table_unlink(wim->blob_table, blob); + free_blob_descriptor(blob); + } + return 0; +} + /* * Overwrite a WIM, possibly appending new resources to it. * @@ -2917,13 +3028,25 @@ check_resource_offsets(WIMStruct *wim, off_t end_offset) * is that a small hole is left in the WIM where the old blob table, xml data, * and integrity table were. (These usually only take up a small amount of * space compared to the blobs, however.) + * + * Finally, this function also supports "compaction" overwrites as an + * alternative to the normal "append" overwrites described above. In a + * compaction, data is written starting immediately from the end of the header. + * All existing resources are written first, in order by file offset. New + * resources are written afterwards, and at the end any extra data is truncated + * from the file. The advantage of this approach is that is that the WIM file + * ends up fully optimized, without any holes remaining. The main disadavantage + * is that this operation is fundamentally unsafe and cannot be interrupted + * without data corruption. Consequently, compactions are only ever done when + * explicitly requested by the library user with the flag + * WIMLIB_WRITE_FLAG_UNSAFE_COMPACT. (Another disadvantage is that a compaction + * can be much slower than an append.) */ static int overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) { int ret; off_t old_wim_end; - u64 old_blob_table_end, old_xml_begin, old_xml_end; struct list_head blob_list; struct list_head blob_table_list; struct filter_context filter_ctx; @@ -2949,66 +3072,113 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) if (should_default_to_solid_compression(wim, write_flags)) write_flags |= WIMLIB_WRITE_FLAG_SOLID; - /* Set additional flags for overwrite. */ - write_flags |= WIMLIB_WRITE_FLAG_APPEND | - WIMLIB_WRITE_FLAG_STREAMS_OK; - - /* Make sure there is no data after the XML data, except possibily an - * integrity table. If this were the case, then this data would be - * overwritten. 
*/ - old_xml_begin = wim->hdr.xml_data_reshdr.offset_in_wim; - old_xml_end = old_xml_begin + wim->hdr.xml_data_reshdr.size_in_wim; - old_blob_table_end = wim->hdr.blob_table_reshdr.offset_in_wim + - wim->hdr.blob_table_reshdr.size_in_wim; - if (wim_has_integrity_table(wim) && - wim->hdr.integrity_table_reshdr.offset_in_wim < old_xml_end) { - WARNING("Didn't expect the integrity table to be before the XML data"); - ret = WIMLIB_ERR_RESOURCE_ORDER; - goto out; - } + if (unlikely(write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)) { - if (old_blob_table_end > old_xml_begin) { - WARNING("Didn't expect the blob table to be after the XML data"); - ret = WIMLIB_ERR_RESOURCE_ORDER; - goto out; - } + /* In-place compaction */ - /* Set @old_wim_end, which indicates the point beyond which we don't - * allow any file and metadata resources to appear without returning - * WIMLIB_ERR_RESOURCE_ORDER (due to the fact that we would otherwise - * overwrite these resources). */ - if (!wim->image_deletion_occurred && !any_images_modified(wim)) { - /* If no images have been modified and no images have been - * deleted, a new blob table does not need to be written. We - * shall write the new XML data and optional integrity table - * immediately after the blob table. Note that this may - * overwrite an existing integrity table. */ - old_wim_end = old_blob_table_end; - write_flags |= WIMLIB_WRITE_FLAG_NO_NEW_BLOBS; - } else if (wim_has_integrity_table(wim)) { - /* Old WIM has an integrity table; begin writing new blobs after - * it. */ - old_wim_end = wim->hdr.integrity_table_reshdr.offset_in_wim + - wim->hdr.integrity_table_reshdr.size_in_wim; + WARNING("The WIM file \"%"TS"\" is being compacted in place.\n" + " Do *not* interrupt the operation, or else " + "the WIM file will be\n" + " corrupted!", wim->filename); + wim->being_compacted = 1; + old_wim_end = WIM_HEADER_DISK_SIZE; + + ret = prepare_blob_list_for_write(wim, WIMLIB_ALL_IMAGES, + write_flags, &blob_list, + &blob_table_list, &filter_ctx); + if (ret) + goto out; + + /* Prevent new files from being deduplicated with existing blobs + * in the WIM that we haven't decided to write. Such blobs will + * be overwritten during the compaction. */ + for_blob_in_table(wim->blob_table, free_blob_if_invalidated, wim); + + if (wim_has_metadata(wim)) { + /* Add existing metadata resources to be compacted along + * with the file resources. */ + for (int i = 0; i < wim->hdr.image_count; i++) { + struct wim_image_metadata *imd = wim->image_metadata[i]; + if (is_image_unchanged_from_wim(imd, wim)) { + fully_reference_blob_for_write(imd->metadata_blob, + &blob_list); + } + } + } } else { - /* No existing integrity table; begin writing new blobs after - * the old XML data. */ - old_wim_end = old_xml_end; - } + u64 old_blob_table_end, old_xml_begin, old_xml_end; + + /* Set additional flags for append. */ + write_flags |= WIMLIB_WRITE_FLAG_APPEND | + WIMLIB_WRITE_FLAG_STREAMS_OK; + + /* Make sure there is no data after the XML data, except + * possibily an integrity table. If this were the case, then + * this data would be overwritten. 
*/ + old_xml_begin = wim->hdr.xml_data_reshdr.offset_in_wim; + old_xml_end = old_xml_begin + wim->hdr.xml_data_reshdr.size_in_wim; + if (wim->hdr.blob_table_reshdr.offset_in_wim == 0) + old_blob_table_end = WIM_HEADER_DISK_SIZE; + else + old_blob_table_end = wim->hdr.blob_table_reshdr.offset_in_wim + + wim->hdr.blob_table_reshdr.size_in_wim; + if (wim_has_integrity_table(wim) && + wim->hdr.integrity_table_reshdr.offset_in_wim < old_xml_end) { + WARNING("Didn't expect the integrity table to be " + "before the XML data"); + ret = WIMLIB_ERR_RESOURCE_ORDER; + goto out; + } - ret = check_resource_offsets(wim, old_wim_end); - if (ret) - goto out; + if (old_blob_table_end > old_xml_begin) { + WARNING("Didn't expect the blob table to be after " + "the XML data"); + ret = WIMLIB_ERR_RESOURCE_ORDER; + goto out; + } + /* Set @old_wim_end, which indicates the point beyond which we + * don't allow any file and metadata resources to appear without + * returning WIMLIB_ERR_RESOURCE_ORDER (due to the fact that we + * would otherwise overwrite these resources). */ + if (!any_images_changed(wim)) { + /* If no images have been modified, added, or deleted, + * then a new blob table does not need to be written. + * We shall write the new XML data and optional + * integrity table immediately after the blob table. + * Note that this may overwrite an existing integrity + * table. */ + old_wim_end = old_blob_table_end; + write_flags |= WIMLIB_WRITE_FLAG_NO_NEW_BLOBS; + } else if (wim_has_integrity_table(wim)) { + /* Old WIM has an integrity table; begin writing new + * blobs after it. */ + old_wim_end = wim->hdr.integrity_table_reshdr.offset_in_wim + + wim->hdr.integrity_table_reshdr.size_in_wim; + } else { + /* No existing integrity table; begin writing new blobs + * after the old XML data. */ + old_wim_end = old_xml_end; + } + + ret = check_resource_offsets(wim, old_wim_end); + if (ret) + goto out; - ret = prepare_blob_list_for_write(wim, WIMLIB_ALL_IMAGES, write_flags, - &blob_list, &blob_table_list, - &filter_ctx); + ret = prepare_blob_list_for_write(wim, WIMLIB_ALL_IMAGES, + write_flags, &blob_list, + &blob_table_list, &filter_ctx); + if (ret) + goto out; + + if (write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS) + wimlib_assert(list_empty(&blob_list)); + } + + /* Update image stats if needed. */ + ret = update_image_stats(wim); if (ret) goto out; - if (write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS) - wimlib_assert(list_empty(&blob_list)); - ret = open_wim_writable(wim, wim->filename, O_RDWR); if (ret) goto out; @@ -3050,12 +3220,12 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) return 0; out_truncate: - if (!(write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS)) { + if (!(write_flags & (WIMLIB_WRITE_FLAG_NO_NEW_BLOBS | + WIMLIB_WRITE_FLAG_UNSAFE_COMPACT))) { WARNING("Truncating \"%"TS"\" to its original size " "(%"PRIu64" bytes)", wim->filename, old_wim_end); - /* Return value of ftruncate() is ignored because this is - * already an error path. 
*/ - (void)ftruncate(wim->out_fd.fd, old_wim_end); + if (ftruncate(wim->out_fd.fd, old_wim_end)) + WARNING_WITH_ERRNO("Failed to truncate WIM file!"); } out_restore_hdr: (void)write_wim_header_flags(wim->hdr.flags, &wim->out_fd); @@ -3064,6 +3234,7 @@ out_unlock_wim: out_close_wim: (void)close_wim_writable(wim, write_flags); out: + wim->being_compacted = 0; return ret; } @@ -3078,7 +3249,7 @@ overwrite_wim_via_tmpfile(WIMStruct *wim, int write_flags, unsigned num_threads) wim_name_len = tstrlen(wim->filename); tchar tmpfile[wim_name_len + 10]; tmemcpy(tmpfile, wim->filename, wim_name_len); - randomize_char_array_with_alnum(tmpfile + wim_name_len, 9); + get_random_alnum_chars(tmpfile + wim_name_len, 9); tmpfile[wim_name_len + 9] = T('\0'); ret = wimlib_write(wim, tmpfile, WIMLIB_ALL_IMAGES, @@ -3103,7 +3274,7 @@ overwrite_wim_via_tmpfile(WIMStruct *wim, int write_flags, unsigned num_threads) if (ret) { ERROR_WITH_ERRNO("Failed to rename `%"TS"' to `%"TS"'", tmpfile, wim->filename); - #ifdef __WIN32__ + #ifdef _WIN32 if (ret < 0) #endif { @@ -3162,6 +3333,20 @@ wimlib_overwrite(WIMStruct *wim, int write_flags, unsigned num_threads) if (!wim->filename) return WIMLIB_ERR_NO_FILENAME; + if (unlikely(write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)) { + /* + * In UNSAFE_COMPACT mode: + * - RECOMPRESS is forbidden + * - REBUILD is ignored + * - SOFT_DELETE and NO_SOLID_SORT are implied + */ + if (write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS) + return WIMLIB_ERR_COMPACTION_NOT_POSSIBLE; + write_flags &= ~WIMLIB_WRITE_FLAG_REBUILD; + write_flags |= WIMLIB_WRITE_FLAG_SOFT_DELETE; + write_flags |= WIMLIB_WRITE_FLAG_NO_SOLID_SORT; + } + orig_hdr_flags = wim->hdr.flags; if (write_flags & WIMLIB_WRITE_FLAG_IGNORE_READONLY_FLAG) wim->hdr.flags &= ~WIM_HDR_FLAG_READONLY; @@ -3176,5 +3361,7 @@ wimlib_overwrite(WIMStruct *wim, int write_flags, unsigned num_threads) return ret; WARNING("Falling back to re-building entire WIM"); } + if (write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT) + return WIMLIB_ERR_COMPACTION_NOT_POSSIBLE; return overwrite_wim_via_tmpfile(wim, write_flags, num_threads); }
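
A minimal caller-side sketch of the compaction mode this patch introduces, added here for context rather than as part of the diff. It assumes the public wimlib.h API as of this commit (wimlib_open_wim(), wimlib_register_progress_function(), wimlib_overwrite(), wimlib_free(), WIMLIB_WRITE_FLAG_UNSAFE_COMPACT, and the write_streams progress fields); compact_wim() and print_progress() are illustrative names only, and error handling is abbreviated. It also exercises the completed_compressed_bytes counter that do_write_blobs_progress() now maintains.

/* Sketch only: drive the in-place compaction added by this patch through
 * wimlib's public API.  compact_wim() and print_progress() are example
 * names, not part of the library. */

#include <inttypes.h>
#include <stdio.h>
#include <wimlib.h>

/* Print write progress, including the completed_compressed_bytes counter
 * that do_write_blobs_progress() now fills in. */
static enum wimlib_progress_status
print_progress(enum wimlib_progress_msg msg,
	       union wimlib_progress_info *info, void *ctx)
{
	(void)ctx;
	if (msg == WIMLIB_PROGRESS_MSG_WRITE_STREAMS) {
		printf("\r%"PRIu64"/%"PRIu64" bytes written "
		       "(%"PRIu64" bytes compressed)",
		       info->write_streams.completed_bytes,
		       info->write_streams.total_bytes,
		       info->write_streams.completed_compressed_bytes);
		fflush(stdout);
	}
	return WIMLIB_PROGRESS_STATUS_CONTINUE;
}

/* Compact a WIM file in place.  On a non-Windows build, wimlib_tchar is
 * plain char, so an ordinary path string can be passed directly. */
static int
compact_wim(const char *path)
{
	WIMStruct *wim;
	int ret;

	ret = wimlib_open_wim(path, WIMLIB_OPEN_FLAG_WRITE_ACCESS, &wim);
	if (ret)
		return ret;

	wimlib_register_progress_function(wim, print_progress, NULL);

	/* UNSAFE_COMPACT takes the in-place compaction path in
	 * overwrite_wim_inplace(): existing resources are rewritten starting
	 * right after the header, new resources follow, and the file is
	 * truncated at the end.  As the warning in that function says,
	 * interrupting the operation corrupts the WIM.  RECOMPRESS is
	 * rejected in this mode and REBUILD is ignored.  Passing 0 threads
	 * uses the default thread count. */
	ret = wimlib_overwrite(wim, WIMLIB_WRITE_FLAG_UNSAFE_COMPACT, 0);

	wimlib_free(wim);
	return ret;
}

Building this is the usual "cc example.c -lwim" (assuming wimlib is installed); on a Windows build wimlib_tchar is a wide character type, so the path argument would need to be a wide string instead.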