X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fwrite.c;h=aa456e07fda54e470f3de09836be3ed154799ccf;hp=b595c875e08e08e80313aa3bd2e363e8577df2d3;hb=fc938fc3886f0c8cacdbc83148484b55a32ca8e7;hpb=c24f1c029572b67c7023aa06a7c24a46cf938367;ds=sidebyside

diff --git a/src/write.c b/src/write.c
index b595c875..aa456e07 100644
--- a/src/write.c
+++ b/src/write.c
@@ -28,7 +28,7 @@
 
 #if defined(HAVE_SYS_FILE_H) && defined(HAVE_FLOCK)
 /* On BSD, this should be included before "wimlib/list.h" so that "wimlib/list.h" can
- * overwrite the LIST_HEAD macro. */
+ * override the LIST_HEAD macro. */
 #  include <sys/file.h>
 #endif
 
@@ -118,7 +118,7 @@ blob_filtered(const struct blob_descriptor *blob,
 	write_flags = ctx->write_flags;
 	wim = ctx->wim;
 
-	if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE &&
+	if (write_flags & WIMLIB_WRITE_FLAG_APPEND &&
 	    blob->blob_location == BLOB_IN_WIM &&
 	    blob->rdesc->wim == wim)
 		return 1;
@@ -138,60 +138,72 @@ blob_hard_filtered(const struct blob_descriptor *blob,
 	return blob_filtered(blob, ctx) < 0;
 }
 
-static inline int
+static inline bool
 may_soft_filter_blobs(const struct filter_context *ctx)
 {
-	if (ctx == NULL)
-		return 0;
-	return ctx->write_flags & WIMLIB_WRITE_FLAG_OVERWRITE;
+	return ctx && (ctx->write_flags & WIMLIB_WRITE_FLAG_APPEND);
 }
 
-static inline int
+static inline bool
 may_hard_filter_blobs(const struct filter_context *ctx)
 {
-	if (ctx == NULL)
-		return 0;
-	return ctx->write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS;
+	return ctx && (ctx->write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS);
 }
 
-static inline int
+static inline bool
 may_filter_blobs(const struct filter_context *ctx)
 {
 	return (may_soft_filter_blobs(ctx) || may_hard_filter_blobs(ctx));
 }
 
-/* Return true if the specified resource is compressed and the compressed data
- * can be reused with the specified output parameters. */
+/* Return true if the specified blob is located in a WIM resource which can be
+ * reused in the output WIM file, without being recompressed. */
 static bool
-can_raw_copy(const struct blob_descriptor *blob,
-	     int write_resource_flags, int out_ctype, u32 out_chunk_size)
+can_raw_copy(const struct blob_descriptor *blob, int write_resource_flags,
+	     int out_ctype, u32 out_chunk_size)
 {
 	const struct wim_resource_descriptor *rdesc;
 
+	/* Recompress everything if requested. */
 	if (write_resource_flags & WRITE_RESOURCE_FLAG_RECOMPRESS)
 		return false;
 
-	if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
-		return false;
-
+	/* A blob not located in a WIM resource cannot be reused. */
 	if (blob->blob_location != BLOB_IN_WIM)
 		return false;
 
 	rdesc = blob->rdesc;
 
-	if (rdesc->is_pipable != !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE))
+	/* Only reuse compressed resources. */
+	if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE ||
+	    !(rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED |
+			      WIM_RESHDR_FLAG_SOLID)))
 		return false;
 
+	/* When writing a pipable WIM, we can only reuse pipable resources; and
+	 * when writing a non-pipable WIM, we can only reuse non-pipable
+	 * resources. */
+	if (rdesc->is_pipable !=
+	    !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE))
+		return false;
+
+	/* When writing a solid WIM, we can only reuse solid resources; and when
+	 * writing a non-solid WIM, we can only reuse non-solid resources. */
+	if (!!(rdesc->flags & WIM_RESHDR_FLAG_SOLID) !=
+	    !!(write_resource_flags & WRITE_RESOURCE_FLAG_SOLID))
+		return false;
+
+	/* Note: it is theoretically possible to copy chunks of compressed data
+	 * between non-solid, solid, and pipable resources.  However, we don't
+	 * currently implement this optimization because it would be complex and
+	 * would usually go unused. */
+
 	if (rdesc->flags & WIM_RESHDR_FLAG_COMPRESSED) {
-		/* Normal compressed resource: Must use same compression type
-		 * and chunk size. */
+		/* To re-use a non-solid resource, it must use the desired
+		 * compression type and chunk size. */
 		return (rdesc->compression_type == out_ctype &&
 			rdesc->chunk_size == out_chunk_size);
-	}
-
-	if ((rdesc->flags & WIM_RESHDR_FLAG_SOLID) &&
-	    (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID))
-	{
+	} else {
 		/* Solid resource: Such resources may contain multiple blobs,
 		 * and in general only a subset of them need to be written.  As
 		 * a heuristic, re-use the raw data if more than two-thirds the
@@ -202,6 +214,10 @@ can_raw_copy(const struct blob_descriptor *blob,
 		 * check if they are compatible with @out_ctype and
 		 * @out_chunk_size. */
 
+		/* Did we already decide to reuse the resource? */
+		if (rdesc->raw_copy_ok)
+			return true;
+
 		struct blob_descriptor *res_blob;
 		u64 write_size = 0;
 
@@ -211,8 +227,6 @@ can_raw_copy(const struct blob_descriptor *blob,
 
 		return (write_size > rdesc->uncompressed_size * 2 / 3);
 	}
-
-	return false;
 }
 
 static u32
@@ -338,10 +352,6 @@ struct write_blobs_ctx {
 
 	struct filter_context *filter_ctx;
 
-	/* Upper bound on the total number of bytes that need to be compressed.
-	 * */
-	u64 num_bytes_to_compress;
-
 	/* Pointer to the chunk_compressor implementation being used for
 	 * compressing chunks of data, or NULL if chunks are being written
 	 * uncompressed. */
@@ -1133,14 +1143,12 @@ compute_blob_list_stats(struct list_head *blob_list,
  * @raw_copy_blobs.  Return the total uncompressed size of the blobs that need
  * to be compressed. */
 static u64
-find_raw_copy_blobs(struct list_head *blob_list,
-		    int write_resource_flags,
-		    int out_ctype,
-		    u32 out_chunk_size,
+find_raw_copy_blobs(struct list_head *blob_list, int write_resource_flags,
+		    int out_ctype, u32 out_chunk_size,
 		    struct list_head *raw_copy_blobs)
 {
 	struct blob_descriptor *blob, *tmp;
-	u64 num_bytes_to_compress = 0;
+	u64 num_nonraw_bytes = 0;
 
 	INIT_LIST_HEAD(raw_copy_blobs);
 
@@ -1150,23 +1158,17 @@ find_raw_copy_blobs(struct list_head *blob_list,
 			blob->rdesc->raw_copy_ok = 0;
 
 	list_for_each_entry_safe(blob, tmp, blob_list, write_blobs_list) {
-		if (blob->blob_location == BLOB_IN_WIM &&
-		    blob->rdesc->raw_copy_ok)
-		{
-			list_move_tail(&blob->write_blobs_list,
-				       raw_copy_blobs);
-		} else if (can_raw_copy(blob, write_resource_flags,
-					out_ctype, out_chunk_size))
+		if (can_raw_copy(blob, write_resource_flags,
+				 out_ctype, out_chunk_size))
 		{
 			blob->rdesc->raw_copy_ok = 1;
-			list_move_tail(&blob->write_blobs_list,
-				       raw_copy_blobs);
+			list_move_tail(&blob->write_blobs_list, raw_copy_blobs);
 		} else {
-			num_bytes_to_compress += blob->size;
+			num_nonraw_bytes += blob->size;
 		}
 	}
 
-	return num_bytes_to_compress;
+	return num_nonraw_bytes;
 }
 
 /* Copy a raw compressed resource located in another WIM file to the WIM file
@@ -1442,6 +1444,7 @@ write_blob_list(struct list_head *blob_list,
 	int ret;
 	struct write_blobs_ctx ctx;
 	struct list_head raw_copy_blobs;
+	u64 num_nonraw_bytes;
 
 	wimlib_assert((write_resource_flags &
 		       (WRITE_RESOURCE_FLAG_SOLID |
@@ -1498,14 +1501,17 @@ write_blob_list(struct list_head *blob_list,
 	ctx.progress_data.progfunc = progfunc;
 	ctx.progress_data.progctx = progctx;
 
-	ctx.num_bytes_to_compress = find_raw_copy_blobs(blob_list,
-							write_resource_flags,
-							out_ctype,
-							out_chunk_size,
-							&raw_copy_blobs);
+	num_nonraw_bytes = find_raw_copy_blobs(blob_list, write_resource_flags,
+					       out_ctype, out_chunk_size,
+					       &raw_copy_blobs);
 
-	if (ctx.num_bytes_to_compress == 0)
-		goto out_write_raw_copy_resources;
+	/* Copy any compressed resources for which the raw data can be reused
+	 * without decompression. */
+	ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd,
+				       &ctx.progress_data);
+
+	if (ret || num_nonraw_bytes == 0)
+		goto out_destroy_context;
 
 	/* Unless uncompressed output was required, allocate a chunk_compressor
 	 * to do compression.  There are serial and parallel implementations of
@@ -1515,7 +1521,7 @@ write_blob_list(struct list_head *blob_list,
 
 	if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
 #ifdef ENABLE_MULTITHREADED_COMPRESSION
-		if (ctx.num_bytes_to_compress > max(2000000, out_chunk_size)) {
+		if (num_nonraw_bytes > max(2000000, out_chunk_size)) {
 			ret = new_parallel_chunk_compressor(out_ctype,
 							    out_chunk_size,
 							    num_threads, 0,
@@ -1552,7 +1558,7 @@ write_blob_list(struct list_head *blob_list,
 		goto out_destroy_context;
 
 	if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
-		ret = begin_write_resource(&ctx, ctx.num_bytes_to_compress);
+		ret = begin_write_resource(&ctx, num_nonraw_bytes);
 		if (ret)
 			goto out_destroy_context;
 	}
@@ -1605,12 +1611,6 @@ write_blob_list(struct list_head *blob_list,
 		wimlib_assert(offset_in_res == reshdr.uncompressed_size);
 	}
 
-out_write_raw_copy_resources:
-	/* Copy any compressed resources for which the raw data can be reused
-	 * without decompression. */
-	ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd,
-				       &ctx.progress_data);
-
 out_destroy_context:
 	FREE(ctx.chunk_csizes);
 	if (ctx.compressor)
@@ -1971,9 +1971,9 @@ filter_blob_list_for_write(struct list_head *blob_list,
  * STREAMS_OK: For writes of all images, assume that all blobs in the blob
  * table of @wim and the per-image lists of unhashed blobs should be taken
  * as-is, and image metadata should not be searched for references. This
- * does not exclude filtering with OVERWRITE and SKIP_EXTERNAL_WIMS, below.
+ * does not exclude filtering with APPEND and SKIP_EXTERNAL_WIMS, below.
 *
- * OVERWRITE: Blobs already present in @wim shall not be returned in
+ * APPEND: Blobs already present in @wim shall not be returned in
  * @blob_list_ret.
 *
 * SKIP_EXTERNAL_WIMS: Blobs already present in a WIM file, but not @wim,
@@ -1993,9 +1993,9 @@ filter_blob_list_for_write(struct list_head *blob_list,
  * the blobs in @blob_list_ret.
 *
  * This list will be a proper superset of @blob_list_ret if and only if
- * WIMLIB_WRITE_FLAG_OVERWRITE was specified in @write_flags and some of
- * the blobs that would otherwise need to be written were already located
- * in the WIM file.
+ * WIMLIB_WRITE_FLAG_APPEND was specified in @write_flags and some of the
+ * blobs that would otherwise need to be written were already located in
+ * the WIM file.
 *
  * All blobs in this list will have @out_refcnt set to the number of
  * references to the blob in the output WIM. If
@@ -2139,7 +2139,7 @@ write_metadata_resources(WIMStruct *wim, int image, int write_flags)
 		if (imd->modified) {
 			ret = write_metadata_resource(wim, i,
 						      write_resource_flags);
-		} else if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE) {
+		} else if (write_flags & WIMLIB_WRITE_FLAG_APPEND) {
 			blob_set_out_reshdr_for_reuse(imd->metadata_blob);
 			ret = 0;
 		} else {
@@ -2214,7 +2214,7 @@ write_blob_table(WIMStruct *wim, int image, int write_flags,
 	int ret;
 
 	/* Set output resource metadata for blobs already present in WIM. */
-	if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE) {
+	if (write_flags & WIMLIB_WRITE_FLAG_APPEND) {
 		struct blob_descriptor *blob;
 		list_for_each_entry(blob, blob_table_list, blob_table_list) {
 			if (blob->blob_location == BLOB_IN_WIM &&
@@ -2297,14 +2297,13 @@ finish_write(WIMStruct *wim, int image, int write_flags,
 				wim->out_hdr.boot_idx - 1]->metadata_blob->out_reshdr);
 	}
 
-	/* If overwriting the WIM file containing an integrity table in-place,
-	 * we'd like to re-use the information in the old integrity table
-	 * instead of recalculating it.  But we might overwrite the old
-	 * integrity table when we expand the XML data.  Read it into memory
-	 * just in case. */
-	if ((write_flags & (WIMLIB_WRITE_FLAG_OVERWRITE |
+	/* If appending to a WIM file containing an integrity table, we'd like
+	 * to re-use the information in the old integrity table instead of
+	 * recalculating it.  But we might overwrite the old integrity table
+	 * when we expand the XML data.  Read it into memory just in case. */
+	if ((write_flags & (WIMLIB_WRITE_FLAG_APPEND |
 			    WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)) ==
-	    (WIMLIB_WRITE_FLAG_OVERWRITE |
+	    (WIMLIB_WRITE_FLAG_APPEND |
 	     WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) &&
 	    wim_has_integrity_table(wim))
 	{
@@ -2842,8 +2841,8 @@ check_resource_offset(struct blob_descriptor *blob, void *_wim)
 }
 
 /* Make sure no file or metadata resources are located after the XML data (or
- * integrity table if present)--- otherwise we can't safely overwrite the WIM in
- * place and we return WIMLIB_ERR_RESOURCE_ORDER. */
+ * integrity table if present)--- otherwise we can't safely append to the WIM
+ * file and we return WIMLIB_ERR_RESOURCE_ORDER. */
 static int
 check_resource_offsets(WIMStruct *wim, off_t end_offset)
 {
@@ -2913,7 +2912,7 @@ check_resource_offsets(WIMStruct *wim, off_t end_offset)
  * XML data (variable size)
 * Integrity table (optional) (variable size)
 *
- * This method allows an image to be appended to a large WIM very quickly, and
+ * This function allows an image to be appended to a large WIM very quickly, and
  * is crash-safe except in the case of write re-ordering, but the disadvantage
  * is that a small hole is left in the WIM where the old blob table, xml data,
  * and integrity table were. (These usually only take up a small amount of
@@ -2951,7 +2950,7 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads)
 		write_flags |= WIMLIB_WRITE_FLAG_SOLID;
 
 	/* Set additional flags for overwrite. */
-	write_flags |= WIMLIB_WRITE_FLAG_OVERWRITE |
+	write_flags |= WIMLIB_WRITE_FLAG_APPEND |
 		       WIMLIB_WRITE_FLAG_STREAMS_OK;
 
 	/* Make sure there is no data after the XML data, except possibily an
@@ -3120,8 +3119,8 @@ overwrite_wim_via_tmpfile(WIMStruct *wim, int write_flags, unsigned num_threads)
 					&progress, wim->progctx);
 }
 
-/* Determine if the specified WIM file may be updated by appending in-place
- * rather than writing and replacing it with an entirely new file. */
+/* Determine if the specified WIM file may be updated in-place rather than by
+ * writing and replacing it with an entirely new file. */
 static bool
 can_overwrite_wim_inplace(const WIMStruct *wim, int write_flags)
 {
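
Usage note (not part of the patch): the WIMLIB_WRITE_FLAG_APPEND flag renamed above is internal; applications reach the append-in-place path implemented by overwrite_wim_inplace() through the public wimlib_overwrite() API. The sketch below is a minimal illustration of that path, assuming the standard public functions wimlib_open_wim(), wimlib_add_image(), wimlib_overwrite(), wimlib_get_error_string() and wimlib_free() on a POSIX build; the WIM file and directory paths are placeholders.

/* Illustrative sketch only -- not part of src/write.c or this patch.
 * Appends a newly captured image to an existing WIM in place, which is the
 * code path that ends up setting the internal append flag renamed above. */
#include <stdio.h>
#include <wimlib.h>

int main(void)
{
	WIMStruct *wim;
	int ret;

	/* "install.wim" and "/some/tree" are placeholder paths. */
	ret = wimlib_open_wim("install.wim", 0, &wim);
	if (ret)
		goto out;

	/* Capture a directory tree as a new image in the in-memory WIMStruct. */
	ret = wimlib_add_image(wim, "/some/tree", "New image", NULL, 0);
	if (ret)
		goto out_free;

	/* Update the existing file in place; blobs already present in it are
	 * filtered out and only new data is appended. */
	ret = wimlib_overwrite(wim, WIMLIB_WRITE_FLAG_CHECK_INTEGRITY, 0);

out_free:
	wimlib_free(wim);
out:
	if (ret)
		fprintf(stderr, "wimlib error: %s\n",
			wimlib_get_error_string(ret));
	return ret ? 1 : 0;
}

When in-place appending is not possible (see can_overwrite_wim_inplace() above), wimlib_overwrite() instead rewrites the file via a temporary file, the overwrite_wim_via_tmpfile() path also touched by this change.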