X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fwrite.c;h=34f6283325dd47bb6eaabce49a32fdae9cc839c8;hp=d0ede8bb9a42b6d87d964384254e04930f27c8c3;hb=7d0fbce6d1727cdf2a367943acf5a007b648dfd8;hpb=7fa4bcf7173df1166677ced5fbab51ed3ef07a08

diff --git a/src/write.c b/src/write.c
index d0ede8bb..34f62833 100644
--- a/src/write.c
+++ b/src/write.c
@@ -138,60 +138,72 @@ blob_hard_filtered(const struct blob_descriptor *blob,
 	return blob_filtered(blob, ctx) < 0;
 }
 
-static inline int
+static inline bool
 may_soft_filter_blobs(const struct filter_context *ctx)
 {
-	if (ctx == NULL)
-		return 0;
-	return ctx->write_flags & WIMLIB_WRITE_FLAG_OVERWRITE;
+	return ctx && (ctx->write_flags & WIMLIB_WRITE_FLAG_OVERWRITE);
 }
 
-static inline int
+static inline bool
 may_hard_filter_blobs(const struct filter_context *ctx)
 {
-	if (ctx == NULL)
-		return 0;
-	return ctx->write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS;
+	return ctx && (ctx->write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS);
 }
 
-static inline int
+static inline bool
 may_filter_blobs(const struct filter_context *ctx)
 {
 	return (may_soft_filter_blobs(ctx) || may_hard_filter_blobs(ctx));
 }
 
-/* Return true if the specified resource is compressed and the compressed data
- * can be reused with the specified output parameters.  */
+/* Return true if the specified blob is located in a WIM resource which can be
+ * reused in the output WIM file, without being recompressed.  */
 static bool
-can_raw_copy(const struct blob_descriptor *blob,
-	     int write_resource_flags, int out_ctype, u32 out_chunk_size)
+can_raw_copy(const struct blob_descriptor *blob, int write_resource_flags,
+	     int out_ctype, u32 out_chunk_size)
 {
 	const struct wim_resource_descriptor *rdesc;
 
+	/* Recompress everything if requested.  */
 	if (write_resource_flags & WRITE_RESOURCE_FLAG_RECOMPRESS)
 		return false;
 
-	if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
-		return false;
-
+	/* A blob not located in a WIM resource cannot be reused.  */
 	if (blob->blob_location != BLOB_IN_WIM)
 		return false;
 
 	rdesc = blob->rdesc;
 
-	if (rdesc->is_pipable != !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE))
+	/* Only reuse compressed resources.  */
+	if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE ||
+	    !(rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED |
+			      WIM_RESHDR_FLAG_SOLID)))
+		return false;
+
+	/* When writing a pipable WIM, we can only reuse pipable resources; and
+	 * when writing a non-pipable WIM, we can only reuse non-pipable
+	 * resources.  */
+	if (rdesc->is_pipable !=
+	    !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE))
 		return false;
 
+	/* When writing a solid WIM, we can only reuse solid resources; and when
+	 * writing a non-solid WIM, we can only reuse non-solid resources.  */
+	if (!!(rdesc->flags & WIM_RESHDR_FLAG_SOLID) !=
+	    !!(write_resource_flags & WRITE_RESOURCE_FLAG_SOLID))
+		return false;
+
+	/* Note: it is theoretically possible to copy chunks of compressed data
+	 * between non-solid, solid, and pipable resources.  However, we don't
+	 * currently implement this optimization because it would be complex and
+	 * would usually go unused.  */
+
 	if (rdesc->flags & WIM_RESHDR_FLAG_COMPRESSED) {
-		/* Normal compressed resource: Must use same compression type
-		 * and chunk size.  */
+		/* To re-use a non-solid resource, it must use the desired
+		 * compression type and chunk size.  */
 		return (rdesc->compression_type == out_ctype &&
 			rdesc->chunk_size == out_chunk_size);
-	}
-
-	if ((rdesc->flags & WIM_RESHDR_FLAG_SOLID) &&
-	    (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID))
-	{
+	} else {
 		/* Solid resource: Such resources may contain multiple blobs,
 		 * and in general only a subset of them need to be written.  As
 		 * a heuristic, re-use the raw data if more than two-thirds the
@@ -202,6 +214,10 @@ can_raw_copy(const struct blob_descriptor *blob,
 		 * check if they are compatible with @out_ctype and
 		 * @out_chunk_size.  */
 
+		/* Did we already decide to reuse the resource?  */
+		if (rdesc->raw_copy_ok)
+			return true;
+
 		struct blob_descriptor *res_blob;
 		u64 write_size = 0;
 
@@ -211,8 +227,6 @@ can_raw_copy(const struct blob_descriptor *blob,
 
 		return (write_size > rdesc->uncompressed_size * 2 / 3);
 	}
-
-	return false;
 }
 
 static u32
@@ -300,8 +314,8 @@ do_write_blobs_progress(struct write_blobs_progress_data *progress_data,
 		progress->write_streams.completed_streams += complete_count;
 	}
 
-	if (progress->write_streams.completed_bytes >= progress_data->next_progress)
-	{
+	if (progress->write_streams.completed_bytes >= progress_data->next_progress) {
+
 		ret = call_progress(progress_data->progfunc,
 				    WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
 				    progress,
@@ -309,32 +323,9 @@ do_write_blobs_progress(struct write_blobs_progress_data *progress_data,
 		if (ret)
 			return ret;
 
-		if (progress_data->next_progress == progress->write_streams.total_bytes) {
-			progress_data->next_progress = ~(u64)0;
-		} else {
-			/* Handle rate-limiting of messages  */
-
-			/* Send new message as soon as another 1/128 of the
-			 * total has been written.  (Arbitrary number.)  */
-			progress_data->next_progress =
-				progress->write_streams.completed_bytes +
-					progress->write_streams.total_bytes / 128;
-
-			/* ... Unless that would be more than 5000000 bytes, in
-			 * which case send the next after the next 5000000
-			 * bytes.  (Another arbitrary number.)  */
-			if (progress->write_streams.completed_bytes + 5000000 <
-			    progress_data->next_progress)
-				progress_data->next_progress =
-					progress->write_streams.completed_bytes + 5000000;
-
-			/* ... But always send a message as soon as we're
-			 * completely done.  */
-			if (progress->write_streams.total_bytes <
-			    progress_data->next_progress)
-				progress_data->next_progress =
-					progress->write_streams.total_bytes;
-		}
+		set_next_progress(progress->write_streams.completed_bytes,
+				  progress->write_streams.total_bytes,
+				  &progress_data->next_progress);
 	}
 	return 0;
 }
@@ -361,10 +352,6 @@ struct write_blobs_ctx {
 
 	struct filter_context *filter_ctx;
 
-	/* Upper bound on the total number of bytes that need to be compressed.
-	 * */
-	u64 num_bytes_to_compress;
-
 	/* Pointer to the chunk_compressor implementation being used for
 	 * compressing chunks of data, or NULL if chunks are being written
 	 * uncompressed.  */
@@ -571,9 +558,9 @@ end_chunk_table(struct write_blobs_ctx *ctx, u64 res_actual_size,
 			hdr.chunk_size = cpu_to_le32(ctx->out_chunk_size);
 			hdr.compression_format = cpu_to_le32(ctx->out_ctype);
 
-			BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1);
-			BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2);
-			BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);
+			STATIC_ASSERT(WIMLIB_COMPRESSION_TYPE_XPRESS == 1);
+			STATIC_ASSERT(WIMLIB_COMPRESSION_TYPE_LZX == 2);
+			STATIC_ASSERT(WIMLIB_COMPRESSION_TYPE_LZMS == 3);
 
 			ret = full_pwrite(ctx->out_fd, &hdr, sizeof(hdr),
 					  chunk_table_offset - sizeof(hdr));
@@ -647,6 +634,8 @@ do_done_with_blob(struct blob_descriptor *blob,
 {
 	int ret;
 	struct wim_inode *inode;
+	tchar *cookie1;
+	tchar *cookie2;
 
 	if (!blob->may_send_done_with_file)
 		return 0;
@@ -654,42 +643,18 @@ do_done_with_blob(struct blob_descriptor *blob,
 	inode = blob->file_inode;
 
 	wimlib_assert(inode != NULL);
-	wimlib_assert(inode->num_remaining_streams > 0);
-	if (--inode->num_remaining_streams > 0)
+	wimlib_assert(inode->i_num_remaining_streams > 0);
+	if (--inode->i_num_remaining_streams > 0)
 		return 0;
 
-#ifdef __WIN32__
-	/* XXX: This logic really should be somewhere else.  */
-
-	/* We want the path to the file, but blob->file_on_disk might actually
-	 * refer to a named data stream.  Temporarily strip the named data
-	 * stream from the path.  */
-	wchar_t *p_colon = NULL;
-	wchar_t *p_question_mark = NULL;
-	const wchar_t *p_stream_name;
-
-	p_stream_name = path_stream_name(blob->file_on_disk);
-	if (unlikely(p_stream_name)) {
-		p_colon = (wchar_t *)(p_stream_name - 1);
-		wimlib_assert(*p_colon == L':');
-		*p_colon = L'\0';
-	}
-
-	/* We also should use a fake Win32 path instead of a NT path  */
-	if (!wcsncmp(blob->file_on_disk, L"\\??\\", 4)) {
-		p_question_mark = &blob->file_on_disk[1];
-		*p_question_mark = L'\\';
-	}
-#endif
+	cookie1 = progress_get_streamless_path(blob->file_on_disk);
+	cookie2 = progress_get_win32_path(blob->file_on_disk);
 
 	ret = done_with_file(blob->file_on_disk, progfunc, progctx);
 
-#ifdef __WIN32__
-	if (p_colon)
-		*p_colon = L':';
-	if (p_question_mark)
-		*p_question_mark = L'?';
-#endif
+	progress_put_win32_path(cookie2);
+	progress_put_streamless_path(cookie1);
+
 	return ret;
 }
 
@@ -795,7 +760,7 @@ write_blob_uncompressed(struct blob_descriptor *blob, struct filedes *out_fd)
 	if (filedes_seek(out_fd, begin_offset) == -1)
 		return 0;
 
-	ret = extract_full_blob_to_fd(blob, out_fd);
+	ret = extract_blob_to_fd(blob, out_fd);
 	if (ret) {
 		/* Error reading the uncompressed data.  */
 		if (out_fd->offset == begin_offset &&
@@ -814,13 +779,9 @@ write_blob_uncompressed(struct blob_descriptor *blob, struct filedes *out_fd)
 
 	wimlib_assert(out_fd->offset - begin_offset == blob->size);
 
-	if (out_fd->offset < end_offset &&
-	    0 != ftruncate(out_fd->fd, out_fd->offset))
-	{
-		ERROR_WITH_ERRNO("Can't truncate output file to "
-				 "offset %"PRIu64, out_fd->offset);
-		return WIMLIB_ERR_WRITE;
-	}
+	/* We could ftruncate() the file to 'out_fd->offset' here, but there
+	 * isn't much point.  Usually we will only be truncating by a few bytes
+	 * and will just overwrite the data immediately.  */
 
 	blob->out_reshdr.size_in_wim = blob->size;
 	blob->out_reshdr.flags &= ~(WIM_RESHDR_FLAG_COMPRESSED |
@@ -1182,14 +1143,12 @@ compute_blob_list_stats(struct list_head *blob_list,
  * @raw_copy_blobs.  Return the total uncompressed size of the blobs that need
  * to be compressed.  */
 static u64
-find_raw_copy_blobs(struct list_head *blob_list,
-		    int write_resource_flags,
-		    int out_ctype,
-		    u32 out_chunk_size,
+find_raw_copy_blobs(struct list_head *blob_list, int write_resource_flags,
+		    int out_ctype, u32 out_chunk_size,
 		    struct list_head *raw_copy_blobs)
 {
 	struct blob_descriptor *blob, *tmp;
-	u64 num_bytes_to_compress = 0;
+	u64 num_nonraw_bytes = 0;
 
 	INIT_LIST_HEAD(raw_copy_blobs);
 
@@ -1199,23 +1158,17 @@ find_raw_copy_blobs(struct list_head *blob_list,
 			blob->rdesc->raw_copy_ok = 0;
 
 	list_for_each_entry_safe(blob, tmp, blob_list, write_blobs_list) {
-		if (blob->blob_location == BLOB_IN_WIM &&
-		    blob->rdesc->raw_copy_ok)
-		{
-			list_move_tail(&blob->write_blobs_list,
-				       raw_copy_blobs);
-		} else if (can_raw_copy(blob, write_resource_flags,
-					out_ctype, out_chunk_size))
+		if (can_raw_copy(blob, write_resource_flags,
+				 out_ctype, out_chunk_size))
 		{
 			blob->rdesc->raw_copy_ok = 1;
-			list_move_tail(&blob->write_blobs_list,
-				       raw_copy_blobs);
+			list_move_tail(&blob->write_blobs_list, raw_copy_blobs);
 		} else {
-			num_bytes_to_compress += blob->size;
+			num_nonraw_bytes += blob->size;
 		}
 	}
 
-	return num_bytes_to_compress;
+	return num_nonraw_bytes;
 }
 
 /* Copy a raw compressed resource located in another WIM file to the WIM file
@@ -1333,19 +1286,13 @@ finish_remaining_chunks(struct write_blobs_ctx *ctx)
 }
 
 static void
-remove_empty_blobs(struct list_head *blob_list)
+validate_blob_list(struct list_head *blob_list)
 {
-	struct blob_descriptor *blob, *tmp;
+	struct blob_descriptor *blob;
 
-	list_for_each_entry_safe(blob, tmp, blob_list, write_blobs_list) {
+	list_for_each_entry(blob, blob_list, write_blobs_list) {
 		wimlib_assert(blob->will_be_in_output_wim);
-		if (blob->size == 0) {
-			list_del(&blob->write_blobs_list);
-			blob->out_reshdr.offset_in_wim = 0;
-			blob->out_reshdr.size_in_wim = 0;
-			blob->out_reshdr.uncompressed_size = 0;
-			blob->out_reshdr.flags = reshdr_flags_for_blob(blob);
-		}
+		wimlib_assert(blob->size != 0);
 	}
 }
 
@@ -1367,7 +1314,7 @@ init_done_with_file_info(struct list_head *blob_list)
 
 	list_for_each_entry(blob, blob_list, write_blobs_list) {
 		if (blob_is_in_file(blob)) {
-			blob->file_inode->num_remaining_streams = 0;
+			blob->file_inode->i_num_remaining_streams = 0;
 			blob->may_send_done_with_file = 1;
 		} else {
 			blob->may_send_done_with_file = 0;
@@ -1376,7 +1323,7 @@ init_done_with_file_info(struct list_head *blob_list)
 
 	list_for_each_entry(blob, blob_list, write_blobs_list)
 		if (blob->may_send_done_with_file)
-			blob->file_inode->num_remaining_streams++;
+			blob->file_inode->i_num_remaining_streams++;
 }
 
 /*
@@ -1497,6 +1444,7 @@ write_blob_list(struct list_head *blob_list,
 	int ret;
 	struct write_blobs_ctx ctx;
 	struct list_head raw_copy_blobs;
+	u64 num_nonraw_bytes;
 
 	wimlib_assert((write_resource_flags &
 		       (WRITE_RESOURCE_FLAG_SOLID |
@@ -1504,7 +1452,7 @@ write_blob_list(struct list_head *blob_list,
 				(WRITE_RESOURCE_FLAG_SOLID |
 				 WRITE_RESOURCE_FLAG_PIPABLE));
 
-	remove_empty_blobs(blob_list);
+	validate_blob_list(blob_list);
 
 	if (list_empty(blob_list))
 		return 0;
@@ -1553,13 +1501,11 @@ write_blob_list(struct list_head *blob_list,
 	ctx.progress_data.progfunc = progfunc;
 	ctx.progress_data.progctx = progctx;
 
-	ctx.num_bytes_to_compress = find_raw_copy_blobs(blob_list,
-							write_resource_flags,
-							out_ctype,
-							out_chunk_size,
-							&raw_copy_blobs);
+	num_nonraw_bytes = find_raw_copy_blobs(blob_list, write_resource_flags,
+					       out_ctype, out_chunk_size,
+					       &raw_copy_blobs);
 
-	if (ctx.num_bytes_to_compress == 0)
+	if (num_nonraw_bytes == 0)
 		goto out_write_raw_copy_resources;
 
 	/* Unless uncompressed output was required, allocate a chunk_compressor
@@ -1570,7 +1516,7 @@ write_blob_list(struct list_head *blob_list,
 	if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
 
 	#ifdef ENABLE_MULTITHREADED_COMPRESSION
-		if (ctx.num_bytes_to_compress > max(2000000, out_chunk_size)) {
+		if (num_nonraw_bytes > max(2000000, out_chunk_size)) {
 			ret = new_parallel_chunk_compressor(out_ctype,
 							    out_chunk_size,
 							    num_threads, 0,
@@ -1607,7 +1553,7 @@ write_blob_list(struct list_head *blob_list,
 		goto out_destroy_context;
 
 	if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
-		ret = begin_write_resource(&ctx, ctx.num_bytes_to_compress);
+		ret = begin_write_resource(&ctx, num_nonraw_bytes);
 		if (ret)
 			goto out_destroy_context;
 	}
@@ -1615,13 +1561,11 @@ write_blob_list(struct list_head *blob_list,
 	/* Read the list of blobs needing to be compressed, using the specified
 	 * callbacks to execute processing of the data.  */
 
-	struct read_blob_list_callbacks cbs = {
-		.begin_blob		= write_blob_begin_read,
-		.begin_blob_ctx		= &ctx,
-		.consume_chunk		= write_blob_process_chunk,
-		.consume_chunk_ctx	= &ctx,
-		.end_blob		= write_blob_end_read,
-		.end_blob_ctx		= &ctx,
+	struct read_blob_callbacks cbs = {
+		.begin_blob	= write_blob_begin_read,
+		.consume_chunk	= write_blob_process_chunk,
+		.end_blob	= write_blob_end_read,
+		.ctx		= &ctx,
 	};
 
 	ret = read_blob_list(blob_list,
@@ -1747,6 +1691,13 @@ write_wim_resource_from_buffer(const void *buf,
 	int ret;
 	struct blob_descriptor blob;
 
+	if (unlikely(buf_size == 0)) {
+		zero_reshdr(out_reshdr);
+		if (hash_ret)
+			copy_hash(hash_ret, zero_hash);
+		return 0;
+	}
+
 	blob_set_is_located_in_attached_buffer(&blob, (void *)buf, buf_size);
 	sha1_buffer(buf, buf_size, blob.hash);
 	blob.unhashed = 0;
@@ -2744,9 +2695,9 @@ write_wim_part(WIMStruct *wim,
 	if (write_flags & WIMLIB_WRITE_FLAG_RETAIN_GUID)
 		guid = wim->hdr.guid;
 	if (guid)
-		memcpy(wim->out_hdr.guid, guid, WIMLIB_GUID_LEN);
+		copy_guid(wim->out_hdr.guid, guid);
 	else
-		randomize_byte_array(wim->out_hdr.guid, WIMLIB_GUID_LEN);
+		generate_guid(wim->out_hdr.guid);
 
 	/* Set the part number and total parts.  */
 	wim->out_hdr.part_number = part_number;