X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;f=src%2Fwrite.c;h=0e15da71bffb7b3b7f3ff5f18e4a62b26831fed5;hb=29ac4319aa9c75811cd5629cd3471a681fbeb552;hp=2baba967390bd4cdda51c079c3cb8f66f409449b;hpb=b98b25b85877d1bccdc8673a23576b1fac0ab1c6;p=wimlib

diff --git a/src/write.c b/src/write.c
index 2baba967..0e15da71 100644
--- a/src/write.c
+++ b/src/write.c
@@ -8,20 +8,18 @@
 /*
  * Copyright (C) 2012, 2013, 2014 Eric Biggers
  *
- * This file is part of wimlib, a library for working with WIM files.
- *
- * wimlib is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 3 of the License, or (at your option)
- * any later version.
- *
- * wimlib is distributed in the hope that it will be useful, but WITHOUT ANY
- * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE. See the GNU General Public License for more
+ * This file is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at your option) any
+ * later version.
+ *
+ * This file is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
  * details.
  *
- * You should have received a copy of the GNU General Public License
- * along with wimlib; if not, see http://www.gnu.org/licenses/.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this file; if not, see http://www.gnu.org/licenses/.
  */
 
 #ifdef HAVE_CONFIG_H
@@ -43,6 +41,7 @@
 #include "wimlib/integrity.h"
 #include "wimlib/lookup_table.h"
 #include "wimlib/metadata.h"
+#include "wimlib/paths.h"
 #include "wimlib/progress.h"
 #include "wimlib/resource.h"
 #ifdef __WIN32__
@@ -64,6 +63,7 @@
 #define WRITE_RESOURCE_FLAG_RECOMPRESS		0x00000001
 #define WRITE_RESOURCE_FLAG_PIPABLE		0x00000002
 #define WRITE_RESOURCE_FLAG_PACK_STREAMS	0x00000004
+#define WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE	0x00000008
 
 static inline int
 write_flags_to_resource_flags(int write_flags)
@@ -76,6 +76,8 @@ write_flags_to_resource_flags(int write_flags)
 		write_resource_flags |= WRITE_RESOURCE_FLAG_PIPABLE;
 	if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS)
 		write_resource_flags |= WRITE_RESOURCE_FLAG_PACK_STREAMS;
+	if (write_flags & WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES)
+		write_resource_flags |= WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE;
 	return write_resource_flags;
 }
 
@@ -98,12 +100,15 @@ static int
 stream_filtered(const struct wim_lookup_table_entry *lte,
 		const struct filter_context *ctx)
 {
-	int write_flags = ctx->write_flags;
-	WIMStruct *wim = ctx->wim;
+	int write_flags;
+	WIMStruct *wim;
 
 	if (ctx == NULL)
 		return 0;
 
+	write_flags = ctx->write_flags;
+	wim = ctx->wim;
+
 	if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE &&
 	    lte->resource_location == RESOURCE_IN_WIM &&
 	    lte->rspec->wim == wim)
@@ -279,7 +284,6 @@ struct write_streams_progress_data {
 
 static int
 do_write_streams_progress(struct write_streams_progress_data *progress_data,
-			  struct wim_lookup_table_entry *cur_stream,
 			  u64 complete_size,
 			  u32 complete_count,
 			  bool discarded)
@@ -312,10 +316,28 @@ do_write_streams_progress(struct write_streams_progress_data *progress_data,
 		if (progress_data->next_progress == progress->write_streams.total_bytes) {
 			progress_data->next_progress = ~(uint64_t)0;
 		} else {
+			/* Handle rate-limiting of messages  */
+
+			/* Send new message as soon as another 1/128 of the
+			 * total has been written.  (Arbitrary number.)  */
 			progress_data->next_progress =
-				min(progress->write_streams.total_bytes,
-				    progress->write_streams.completed_bytes +
-				        progress->write_streams.total_bytes / 100);
+				progress->write_streams.completed_bytes +
+					progress->write_streams.total_bytes / 128;
+
+			/* ... Unless that would be more than 5000000 bytes, in
+			 * which case send the next message after another
+			 * 5000000 bytes.  (Another arbitrary number.)  */
+			if (progress->write_streams.completed_bytes + 5000000 <
+			    progress_data->next_progress)
+				progress_data->next_progress =
+					progress->write_streams.completed_bytes + 5000000;
+
+			/* ... But always send a message as soon as we're
+			 * completely done.  */
+			if (progress->write_streams.total_bytes <
+			    progress_data->next_progress)
+				progress_data->next_progress =
+					progress->write_streams.total_bytes;
 		}
 	}
 	return 0;
@@ -366,12 +388,6 @@ struct write_streams_ctx {
 	 * @pending_streams only when writing a packed resource.  */
 	struct list_head pack_streams;
 
-	/* Set to true if the stream currently being read was a duplicate, and
-	 * therefore the corresponding stream entry needs to be freed once the
-	 * read finishes.  (In this case we add the duplicate entry to
-	 * pending_streams rather than the entry being read.)  */
-	bool stream_was_duplicate;
-
 	/* Current uncompressed offset in the stream being read.  */
 	u64 cur_read_stream_offset;
 
@@ -541,7 +557,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size,
 	if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) {
 		ret = full_write(ctx->out_fd, ctx->chunk_csizes, chunk_table_size);
 		if (ret)
-			goto error;
+			goto write_error;
 		res_end_offset = ctx->out_fd->offset;
 		res_start_offset = ctx->chunks_start_offset;
 	} else {
@@ -565,7 +581,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size,
 			ret = full_pwrite(ctx->out_fd, &hdr, sizeof(hdr),
 					  chunk_table_offset - sizeof(hdr));
 			if (ret)
-				goto error;
+				goto write_error;
 			res_start_offset = chunk_table_offset - sizeof(hdr);
 		} else {
 			res_start_offset = chunk_table_offset;
@@ -574,7 +590,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size,
 		ret = full_pwrite(ctx->out_fd, ctx->chunk_csizes,
 				  chunk_table_size, chunk_table_offset);
 		if (ret)
-			goto error;
+			goto write_error;
 	}
 
 	*res_start_offset_ret = res_start_offset;
@@ -582,7 +598,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size,
 
 	return 0;
 
-error:
+write_error:
 	ERROR_WITH_ERRNO("Write error");
 	return ret;
 }
@@ -618,10 +634,96 @@ end_write_resource(struct write_streams_ctx *ctx, struct wim_reshdr *out_reshdr)
 	return 0;
 }
 
+/* No more data streams of the file at @path are needed.  */
+static int
+done_with_file(const tchar *path, wimlib_progress_func_t progfunc, void *progctx)
+{
+	union wimlib_progress_info info;
+
+	info.done_with_file.path_to_file = path;
+
+	return call_progress(progfunc, WIMLIB_PROGRESS_MSG_DONE_WITH_FILE,
+			     &info, progctx);
+}
+
+static inline bool
+is_file_stream(const struct wim_lookup_table_entry *lte)
+{
+	return lte->resource_location == RESOURCE_IN_FILE_ON_DISK
+#ifdef __WIN32__
+	    || lte->resource_location == RESOURCE_IN_WINNT_FILE_ON_DISK
+	    || lte->resource_location == RESOURCE_WIN32_ENCRYPTED
+#endif
+	   ;
+}
+
+static int
+do_done_with_stream(struct wim_lookup_table_entry *lte,
+		    wimlib_progress_func_t progfunc, void *progctx)
+{
+	int ret;
+	struct wim_inode *inode;
+
+	if (!lte->may_send_done_with_file)
+		return 0;
+
+	inode = lte->file_inode;
+
+	wimlib_assert(inode != NULL);
+	wimlib_assert(inode->num_remaining_streams > 0);
+	if (--inode->num_remaining_streams > 0)
+		return 0;
+
+#ifdef __WIN32__
+	/* XXX: This logic really should be somewhere else.  */
+
+	/* We want the path to the file, but lte->file_on_disk might actually
+	 * refer to a named data stream.  Temporarily strip the named data
+	 * stream from the path.  */
+	wchar_t *p_colon = NULL;
+	wchar_t *p_question_mark = NULL;
+	const wchar_t *p_stream_name;
+
+	p_stream_name = path_stream_name(lte->file_on_disk);
+	if (unlikely(p_stream_name)) {
+		p_colon = (wchar_t *)(p_stream_name - 1);
+		wimlib_assert(*p_colon == L':');
+		*p_colon = L'\0';
+	}
+
+	/* We also should use a fake Win32 path instead of an NT path.  */
+	if (!wcsncmp(lte->file_on_disk, L"\\??\\", 4)) {
+		p_question_mark = &lte->file_on_disk[1];
+		*p_question_mark = L'\\';
+	}
+#endif
+
+	ret = done_with_file(lte->file_on_disk, progfunc, progctx);
+
+#ifdef __WIN32__
+	if (p_colon)
+		*p_colon = L':';
+	if (p_question_mark)
+		*p_question_mark = L'?';
+#endif
+	return ret;
+}
+
+/* Handle WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES mode.  */
+static inline int
+done_with_stream(struct wim_lookup_table_entry *lte,
+		 struct write_streams_ctx *ctx)
+{
+	if (likely(!(ctx->write_resource_flags &
+		     WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE)))
+		return 0;
+	return do_done_with_stream(lte, ctx->progress_data.progfunc,
+				   ctx->progress_data.progctx);
+}
+
 /* Begin processing a stream for writing.  */
 static int
-write_stream_begin_read(struct wim_lookup_table_entry *lte,
-			u32 flags, void *_ctx)
+write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx)
 {
 	struct write_streams_ctx *ctx = _ctx;
 	int ret;
@@ -642,11 +744,8 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte,
 	 * still provide the data again to write_stream_process_chunk().  This
 	 * is okay because an unhashed stream cannot be in a WIM resource, which
 	 * might be costly to decompress.  */
-	ctx->stream_was_duplicate = false;
 	if (ctx->lookup_table != NULL && lte->unhashed && !lte->unique_size) {
 
-		wimlib_assert(!(flags & BEGIN_STREAM_FLAG_PARTIAL_RESOURCE));
-
 		struct wim_lookup_table_entry *lte_new;
 
 		ret = hash_unhashed_stream(lte, ctx->lookup_table, &lte_new);
@@ -667,7 +766,7 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte,
 				DEBUG("Discarding duplicate stream of "
 				      "length %"PRIu64, lte->size);
 				ret = do_write_streams_progress(&ctx->progress_data,
-								lte, lte->size,
+								lte->size,
 								1, true);
 				list_del(&lte->write_streams_list);
 				list_del(&lte->lookup_table_list);
@@ -675,6 +774,8 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte,
 					lte_new->out_refcnt += lte->out_refcnt;
 				if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)
 					ctx->cur_write_res_size -= lte->size;
+				if (!ret)
+					ret = done_with_stream(lte, ctx);
 				free_lookup_table_entry(lte);
 				if (ret)
 					return ret;
@@ -692,9 +793,10 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte,
 					     &lte_new->write_streams_list);
 				list_replace(&lte->lookup_table_list,
 					     &lte_new->lookup_table_list);
+				lte->will_be_in_output_wim = 0;
 				lte_new->out_refcnt = lte->out_refcnt;
 				lte_new->will_be_in_output_wim = 1;
-				ctx->stream_was_duplicate = true;
+				lte_new->may_send_done_with_file = 0;
 				lte = lte_new;
 			}
 		}
@@ -751,6 +853,43 @@ write_stream_uncompressed(struct wim_lookup_table_entry *lte,
 	return 0;
 }
 
+/* Returns true if the specified stream should be truncated from the WIM file
+ * and re-written as uncompressed.  lte->out_reshdr must be filled in from the
+ * initial write of the stream.  */
+static bool
+should_rewrite_stream_uncompressed(const struct write_streams_ctx *ctx,
+				   const struct wim_lookup_table_entry *lte)
+{
+	/* If the compressed data is smaller than the uncompressed data, prefer
+	 * the compressed data.  */
+	if (lte->out_reshdr.size_in_wim < lte->out_reshdr.uncompressed_size)
+		return false;
+
+	/* If we're not actually writing compressed data, then there's no need
+	 * for re-writing.  */
+	if (!ctx->compressor)
+		return false;
+
+	/* If writing a pipable WIM, everything we write to the output is final
+	 * (it might actually be a pipe!).  */
+	if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE)
+		return false;
+
+	/* If the stream that would need to be re-read is located in a solid
+	 * block in another WIM file, then re-reading it would be costly.  So
+	 * don't do it.
+	 *
+	 * Exception: if the compressed size happens to be *exactly* the same as
+	 * the uncompressed size, then the stream *must* be written uncompressed
+	 * in order to remain compatible with the Windows Overlay Filesystem
+	 * Filter Driver (WOF).  */
+	if ((lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) &&
+	    (lte->out_reshdr.size_in_wim != lte->out_reshdr.uncompressed_size))
+		return false;
+
+	return true;
+}
+
 /* Write the next chunk of (typically compressed) data to the output WIM,
  * handling the writing of the chunk table.  */
 static int
@@ -804,14 +943,14 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk,
 			ret = full_write(ctx->out_fd, &chunk_hdr,
 					 sizeof(chunk_hdr));
 			if (ret)
-				goto error;
+				goto write_error;
 		}
 	}
 
 	/* Write the chunk data.  */
 	ret = full_write(ctx->out_fd, cchunk, csize);
 	if (ret)
-		goto error;
+		goto write_error;
 
 	ctx->cur_write_stream_offset += usize;
 
@@ -820,36 +959,32 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk,
 	if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) {
 		/* Wrote chunk in packed mode.  It may have finished multiple
 		 * streams.  */
-		while (ctx->cur_write_stream_offset > lte->size) {
-			struct wim_lookup_table_entry *next;
+		struct wim_lookup_table_entry *next_lte;
+
+		while (lte && ctx->cur_write_stream_offset >= lte->size) {
 
 			ctx->cur_write_stream_offset -= lte->size;
 
-			wimlib_assert(!list_is_singular(&ctx->pending_streams) &&
-				      !list_empty(&ctx->pending_streams));
-			next = list_entry(lte->write_streams_list.next,
-					  struct wim_lookup_table_entry,
-					  write_streams_list);
-			list_move_tail(&lte->write_streams_list,
-				       &ctx->pack_streams);
-			lte = next;
-			completed_stream_count++;
-		}
-		if (ctx->cur_write_stream_offset == lte->size) {
-			ctx->cur_write_stream_offset = 0;
-			list_move_tail(&lte->write_streams_list,
-				       &ctx->pack_streams);
+			if (ctx->cur_write_stream_offset)
+				next_lte = list_entry(lte->write_streams_list.next,
+						      struct wim_lookup_table_entry,
+						      write_streams_list);
+			else
+				next_lte = NULL;
+
+			ret = done_with_stream(lte, ctx);
+			if (ret)
+				return ret;
+			list_move_tail(&lte->write_streams_list, &ctx->pack_streams);
 			completed_stream_count++;
+
+			lte = next_lte;
 		}
 	} else {
 		/* Wrote chunk in non-packed mode.  It may have finished a
 		 * stream.  */
 		if (ctx->cur_write_stream_offset == lte->size) {
 
-			completed_stream_count++;
-
-			list_del(&lte->write_streams_list);
-
 			wimlib_assert(ctx->cur_write_stream_offset ==
 				      ctx->cur_write_res_size);
 
@@ -861,18 +996,7 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk,
 			if (ctx->compressor != NULL)
 				lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED;
 
-			if (ctx->compressor != NULL &&
-			    lte->out_reshdr.size_in_wim >= lte->out_reshdr.uncompressed_size &&
-			    !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) &&
-			    !(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS))
-			{
-				/* Stream did not compress to less than its original
-				 * size.  If we're not writing a pipable WIM (which
-				 * could mean the output file descriptor is
-				 * non-seekable), and the stream isn't located in a
-				 * resource pack (which would make reading it again
-				 * costly), truncate the file to the start of the stream
-				 * and write it uncompressed instead.  */
+			if (should_rewrite_stream_uncompressed(ctx, lte)) {
 				DEBUG("Stream of size %"PRIu64" did not compress to "
 				      "less than original size; writing uncompressed.",
 				      lte->size);
@@ -883,14 +1007,20 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk,
 			wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size);
 
 			ctx->cur_write_stream_offset = 0;
+
+			ret = done_with_stream(lte, ctx);
+			if (ret)
+				return ret;
+			list_del(&lte->write_streams_list);
+			completed_stream_count++;
 		}
 	}
 
-	return do_write_streams_progress(&ctx->progress_data, lte,
+	return do_write_streams_progress(&ctx->progress_data,
 					 completed_size, completed_stream_count,
 					 false);
 
-error:
+write_error:
 	ERROR_WITH_ERRNO("Write error");
 	return ret;
 }
@@ -1006,11 +1136,27 @@ static int
 write_stream_end_read(struct wim_lookup_table_entry *lte, int status, void *_ctx)
 {
 	struct write_streams_ctx *ctx = _ctx;
-	if (status == 0)
-		wimlib_assert(ctx->cur_read_stream_offset == ctx->cur_read_stream_size);
-	if (ctx->stream_was_duplicate) {
+
+	wimlib_assert(ctx->cur_read_stream_offset == ctx->cur_read_stream_size || status);
+
+	if (!lte->will_be_in_output_wim) {
+		/* The 'lte' stream was a duplicate.  Now that its data has
+		 * finished being read, it is being discarded in favor of the
+		 * duplicate entry.  It therefore is no longer needed, and we
+		 * can fire the DONE_WITH_FILE callback because the file will
+		 * not be read again.
+		 *
+		 * Note: we can't yet fire DONE_WITH_FILE for non-duplicate
+		 * streams, since it needs to be possible to re-read the file if
+		 * it does not compress to less than its original size.  */
+		if (!status)
+			status = done_with_stream(lte, ctx);
 		free_lookup_table_entry(lte);
-	} else if (lte->unhashed && ctx->lookup_table != NULL) {
+	} else if (!status && lte->unhashed && ctx->lookup_table != NULL) {
+		/* The 'lte' stream was not a duplicate and was previously
+		 * unhashed.  Since we passed COMPUTE_MISSING_STREAM_HASHES to
+		 * read_stream_list(), lte->hash is now computed and valid.  So
+		 * turn this stream into a "hashed" stream.  */
 		list_del(&lte->unhashed_list);
 		lookup_table_insert(ctx->lookup_table, lte);
 		lte->unhashed = 0;
@@ -1177,7 +1323,7 @@ write_raw_copy_resources(struct list_head *raw_copy_streams,
 				return ret;
 			lte->rspec->raw_copy_ok = 0;
 		}
-		ret = do_write_streams_progress(progress_data, lte, lte->size,
+		ret = do_write_streams_progress(progress_data, lte->size,
 						1, false);
 		if (ret)
 			return ret;
@@ -1229,6 +1375,25 @@ remove_zero_length_streams(struct list_head *stream_list)
 	}
 }
 
+static void
+init_done_with_file_info(struct list_head *stream_list)
+{
+	struct wim_lookup_table_entry *lte;
+
+	list_for_each_entry(lte, stream_list, write_streams_list) {
+		if (is_file_stream(lte)) {
+			lte->file_inode->num_remaining_streams = 0;
+			lte->may_send_done_with_file = 1;
+		} else {
+			lte->may_send_done_with_file = 0;
+		}
+	}
+
+	list_for_each_entry(lte, stream_list, write_streams_list)
+		if (lte->may_send_done_with_file)
+			lte->file_inode->num_remaining_streams++;
+}
+
 /*
  * Write a list of streams to the output WIM file.
  *
@@ -1364,6 +1529,11 @@ write_stream_list(struct list_head *stream_list,
 		return 0;
 	}
 
+	/* If needed, set auxiliary information so that we can detect when the
+	 * library has finished using each external file.  */
+	if (unlikely(write_resource_flags & WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE))
+		init_done_with_file_info(stream_list);
+
 	memset(&ctx, 0, sizeof(ctx));
 
 	/* Pre-sorting the streams is required for compute_stream_list_stats().