wimlib.net Git - wimlib/blobdiff - src/write.c
write.c: cleanups and bug fixes
index bb8f185623bf660e933689bc69c2bc64c05de855..34f6283325dd47bb6eaabce49a32fdae9cc839c8 100644
--- a/src/write.c
+++ b/src/write.c
@@ -138,60 +138,72 @@ blob_hard_filtered(const struct blob_descriptor *blob,
        return blob_filtered(blob, ctx) < 0;
 }
 
-static inline int
+static inline bool
 may_soft_filter_blobs(const struct filter_context *ctx)
 {
-       if (ctx == NULL)
-               return 0;
-       return ctx->write_flags & WIMLIB_WRITE_FLAG_OVERWRITE;
+       return ctx && (ctx->write_flags & WIMLIB_WRITE_FLAG_OVERWRITE);
 }
 
-static inline int
+static inline bool
 may_hard_filter_blobs(const struct filter_context *ctx)
 {
-       if (ctx == NULL)
-               return 0;
-       return ctx->write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS;
+       return ctx && (ctx->write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS);
 }
 
-static inline int
+static inline bool
 may_filter_blobs(const struct filter_context *ctx)
 {
        return (may_soft_filter_blobs(ctx) || may_hard_filter_blobs(ctx));
 }
 
-/* Return true if the specified resource is compressed and the compressed data
- * can be reused with the specified output parameters.  */
+/* Return true if the specified blob is located in a WIM resource which can be
+ * reused in the output WIM file, without being recompressed.  */
 static bool
-can_raw_copy(const struct blob_descriptor *blob,
-            int write_resource_flags, int out_ctype, u32 out_chunk_size)
+can_raw_copy(const struct blob_descriptor *blob, int write_resource_flags,
+            int out_ctype, u32 out_chunk_size)
 {
        const struct wim_resource_descriptor *rdesc;
 
+       /* Recompress everything if requested.  */
        if (write_resource_flags & WRITE_RESOURCE_FLAG_RECOMPRESS)
                return false;
 
-       if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
-               return false;
-
+       /* A blob not located in a WIM resource cannot be reused.  */
        if (blob->blob_location != BLOB_IN_WIM)
                return false;
 
        rdesc = blob->rdesc;
 
-       if (rdesc->is_pipable != !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE))
+       /* Only reuse compressed resources.  */
+       if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE ||
+           !(rdesc->flags & (WIM_RESHDR_FLAG_COMPRESSED |
+                             WIM_RESHDR_FLAG_SOLID)))
+               return false;
+
+       /* When writing a pipable WIM, we can only reuse pipable resources; and
+        * when writing a non-pipable WIM, we can only reuse non-pipable
+        * resources.  */
+       if (rdesc->is_pipable !=
+           !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE))
                return false;
 
+       /* When writing a solid WIM, we can only reuse solid resources; and when
+        * writing a non-solid WIM, we can only reuse non-solid resources.  */
+       if (!!(rdesc->flags & WIM_RESHDR_FLAG_SOLID) !=
+           !!(write_resource_flags & WRITE_RESOURCE_FLAG_SOLID))
+               return false;
+
+       /* Note: it is theoretically possible to copy chunks of compressed data
+        * between non-solid, solid, and pipable resources.  However, we don't
+        * currently implement this optimization because it would be complex and
+        * would usually go unused.  */
+
        if (rdesc->flags & WIM_RESHDR_FLAG_COMPRESSED) {
-               /* Normal compressed resource: Must use same compression type
-                * and chunk size.  */
+               /* To re-use a non-solid resource, it must use the desired
+                * compression type and chunk size.  */
                return (rdesc->compression_type == out_ctype &&
                        rdesc->chunk_size == out_chunk_size);
-       }
-
-       if ((rdesc->flags & WIM_RESHDR_FLAG_SOLID) &&
-           (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID))
-       {
+       } else {
                /* Solid resource: Such resources may contain multiple blobs,
                 * and in general only a subset of them need to be written.  As
                 * a heuristic, re-use the raw data if more than two-thirds the
@@ -202,6 +214,10 @@ can_raw_copy(const struct blob_descriptor *blob,
                 * check if they are compatible with @out_ctype and
                 * @out_chunk_size.  */
 
+               /* Did we already decide to reuse the resource?  */
+               if (rdesc->raw_copy_ok)
+                       return true;
+
                struct blob_descriptor *res_blob;
                u64 write_size = 0;
 
@@ -211,8 +227,6 @@ can_raw_copy(const struct blob_descriptor *blob,
 
                return (write_size > rdesc->uncompressed_size * 2 / 3);
        }
-
-       return false;
 }
 
 static u32
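For example, under the two-thirds heuristic above, a solid resource holding 300 MiB of uncompressed data is raw-copied only if the blobs actually scheduled for writing total more than 200 MiB (2/3 of 300 MiB); otherwise the needed blobs go through the normal (re)compression path instead of a raw copy of the whole resource.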
@@ -300,8 +314,8 @@ do_write_blobs_progress(struct write_blobs_progress_data *progress_data,
                progress->write_streams.completed_streams += complete_count;
        }
 
-       if (progress->write_streams.completed_bytes >= progress_data->next_progress)
-       {
+       if (progress->write_streams.completed_bytes >= progress_data->next_progress) {
+
                ret = call_progress(progress_data->progfunc,
                                    WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
                                    progress,
@@ -309,32 +323,9 @@ do_write_blobs_progress(struct write_blobs_progress_data *progress_data,
                if (ret)
                        return ret;
 
-               if (progress_data->next_progress == progress->write_streams.total_bytes) {
-                       progress_data->next_progress = ~(u64)0;
-               } else {
-                       /* Handle rate-limiting of messages  */
-
-                       /* Send new message as soon as another 1/128 of the
-                        * total has been written.  (Arbitrary number.)  */
-                       progress_data->next_progress =
-                               progress->write_streams.completed_bytes +
-                                       progress->write_streams.total_bytes / 128;
-
-                       /* ... Unless that would be more than 5000000 bytes, in
-                        * which case send the next after the next 5000000
-                        * bytes.  (Another arbitrary number.)  */
-                       if (progress->write_streams.completed_bytes + 5000000 <
-                           progress_data->next_progress)
-                               progress_data->next_progress =
-                                       progress->write_streams.completed_bytes + 5000000;
-
-                       /* ... But always send a message as soon as we're
-                        * completely done.  */
-                       if (progress->write_streams.total_bytes <
-                           progress_data->next_progress)
-                               progress_data->next_progress =
-                                       progress->write_streams.total_bytes;
-               }
+               set_next_progress(progress->write_streams.completed_bytes,
+                                 progress->write_streams.total_bytes,
+                                 &progress_data->next_progress);
        }
        return 0;
 }
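The rate-limiting logic removed above is factored out into set_next_progress(), whose definition is not visible in this diff. Judging only from the code it replaces, it presumably computes the next reporting threshold along these lines (a sketch, not the actual definition):

static void
set_next_progress(u64 completed_bytes, u64 total_bytes, u64 *next_progress_p)
{
	if (*next_progress_p == total_bytes) {
		/* The final message has been sent; don't send any more.  */
		*next_progress_p = ~(u64)0;
	} else {
		/* Report again once another 1/128 of the total has been
		 * written, but no more than 5000000 bytes later, and in any
		 * case no later than the very end of the write.  */
		u64 next = completed_bytes + total_bytes / 128;

		if (completed_bytes + 5000000 < next)
			next = completed_bytes + 5000000;
		if (total_bytes < next)
			next = total_bytes;
		*next_progress_p = next;
	}
}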
@@ -361,10 +352,6 @@ struct write_blobs_ctx {
 
        struct filter_context *filter_ctx;
 
-       /* Upper bound on the total number of bytes that need to be compressed.
-        * */
-       u64 num_bytes_to_compress;
-
        /* Pointer to the chunk_compressor implementation being used for
         * compressing chunks of data, or NULL if chunks are being written
         * uncompressed.  */
@@ -571,9 +558,9 @@ end_chunk_table(struct write_blobs_ctx *ctx, u64 res_actual_size,
                        hdr.chunk_size = cpu_to_le32(ctx->out_chunk_size);
                        hdr.compression_format = cpu_to_le32(ctx->out_ctype);
 
-                       BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1);
-                       BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2);
-                       BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);
+                       STATIC_ASSERT(WIMLIB_COMPRESSION_TYPE_XPRESS == 1);
+                       STATIC_ASSERT(WIMLIB_COMPRESSION_TYPE_LZX == 2);
+                       STATIC_ASSERT(WIMLIB_COMPRESSION_TYPE_LZMS == 3);
 
                        ret = full_pwrite(ctx->out_fd, &hdr, sizeof(hdr),
                                          chunk_table_offset - sizeof(hdr));
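BUILD_BUG_ON() is replaced by STATIC_ASSERT() with the conditions stated positively. STATIC_ASSERT() is defined in a wimlib header that is not part of this diff; a typical definition of such a macro, shown purely for illustration, is:

/* Assumed definition, e.g. in terms of the C11 keyword: */
#define STATIC_ASSERT(expr)	_Static_assert((expr), "")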
@@ -647,6 +634,8 @@ do_done_with_blob(struct blob_descriptor *blob,
 {
        int ret;
        struct wim_inode *inode;
+       tchar *cookie1;
+       tchar *cookie2;
 
        if (!blob->may_send_done_with_file)
                return 0;
@@ -658,38 +647,14 @@ do_done_with_blob(struct blob_descriptor *blob,
        if (--inode->i_num_remaining_streams > 0)
                return 0;
 
-#ifdef __WIN32__
-       /* XXX: This logic really should be somewhere else.  */
-
-       /* We want the path to the file, but blob->file_on_disk might actually
-        * refer to a named data stream.  Temporarily strip the named data
-        * stream from the path.  */
-       wchar_t *p_colon = NULL;
-       wchar_t *p_question_mark = NULL;
-       const wchar_t *p_stream_name;
-
-       p_stream_name = path_stream_name(blob->file_on_disk);
-       if (unlikely(p_stream_name)) {
-               p_colon = (wchar_t *)(p_stream_name - 1);
-               wimlib_assert(*p_colon == L':');
-               *p_colon = L'\0';
-       }
-
-       /* We also should use a fake Win32 path instead of a NT path  */
-       if (!wcsncmp(blob->file_on_disk, L"\\??\\", 4)) {
-               p_question_mark = &blob->file_on_disk[1];
-               *p_question_mark = L'\\';
-       }
-#endif
+       cookie1 = progress_get_streamless_path(blob->file_on_disk);
+       cookie2 = progress_get_win32_path(blob->file_on_disk);
 
        ret = done_with_file(blob->file_on_disk, progfunc, progctx);
 
-#ifdef __WIN32__
-       if (p_colon)
-               *p_colon = L':';
-       if (p_question_mark)
-               *p_question_mark = L'?';
-#endif
+       progress_put_win32_path(cookie2);
+       progress_put_streamless_path(cookie1);
+
        return ret;
 }
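The Windows-specific path munging removed above now lives behind progress_get_streamless_path() / progress_put_streamless_path() and progress_get_win32_path() / progress_put_win32_path(), which are defined elsewhere in wimlib and return an opaque "cookie" used to undo the temporary edit. Reconstructed purely from the inline code they replace, the streamless-path pair presumably looks roughly like the sketch below; the win32-path pair would analogously turn a "\??\" NT prefix into a fake Win32 path and restore it afterwards.

/* Sketch only; the real helpers may differ in detail.  On non-Windows builds
 * they would simply be no-ops that return NULL.  */
tchar *
progress_get_streamless_path(tchar *path)
{
	tchar *cookie = NULL;
#ifdef __WIN32__
	const wchar_t *stream_name = path_stream_name(path);

	if (unlikely(stream_name)) {
		/* Temporarily strip the named data stream reference by
		 * overwriting the ':' that introduces it.  */
		cookie = (wchar_t *)(stream_name - 1);
		wimlib_assert(*cookie == L':');
		*cookie = L'\0';
	}
#endif
	return cookie;
}

void
progress_put_streamless_path(tchar *cookie)
{
#ifdef __WIN32__
	if (cookie)
		*cookie = L':';		/* Restore the stream name.  */
#endif
}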
 
@@ -814,13 +779,9 @@ write_blob_uncompressed(struct blob_descriptor *blob, struct filedes *out_fd)
 
        wimlib_assert(out_fd->offset - begin_offset == blob->size);
 
-       if (out_fd->offset < end_offset &&
-           0 != ftruncate(out_fd->fd, out_fd->offset))
-       {
-               ERROR_WITH_ERRNO("Can't truncate output file to "
-                                "offset %"PRIu64, out_fd->offset);
-               return WIMLIB_ERR_WRITE;
-       }
+       /* We could ftruncate() the file to 'out_fd->offset' here, but there
+        * isn't much point.  Usually we will only be truncating by a few bytes
+        * and will just overwrite the data immediately.  */
 
        blob->out_reshdr.size_in_wim = blob->size;
        blob->out_reshdr.flags &= ~(WIM_RESHDR_FLAG_COMPRESSED |
@@ -1182,14 +1143,12 @@ compute_blob_list_stats(struct list_head *blob_list,
  * @raw_copy_blobs.  Return the total uncompressed size of the blobs that need
  * to be compressed.  */
 static u64
-find_raw_copy_blobs(struct list_head *blob_list,
-                   int write_resource_flags,
-                   int out_ctype,
-                   u32 out_chunk_size,
+find_raw_copy_blobs(struct list_head *blob_list, int write_resource_flags,
+                   int out_ctype, u32 out_chunk_size,
                    struct list_head *raw_copy_blobs)
 {
        struct blob_descriptor *blob, *tmp;
-       u64 num_bytes_to_compress = 0;
+       u64 num_nonraw_bytes = 0;
 
        INIT_LIST_HEAD(raw_copy_blobs);
 
@@ -1199,23 +1158,17 @@ find_raw_copy_blobs(struct list_head *blob_list,
                        blob->rdesc->raw_copy_ok = 0;
 
        list_for_each_entry_safe(blob, tmp, blob_list, write_blobs_list) {
-               if (blob->blob_location == BLOB_IN_WIM &&
-                   blob->rdesc->raw_copy_ok)
-               {
-                       list_move_tail(&blob->write_blobs_list,
-                                      raw_copy_blobs);
-               } else if (can_raw_copy(blob, write_resource_flags,
-                                       out_ctype, out_chunk_size))
+               if (can_raw_copy(blob, write_resource_flags,
+                                out_ctype, out_chunk_size))
                {
                        blob->rdesc->raw_copy_ok = 1;
-                       list_move_tail(&blob->write_blobs_list,
-                                      raw_copy_blobs);
+                       list_move_tail(&blob->write_blobs_list, raw_copy_blobs);
                } else {
-                       num_bytes_to_compress += blob->size;
+                       num_nonraw_bytes += blob->size;
                }
        }
 
-       return num_bytes_to_compress;
+       return num_nonraw_bytes;
 }
 
 /* Copy a raw compressed resource located in another WIM file to the WIM file
@@ -1491,6 +1444,7 @@ write_blob_list(struct list_head *blob_list,
        int ret;
        struct write_blobs_ctx ctx;
        struct list_head raw_copy_blobs;
+       u64 num_nonraw_bytes;
 
        wimlib_assert((write_resource_flags &
                       (WRITE_RESOURCE_FLAG_SOLID |
@@ -1547,13 +1501,11 @@ write_blob_list(struct list_head *blob_list,
        ctx.progress_data.progfunc = progfunc;
        ctx.progress_data.progctx = progctx;
 
-       ctx.num_bytes_to_compress = find_raw_copy_blobs(blob_list,
-                                                       write_resource_flags,
-                                                       out_ctype,
-                                                       out_chunk_size,
-                                                       &raw_copy_blobs);
+       num_nonraw_bytes = find_raw_copy_blobs(blob_list, write_resource_flags,
+                                              out_ctype, out_chunk_size,
+                                              &raw_copy_blobs);
 
-       if (ctx.num_bytes_to_compress == 0)
+       if (num_nonraw_bytes == 0)
                goto out_write_raw_copy_resources;
 
        /* Unless uncompressed output was required, allocate a chunk_compressor
@@ -1564,7 +1516,7 @@ write_blob_list(struct list_head *blob_list,
        if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
 
        #ifdef ENABLE_MULTITHREADED_COMPRESSION
-               if (ctx.num_bytes_to_compress > max(2000000, out_chunk_size)) {
+               if (num_nonraw_bytes > max(2000000, out_chunk_size)) {
                        ret = new_parallel_chunk_compressor(out_ctype,
                                                            out_chunk_size,
                                                            num_threads, 0,
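For scale: with a typical non-solid chunk size of 32768 bytes, the threshold is max(2000000, 32768) = 2000000, so roughly 2 MB of data must need compression before the parallel chunk compressor is attempted; smaller workloads presumably fall through to the serial compressor set up further down, outside this hunk.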
@@ -1601,7 +1553,7 @@ write_blob_list(struct list_head *blob_list,
                goto out_destroy_context;
 
        if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
-               ret = begin_write_resource(&ctx, ctx.num_bytes_to_compress);
+               ret = begin_write_resource(&ctx, num_nonraw_bytes);
                if (ret)
                        goto out_destroy_context;
        }