Reduce unnecessary copying during chunk compression
index aab567f86c7817f1e8d132eb7d8d3e28d3ee961b..842ca9b111dd38f0c9458be29af2858a205a538b 100644
--- a/src/write.c
+++ b/src/write.c
@@ -6,7 +6,7 @@
  */
 
 /*
- * Copyright (C) 2012, 2013, 2014 Eric Biggers
+ * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
  *
  * This file is free software; you can redistribute it and/or modify it under
  * the terms of the GNU Lesser General Public License as published by the Free
@@ -51,6 +51,7 @@
 #include "wimlib/paths.h"
 #include "wimlib/progress.h"
 #include "wimlib/resource.h"
+#include "wimlib/solid.h"
 #ifdef __WIN32__
 #  include "wimlib/win32.h" /* win32_rename_replacement() */
 #endif
@@ -63,6 +64,7 @@
 #define WRITE_RESOURCE_FLAG_PIPABLE            0x00000002
 #define WRITE_RESOURCE_FLAG_SOLID              0x00000004
 #define WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE        0x00000008
+#define WRITE_RESOURCE_FLAG_SOLID_SORT         0x00000010
 
 static inline int
 write_flags_to_resource_flags(int write_flags)
@@ -77,6 +79,10 @@ write_flags_to_resource_flags(int write_flags)
                write_resource_flags |= WRITE_RESOURCE_FLAG_SOLID;
        if (write_flags & WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES)
                write_resource_flags |= WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE;
+       if ((write_flags & (WIMLIB_WRITE_FLAG_SOLID |
+                           WIMLIB_WRITE_FLAG_NO_SOLID_SORT)) ==
+           WIMLIB_WRITE_FLAG_SOLID)
+               write_resource_flags |= WRITE_RESOURCE_FLAG_SOLID_SORT;
        return write_resource_flags;
 }
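
The new check enables solid sorting only when WIMLIB_WRITE_FLAG_SOLID is set and WIMLIB_WRITE_FLAG_NO_SOLID_SORT is clear: masking out both flags and comparing against SOLID alone rejects every other combination. A minimal self-check of that idiom (the flag values below are illustrative, not wimlib's actual constants):

	#include <assert.h>

	#define SOLID		0x1
	#define NO_SOLID_SORT	0x2

	static int solid_sort_enabled(int flags)
	{
		return (flags & (SOLID | NO_SOLID_SORT)) == SOLID;
	}

	int main(void)
	{
		assert(!solid_sort_enabled(0));
		assert(solid_sort_enabled(SOLID));
		assert(!solid_sort_enabled(SOLID | NO_SOLID_SORT));
		assert(!solid_sort_enabled(NO_SOLID_SORT));
		return 0;
	}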
 
@@ -373,12 +379,13 @@ struct write_streams_ctx {
         * uncompressed.  */
        struct chunk_compressor *compressor;
 
-       /* Buffer for dividing the read data into chunks of size
-        * @out_chunk_size.  */
-       u8 *chunk_buf;
+       /* A buffer of size @out_chunk_size that has been loaned out from the
+        * chunk compressor and is currently being filled with the uncompressed
+        * data of the next chunk.  */
+       u8 *cur_chunk_buf;
 
-       /* Number of bytes in @chunk_buf that are currently filled.  */
-       size_t chunk_buf_filled;
+       /* Number of bytes in @cur_chunk_buf that are currently filled.  */
+       size_t cur_chunk_buf_filled;
 
        /* List of streams that currently have chunks being compressed.  */
        struct list_head pending_streams;
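
The "loaned out" wording refers to the chunk compressor's reworked interface, which this patch calls through get_chunk_buffer(), signal_chunk_filled(), and get_compression_result(). A sketch of that interface as used here follows; the authoritative declaration lives in wimlib's chunk compressor header, and the exact member layout shown is an assumption:

	/* Sketch of the compressor ops this patch relies on; details may
	 * differ from wimlib's real declaration.  */
	struct chunk_compressor {
		/* Loan the caller a buffer of out_chunk_size bytes to fill
		 * with uncompressed data, or return NULL if too many chunks
		 * are already outstanding.  */
		void *(*get_chunk_buffer)(struct chunk_compressor *);

		/* Hand the loaned buffer, now holding @filled bytes of
		 * uncompressed data, back to the compressor.  */
		void (*signal_chunk_filled)(struct chunk_compressor *,
					    u32 filled);

		/* Retrieve the next compressed chunk, in submission order,
		 * or return false if none remain.  */
		bool (*get_compression_result)(struct chunk_compressor *,
					       const void **cdata,
					       u32 *csize, u32 *usize);

		void (*destroy)(struct chunk_compressor *);
	};
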
@@ -1051,22 +1058,23 @@ write_error:
 }
 
 static int
-submit_chunk_for_compression(struct write_streams_ctx *ctx,
-                            const void *chunk, size_t size)
+prepare_chunk_buffer(struct write_streams_ctx *ctx)
 {
-       /* While we are unable to submit the chunk for compression (due to too
-        * many chunks already outstanding), retrieve and write the next
-        * compressed chunk.  */
-       while (!ctx->compressor->submit_chunk(ctx->compressor, chunk, size)) {
+       /* While we are unable to get a new chunk buffer due to too many chunks
+        * already outstanding, retrieve and write the next compressed chunk. */
+       while (!(ctx->cur_chunk_buf =
+                ctx->compressor->get_chunk_buffer(ctx->compressor)))
+       {
                const void *cchunk;
                u32 csize;
                u32 usize;
                bool bret;
                int ret;
 
-               bret = ctx->compressor->get_chunk(ctx->compressor,
-                                                 &cchunk, &csize, &usize);
-
+               bret = ctx->compressor->get_compression_result(ctx->compressor,
+                                                              &cchunk,
+                                                              &csize,
+                                                              &usize);
                wimlib_assert(bret);
 
                ret = write_chunk(ctx, cchunk, csize, usize);
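
A NULL return from get_chunk_buffer() is the compressor's backpressure signal: a parallel compressor owns only a bounded set of chunk buffers, so the writer must drain a finished result before another buffer can be loaned out. A hypothetical bounded-ring implementation of that contract (not wimlib's actual code) could be as simple as:

	struct ring_compressor {
		void **bufs;		/* nbufs preallocated chunk buffers */
		unsigned nbufs;
		unsigned next_in;	/* next slot to loan out */
		unsigned outstanding;	/* loaned but not yet retrieved */
	};

	static void *ring_get_chunk_buffer(struct ring_compressor *c)
	{
		if (c->outstanding == c->nbufs)
			return NULL;	/* caller must drain a result first */
		c->outstanding++;
		return c->bufs[c->next_in++ % c->nbufs];
	}
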
@@ -1101,55 +1109,40 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx)
        chunkptr = chunk;
        chunkend = chunkptr + size;
        do {
-               const u8 *resized_chunk;
                size_t needed_chunk_size;
+               size_t bytes_consumed;
+
+               if (!ctx->cur_chunk_buf) {
+                       ret = prepare_chunk_buffer(ctx);
+                       if (ret)
+                               return ret;
+               }
 
                if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
                        needed_chunk_size = ctx->out_chunk_size;
                } else {
-                       u64 res_bytes_remaining;
-
-                       res_bytes_remaining = ctx->cur_read_stream_size -
-                                             ctx->cur_read_stream_offset;
                        needed_chunk_size = min(ctx->out_chunk_size,
-                                               ctx->chunk_buf_filled +
-                                                       res_bytes_remaining);
+                                               ctx->cur_chunk_buf_filled +
+                                                       (ctx->cur_read_stream_size -
+                                                        ctx->cur_read_stream_offset));
                }
 
-               if (ctx->chunk_buf_filled == 0 &&
-                   chunkend - chunkptr >= needed_chunk_size)
-               {
-                       /* No intermediate buffering needed.  */
-                       resized_chunk = chunkptr;
-                       chunkptr += needed_chunk_size;
-                       ctx->cur_read_stream_offset += needed_chunk_size;
-               } else {
-                       /* Intermediate buffering needed.  */
-                       size_t bytes_consumed;
-
-                       bytes_consumed = min(chunkend - chunkptr,
-                                            needed_chunk_size - ctx->chunk_buf_filled);
+               bytes_consumed = min(chunkend - chunkptr,
+                                    needed_chunk_size - ctx->cur_chunk_buf_filled);
 
-                       memcpy(&ctx->chunk_buf[ctx->chunk_buf_filled],
-                              chunkptr, bytes_consumed);
+               memcpy(&ctx->cur_chunk_buf[ctx->cur_chunk_buf_filled],
+                      chunkptr, bytes_consumed);
 
-                       chunkptr += bytes_consumed;
-                       ctx->cur_read_stream_offset += bytes_consumed;
-                       ctx->chunk_buf_filled += bytes_consumed;
-                       if (ctx->chunk_buf_filled == needed_chunk_size) {
-                               resized_chunk = ctx->chunk_buf;
-                               ctx->chunk_buf_filled = 0;
-                       } else {
-                               break;
-                       }
+               chunkptr += bytes_consumed;
+               ctx->cur_read_stream_offset += bytes_consumed;
+               ctx->cur_chunk_buf_filled += bytes_consumed;
 
+               if (ctx->cur_chunk_buf_filled == needed_chunk_size) {
+                       ctx->compressor->signal_chunk_filled(ctx->compressor,
+                                                            ctx->cur_chunk_buf_filled);
+                       ctx->cur_chunk_buf = NULL;
+                       ctx->cur_chunk_buf_filled = 0;
                }
-
-               ret = submit_chunk_for_compression(ctx, resized_chunk,
-                                                  needed_chunk_size);
-               if (ret)
-                       return ret;
-
        } while (chunkptr != chunkend);
        return 0;
 }
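
This rewrite is where the copying promised in the commit title is saved. Previously the data was either staged in ctx->chunk_buf or passed by pointer to submit_chunk(), which still had to copy it into the compressor's internal buffer; now every byte is copied exactly once, from the reader's buffer directly into the compressor-owned buffer. The per-chunk cycle condenses to:

	/* Condensed from the loop above (error handling and stream-offset
	 * bookkeeping omitted; min() as used elsewhere in wimlib).  */
	while (chunkptr != chunkend) {
		if (!ctx->cur_chunk_buf)
			prepare_chunk_buffer(ctx);	/* may drain results */

		size_t n = min(chunkend - chunkptr,
			       needed_chunk_size - ctx->cur_chunk_buf_filled);
		memcpy(&ctx->cur_chunk_buf[ctx->cur_chunk_buf_filled],
		       chunkptr, n);
		chunkptr += n;
		ctx->cur_chunk_buf_filled += n;

		if (ctx->cur_chunk_buf_filled == needed_chunk_size) {
			ctx->compressor->signal_chunk_filled(ctx->compressor,
						ctx->cur_chunk_buf_filled);
			ctx->cur_chunk_buf = NULL;
			ctx->cur_chunk_buf_filled = 0;
		}
	}
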
@@ -1368,14 +1361,14 @@ finish_remaining_chunks(struct write_streams_ctx *ctx)
        if (ctx->compressor == NULL)
                return 0;
 
-       if (ctx->chunk_buf_filled != 0) {
-               ret = submit_chunk_for_compression(ctx, ctx->chunk_buf,
-                                                  ctx->chunk_buf_filled);
-               if (ret)
-                       return ret;
+       if (ctx->cur_chunk_buf_filled != 0) {
+               ctx->compressor->signal_chunk_filled(ctx->compressor,
+                                                    ctx->cur_chunk_buf_filled);
        }
 
-       while (ctx->compressor->get_chunk(ctx->compressor, &cdata, &csize, &usize)) {
+       while (ctx->compressor->get_compression_result(ctx->compressor, &cdata,
+                                                      &csize, &usize))
+       {
                ret = write_chunk(ctx, cdata, csize, usize);
                if (ret)
                        return ret;
@@ -1561,14 +1554,6 @@ write_stream_list(struct list_head *stream_list,
 
        memset(&ctx, 0, sizeof(ctx));
 
-       /* Pre-sorting the streams is required for compute_stream_list_stats().
-        * Afterwards, read_stream_list() need not sort them again.  */
-       ret = sort_stream_list_by_sequential_order(stream_list,
-                                                  offsetof(struct wim_lookup_table_entry,
-                                                           write_streams_list));
-       if (ret)
-               return ret;
-
        ctx.out_fd = out_fd;
        ctx.lookup_table = lookup_table;
        ctx.out_ctype = out_ctype;
@@ -1576,22 +1561,33 @@ write_stream_list(struct list_head *stream_list,
        ctx.write_resource_flags = write_resource_flags;
        ctx.filter_ctx = filter_ctx;
 
-       if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
-               wimlib_assert(out_chunk_size != 0);
-               if (out_chunk_size <= STACK_MAX) {
-                       ctx.chunk_buf = alloca(out_chunk_size);
-               } else {
-                       ctx.chunk_buf = MALLOC(out_chunk_size);
-                       if (ctx.chunk_buf == NULL) {
-                               ret = WIMLIB_ERR_NOMEM;
-                               goto out_destroy_context;
-                       }
-               }
-       }
-       ctx.chunk_buf_filled = 0;
+       /*
+        * We normally sort the streams to write by a "sequential" order that is
+        * optimized for reading.  But when using solid compression, we instead
+        * sort the streams by file extension and file name (when applicable;
+        * and we don't do this for streams from solid resources) so that
+        * similar files are grouped together, which improves the compression
+        * ratio.  This is somewhat of a hack since a stream does not
+        * necessarily correspond one-to-one with a filename, nor is there any
+        * guarantee that two files with similar names or extensions are
+        * actually similar in content.  A potential TODO is to sort the streams
+        * based on some measure of similarity of their actual contents.
+        */
+
+       ret = sort_stream_list_by_sequential_order(stream_list,
+                                                  offsetof(struct wim_lookup_table_entry,
+                                                           write_streams_list));
+       if (ret)
+               return ret;
 
        compute_stream_list_stats(stream_list, &ctx);
 
+       if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID_SORT) {
+               ret = sort_stream_list_for_solid_compression(stream_list);
+               if (unlikely(ret))
+                       WARNING("Failed to sort streams for solid compression. Continuing anyways.");
+       }
+
        ctx.progress_data.progfunc = progfunc;
        ctx.progress_data.progctx = progctx;
 
@@ -1730,8 +1726,6 @@ out_write_raw_copy_resources:
                                       &ctx.progress_data);
 
 out_destroy_context:
-       if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE && out_chunk_size > STACK_MAX)
-               FREE(ctx.chunk_buf);
        FREE(ctx.chunk_csizes);
        if (ctx.compressor)
                ctx.compressor->destroy(ctx.compressor);