*/
/*
- * Copyright (C) 2012, 2013, 2014 Eric Biggers
+ * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers
*
* This file is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License as published by the Free
#include "wimlib/paths.h"
#include "wimlib/progress.h"
#include "wimlib/resource.h"
+#include "wimlib/solid.h"
#ifdef __WIN32__
# include "wimlib/win32.h" /* win32_rename_replacement() */
#endif
#define WRITE_RESOURCE_FLAG_PIPABLE 0x00000002
#define WRITE_RESOURCE_FLAG_SOLID 0x00000004
#define WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE 0x00000008
+#define WRITE_RESOURCE_FLAG_SOLID_SORT 0x00000010
static inline int
write_flags_to_resource_flags(int write_flags)
write_resource_flags |= WRITE_RESOURCE_FLAG_SOLID;
if (write_flags & WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES)
write_resource_flags |= WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE;
+ if ((write_flags & (WIMLIB_WRITE_FLAG_SOLID |
+ WIMLIB_WRITE_FLAG_NO_SOLID_SORT)) ==
+ WIMLIB_WRITE_FLAG_SOLID)
+ write_resource_flags |= WRITE_RESOURCE_FLAG_SOLID_SORT;
return write_resource_flags;
}
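+/*
+ * Note on the masked compare above: testing (flags & (A | B)) == A checks in
+ * one step that bit A is set and bit B is clear.  A minimal illustration:
+ *
+ *	int f = WIMLIB_WRITE_FLAG_SOLID;
+ *	// (f & (SOLID | NO_SOLID_SORT)) == SOLID  ->  true: sorting enabled
+ *
+ *	f |= WIMLIB_WRITE_FLAG_NO_SOLID_SORT;
+ *	// (f & (SOLID | NO_SOLID_SORT)) == SOLID  ->  false: sorting suppressed
+ */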
* uncompressed. */
struct chunk_compressor *compressor;
- /* Buffer for dividing the read data into chunks of size
- * @out_chunk_size. */
- u8 *chunk_buf;
+ /* A buffer of size @out_chunk_size that has been loaned out from the
+ * chunk compressor and is currently being filled with the uncompressed
+ * data of the next chunk. */
+ u8 *cur_chunk_buf;
- /* Number of bytes in @chunk_buf that are currently filled. */
- size_t chunk_buf_filled;
+ /* Number of bytes in @cur_chunk_buf that are currently filled. */
+ size_t cur_chunk_buf_filled;
/* List of streams that currently have chunks being compressed. */
struct list_head pending_streams;
}
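+/*
+ * Buffer-loan protocol assumed by the two fields above, pieced together from
+ * the calls made in this file (a sketch, not the authoritative definition of
+ * the struct chunk_compressor interface):
+ *
+ *	u8 *buf = c->get_chunk_buffer(c);  // NULL if too many chunks are
+ *					   // already outstanding
+ *	memcpy(buf, udata, usize);         // fill with uncompressed data
+ *	c->signal_chunk_filled(c, usize);  // ownership returns to compressor
+ *
+ *	// completed chunks then come back in submission order:
+ *	const void *cdata;
+ *	u32 csize, cusize;
+ *	while (c->get_compression_result(c, &cdata, &csize, &cusize))
+ *		write_chunk(ctx, cdata, csize, cusize);
+ */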
static int
-submit_chunk_for_compression(struct write_streams_ctx *ctx,
- const void *chunk, size_t size)
+prepare_chunk_buffer(struct write_streams_ctx *ctx)
{
- /* While we are unable to submit the chunk for compression (due to too
- * many chunks already outstanding), retrieve and write the next
- * compressed chunk. */
- while (!ctx->compressor->submit_chunk(ctx->compressor, chunk, size)) {
+ /* While we are unable to get a new chunk buffer due to too many chunks
+ * already outstanding, retrieve and write the next compressed chunk. */
+ while (!(ctx->cur_chunk_buf =
+ ctx->compressor->get_chunk_buffer(ctx->compressor)))
+ {
const void *cchunk;
u32 csize;
u32 usize;
bool bret;
int ret;
- bret = ctx->compressor->get_chunk(ctx->compressor,
- &cchunk, &csize, &usize);
-
+ bret = ctx->compressor->get_compression_result(ctx->compressor,
+ &cchunk,
+ &csize,
+ &usize);
wimlib_assert(bret);
ret = write_chunk(ctx, cchunk, csize, usize);
chunkptr = chunk;
chunkend = chunkptr + size;
do {
- const u8 *resized_chunk;
size_t needed_chunk_size;
+ size_t bytes_consumed;
+
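+		/* If we don't currently hold a chunk buffer, get a new one
+		 * from the compressor, first draining completed compressed
+		 * chunks if too many are outstanding. */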
+ if (!ctx->cur_chunk_buf) {
+ ret = prepare_chunk_buffer(ctx);
+ if (ret)
+ return ret;
+ }
if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
needed_chunk_size = ctx->out_chunk_size;
} else {
- u64 res_bytes_remaining;
-
- res_bytes_remaining = ctx->cur_read_stream_size -
- ctx->cur_read_stream_offset;
needed_chunk_size = min(ctx->out_chunk_size,
- ctx->chunk_buf_filled +
- res_bytes_remaining);
+ ctx->cur_chunk_buf_filled +
+ (ctx->cur_read_stream_size -
+ ctx->cur_read_stream_offset));
}
- if (ctx->chunk_buf_filled == 0 &&
- chunkend - chunkptr >= needed_chunk_size)
- {
- /* No intermediate buffering needed. */
- resized_chunk = chunkptr;
- chunkptr += needed_chunk_size;
- ctx->cur_read_stream_offset += needed_chunk_size;
- } else {
- /* Intermediate buffering needed. */
- size_t bytes_consumed;
-
- bytes_consumed = min(chunkend - chunkptr,
- needed_chunk_size - ctx->chunk_buf_filled);
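+		/* Copy as much of the newly read data into the current chunk
+		 * buffer as is available and still needed. */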
+ bytes_consumed = min(chunkend - chunkptr,
+ needed_chunk_size - ctx->cur_chunk_buf_filled);
- memcpy(&ctx->chunk_buf[ctx->chunk_buf_filled],
- chunkptr, bytes_consumed);
+ memcpy(&ctx->cur_chunk_buf[ctx->cur_chunk_buf_filled],
+ chunkptr, bytes_consumed);
- chunkptr += bytes_consumed;
- ctx->cur_read_stream_offset += bytes_consumed;
- ctx->chunk_buf_filled += bytes_consumed;
- if (ctx->chunk_buf_filled == needed_chunk_size) {
- resized_chunk = ctx->chunk_buf;
- ctx->chunk_buf_filled = 0;
- } else {
- break;
- }
+ chunkptr += bytes_consumed;
+ ctx->cur_read_stream_offset += bytes_consumed;
+ ctx->cur_chunk_buf_filled += bytes_consumed;
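+		/* If the chunk buffer is now full, hand it back to the
+		 * compressor; a fresh buffer will be obtained on the next
+		 * iteration. */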
+ if (ctx->cur_chunk_buf_filled == needed_chunk_size) {
+ ctx->compressor->signal_chunk_filled(ctx->compressor,
+ ctx->cur_chunk_buf_filled);
+ ctx->cur_chunk_buf = NULL;
+ ctx->cur_chunk_buf_filled = 0;
}
-
- ret = submit_chunk_for_compression(ctx, resized_chunk,
- needed_chunk_size);
- if (ret)
- return ret;
-
} while (chunkptr != chunkend);
return 0;
}
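+/*
+ * Worked example of the chunking logic above (illustrative numbers): with
+ * out_chunk_size = 32768 and a 70000-byte stream in non-solid mode, the loop
+ * emits chunks of 32768, 32768, and 4464 bytes; the last chunk is shortened
+ * because needed_chunk_size = min(out_chunk_size, bytes buffered + bytes
+ * remaining in the stream).  In solid mode, needed_chunk_size is always
+ * out_chunk_size, so chunks cross stream boundaries and only the end of the
+ * whole resource can leave a partially filled buffer, which is flushed in
+ * the finish path below.
+ */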
if (ctx->compressor == NULL)
return 0;
- if (ctx->chunk_buf_filled != 0) {
- ret = submit_chunk_for_compression(ctx, ctx->chunk_buf,
- ctx->chunk_buf_filled);
- if (ret)
- return ret;
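+	/* Flush a partially filled final chunk, if any.  (This is possible in
+	 * solid mode, where stream boundaries need not coincide with chunk
+	 * boundaries.) */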
+ if (ctx->cur_chunk_buf_filled != 0) {
+ ctx->compressor->signal_chunk_filled(ctx->compressor,
+ ctx->cur_chunk_buf_filled);
}
- while (ctx->compressor->get_chunk(ctx->compressor, &cdata, &csize, &usize)) {
+ while (ctx->compressor->get_compression_result(ctx->compressor, &cdata,
+ &csize, &usize))
+ {
ret = write_chunk(ctx, cdata, csize, usize);
if (ret)
return ret;
memset(&ctx, 0, sizeof(ctx));
- /* Pre-sorting the streams is required for compute_stream_list_stats().
- * Afterwards, read_stream_list() need not sort them again. */
- ret = sort_stream_list_by_sequential_order(stream_list,
- offsetof(struct wim_lookup_table_entry,
- write_streams_list));
- if (ret)
- return ret;
-
ctx.out_fd = out_fd;
ctx.lookup_table = lookup_table;
ctx.out_ctype = out_ctype;
ctx.write_resource_flags = write_resource_flags;
ctx.filter_ctx = filter_ctx;
- if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
- wimlib_assert(out_chunk_size != 0);
- if (out_chunk_size <= STACK_MAX) {
- ctx.chunk_buf = alloca(out_chunk_size);
- } else {
- ctx.chunk_buf = MALLOC(out_chunk_size);
- if (ctx.chunk_buf == NULL) {
- ret = WIMLIB_ERR_NOMEM;
- goto out_destroy_context;
- }
- }
- }
- ctx.chunk_buf_filled = 0;
+ /*
+ * We normally sort the streams to write by a "sequential" order that is
+ * optimized for reading. But when using solid compression, we instead
+ * sort the streams by file extension and file name (when applicable;
+ * streams from existing solid resources are excluded) so that
+ * similar files are grouped together, which improves the compression
+ * ratio. This is something of a hack, since a stream does not
+ * necessarily correspond one-to-one with a filename, nor is there any
+ * guarantee that two files with similar names or extensions are
+ * actually similar in content. A potential TODO is to sort the streams
+ * based on some measure of similarity of their actual contents.
+ */
+
+ ret = sort_stream_list_by_sequential_order(stream_list,
+ offsetof(struct wim_lookup_table_entry,
+ write_streams_list));
+ if (ret)
+ return ret;
compute_stream_list_stats(stream_list, &ctx);
+ if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID_SORT) {
+ ret = sort_stream_list_for_solid_compression(stream_list);
+ if (unlikely(ret))
+ WARNING("Failed to sort streams for solid compression. Continuing anyways.");
+ }
+
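+	/*
+	 * Illustrative sketch only: conceptually,
+	 * sort_stream_list_for_solid_compression() orders two streams s1 and
+	 * s2 roughly as
+	 *
+	 *	if ((ret = cmp(extension(s1), extension(s2))) != 0)
+	 *		return ret;   // group .dll with .dll, .txt with .txt, ...
+	 *	return cmp(name(s1), name(s2));  // then cluster same-named files
+	 *
+	 * where extension() and name() stand in for hypothetical name-derived
+	 * keys, and streams from existing solid resources keep their original
+	 * grouping instead.  See the comment above for why this is only a
+	 * heuristic.
+	 */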
ctx.progress_data.progfunc = progfunc;
ctx.progress_data.progctx = progctx;
&ctx.progress_data);
out_destroy_context:
- if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE && out_chunk_size > STACK_MAX)
- FREE(ctx.chunk_buf);
FREE(ctx.chunk_csizes);
if (ctx.compressor)
ctx.compressor->destroy(ctx.compressor);