X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;ds=sidebyside;f=src%2Fwrite.c;h=842ca9b111dd38f0c9458be29af2858a205a538b;hb=d8e380e8314cdb592149a651a19690d102a1865b;hp=aab567f86c7817f1e8d132eb7d8d3e28d3ee961b;hpb=894f8dab7f174bf289e6b5e9ea54374d10d6e62f;p=wimlib diff --git a/src/write.c b/src/write.c index aab567f8..842ca9b1 100644 --- a/src/write.c +++ b/src/write.c @@ -6,7 +6,7 @@ */ /* - * Copyright (C) 2012, 2013, 2014 Eric Biggers + * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -51,6 +51,7 @@ #include "wimlib/paths.h" #include "wimlib/progress.h" #include "wimlib/resource.h" +#include "wimlib/solid.h" #ifdef __WIN32__ # include "wimlib/win32.h" /* win32_rename_replacement() */ #endif @@ -63,6 +64,7 @@ #define WRITE_RESOURCE_FLAG_PIPABLE 0x00000002 #define WRITE_RESOURCE_FLAG_SOLID 0x00000004 #define WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE 0x00000008 +#define WRITE_RESOURCE_FLAG_SOLID_SORT 0x00000010 static inline int write_flags_to_resource_flags(int write_flags) @@ -77,6 +79,10 @@ write_flags_to_resource_flags(int write_flags) write_resource_flags |= WRITE_RESOURCE_FLAG_SOLID; if (write_flags & WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES) write_resource_flags |= WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE; + if ((write_flags & (WIMLIB_WRITE_FLAG_SOLID | + WIMLIB_WRITE_FLAG_NO_SOLID_SORT)) == + WIMLIB_WRITE_FLAG_SOLID) + write_resource_flags |= WRITE_RESOURCE_FLAG_SOLID_SORT; return write_resource_flags; } @@ -373,12 +379,13 @@ struct write_streams_ctx { * uncompressed. */ struct chunk_compressor *compressor; - /* Buffer for dividing the read data into chunks of size - * @out_chunk_size. */ - u8 *chunk_buf; + /* A buffer of size @out_chunk_size that has been loaned out from the + * chunk compressor and is currently being filled with the uncompressed + * data of the next chunk. */ + u8 *cur_chunk_buf; - /* Number of bytes in @chunk_buf that are currently filled. */ - size_t chunk_buf_filled; + /* Number of bytes in @cur_chunk_buf that are currently filled. */ + size_t cur_chunk_buf_filled; /* List of streams that currently have chunks being compressed. */ struct list_head pending_streams; @@ -1051,22 +1058,23 @@ write_error: } static int -submit_chunk_for_compression(struct write_streams_ctx *ctx, - const void *chunk, size_t size) +prepare_chunk_buffer(struct write_streams_ctx *ctx) { - /* While we are unable to submit the chunk for compression (due to too - * many chunks already outstanding), retrieve and write the next - * compressed chunk. */ - while (!ctx->compressor->submit_chunk(ctx->compressor, chunk, size)) { + /* While we are unable to get a new chunk buffer due to too many chunks + * already outstanding, retrieve and write the next compressed chunk. */ + while (!(ctx->cur_chunk_buf = + ctx->compressor->get_chunk_buffer(ctx->compressor))) + { const void *cchunk; u32 csize; u32 usize; bool bret; int ret; - bret = ctx->compressor->get_chunk(ctx->compressor, - &cchunk, &csize, &usize); - + bret = ctx->compressor->get_compression_result(ctx->compressor, + &cchunk, + &csize, + &usize); wimlib_assert(bret); ret = write_chunk(ctx, cchunk, csize, usize); @@ -1101,55 +1109,40 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) chunkptr = chunk; chunkend = chunkptr + size; do { - const u8 *resized_chunk; size_t needed_chunk_size; + size_t bytes_consumed; + + if (!ctx->cur_chunk_buf) { + ret = prepare_chunk_buffer(ctx); + if (ret) + return ret; + } if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { needed_chunk_size = ctx->out_chunk_size; } else { - u64 res_bytes_remaining; - - res_bytes_remaining = ctx->cur_read_stream_size - - ctx->cur_read_stream_offset; needed_chunk_size = min(ctx->out_chunk_size, - ctx->chunk_buf_filled + - res_bytes_remaining); + ctx->cur_chunk_buf_filled + + (ctx->cur_read_stream_size - + ctx->cur_read_stream_offset)); } - if (ctx->chunk_buf_filled == 0 && - chunkend - chunkptr >= needed_chunk_size) - { - /* No intermediate buffering needed. */ - resized_chunk = chunkptr; - chunkptr += needed_chunk_size; - ctx->cur_read_stream_offset += needed_chunk_size; - } else { - /* Intermediate buffering needed. */ - size_t bytes_consumed; - - bytes_consumed = min(chunkend - chunkptr, - needed_chunk_size - ctx->chunk_buf_filled); + bytes_consumed = min(chunkend - chunkptr, + needed_chunk_size - ctx->cur_chunk_buf_filled); - memcpy(&ctx->chunk_buf[ctx->chunk_buf_filled], - chunkptr, bytes_consumed); + memcpy(&ctx->cur_chunk_buf[ctx->cur_chunk_buf_filled], + chunkptr, bytes_consumed); - chunkptr += bytes_consumed; - ctx->cur_read_stream_offset += bytes_consumed; - ctx->chunk_buf_filled += bytes_consumed; - if (ctx->chunk_buf_filled == needed_chunk_size) { - resized_chunk = ctx->chunk_buf; - ctx->chunk_buf_filled = 0; - } else { - break; - } + chunkptr += bytes_consumed; + ctx->cur_read_stream_offset += bytes_consumed; + ctx->cur_chunk_buf_filled += bytes_consumed; + if (ctx->cur_chunk_buf_filled == needed_chunk_size) { + ctx->compressor->signal_chunk_filled(ctx->compressor, + ctx->cur_chunk_buf_filled); + ctx->cur_chunk_buf = NULL; + ctx->cur_chunk_buf_filled = 0; } - - ret = submit_chunk_for_compression(ctx, resized_chunk, - needed_chunk_size); - if (ret) - return ret; - } while (chunkptr != chunkend); return 0; } @@ -1368,14 +1361,14 @@ finish_remaining_chunks(struct write_streams_ctx *ctx) if (ctx->compressor == NULL) return 0; - if (ctx->chunk_buf_filled != 0) { - ret = submit_chunk_for_compression(ctx, ctx->chunk_buf, - ctx->chunk_buf_filled); - if (ret) - return ret; + if (ctx->cur_chunk_buf_filled != 0) { + ctx->compressor->signal_chunk_filled(ctx->compressor, + ctx->cur_chunk_buf_filled); } - while (ctx->compressor->get_chunk(ctx->compressor, &cdata, &csize, &usize)) { + while (ctx->compressor->get_compression_result(ctx->compressor, &cdata, + &csize, &usize)) + { ret = write_chunk(ctx, cdata, csize, usize); if (ret) return ret; @@ -1561,14 +1554,6 @@ write_stream_list(struct list_head *stream_list, memset(&ctx, 0, sizeof(ctx)); - /* Pre-sorting the streams is required for compute_stream_list_stats(). - * Afterwards, read_stream_list() need not sort them again. */ - ret = sort_stream_list_by_sequential_order(stream_list, - offsetof(struct wim_lookup_table_entry, - write_streams_list)); - if (ret) - return ret; - ctx.out_fd = out_fd; ctx.lookup_table = lookup_table; ctx.out_ctype = out_ctype; @@ -1576,22 +1561,33 @@ write_stream_list(struct list_head *stream_list, ctx.write_resource_flags = write_resource_flags; ctx.filter_ctx = filter_ctx; - if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) { - wimlib_assert(out_chunk_size != 0); - if (out_chunk_size <= STACK_MAX) { - ctx.chunk_buf = alloca(out_chunk_size); - } else { - ctx.chunk_buf = MALLOC(out_chunk_size); - if (ctx.chunk_buf == NULL) { - ret = WIMLIB_ERR_NOMEM; - goto out_destroy_context; - } - } - } - ctx.chunk_buf_filled = 0; + /* + * We normally sort the streams to write by a "sequential" order that is + * optimized for reading. But when using solid compression, we instead + * sort the streams by file extension and file name (when applicable; + * and we don't do this for streams from solid resources) so that + * similar files are grouped together, which improves the compression + * ratio. This is somewhat of a hack since a stream does not + * necessarily correspond one-to-one with a filename, nor is there any + * guarantee that two files with similar names or extensions are + * actually similar in content. A potential TODO is to sort the streams + * based on some measure of similarity of their actual contents. + */ + + ret = sort_stream_list_by_sequential_order(stream_list, + offsetof(struct wim_lookup_table_entry, + write_streams_list)); + if (ret) + return ret; compute_stream_list_stats(stream_list, &ctx); + if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID_SORT) { + ret = sort_stream_list_for_solid_compression(stream_list); + if (unlikely(ret)) + WARNING("Failed to sort streams for solid compression. Continuing anyways."); + } + ctx.progress_data.progfunc = progfunc; ctx.progress_data.progctx = progctx; @@ -1730,8 +1726,6 @@ out_write_raw_copy_resources: &ctx.progress_data); out_destroy_context: - if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE && out_chunk_size > STACK_MAX) - FREE(ctx.chunk_buf); FREE(ctx.chunk_csizes); if (ctx.compressor) ctx.compressor->destroy(ctx.compressor);