X-Git-Url: https://wimlib.net/git/?a=blobdiff_plain;ds=sidebyside;f=src%2Fwrite.c;h=842ca9b111dd38f0c9458be29af2858a205a538b;hb=d8e380e8314cdb592149a651a19690d102a1865b;hp=fcc5983fca50ee2c300342a2e5a360285c8ef43d;hpb=7251c7d0afac3b738dda1c4f45e6d3d3090f2622;p=wimlib diff --git a/src/write.c b/src/write.c index fcc5983f..842ca9b1 100644 --- a/src/write.c +++ b/src/write.c @@ -6,7 +6,7 @@ */ /* - * Copyright (C) 2012, 2013, 2014 Eric Biggers + * Copyright (C) 2012, 2013, 2014, 2015 Eric Biggers * * This file is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License as published by the Free @@ -32,6 +32,12 @@ # include #endif +#include +#include +#include +#include + +#include "wimlib/alloca.h" #include "wimlib/assert.h" #include "wimlib/chunk_compressor.h" #include "wimlib/endianness.h" @@ -45,26 +51,20 @@ #include "wimlib/paths.h" #include "wimlib/progress.h" #include "wimlib/resource.h" +#include "wimlib/solid.h" #ifdef __WIN32__ # include "wimlib/win32.h" /* win32_rename_replacement() */ #endif #include "wimlib/write.h" #include "wimlib/xml.h" -#include -#include -#include -#include - -#ifdef HAVE_ALLOCA_H -# include -#endif /* wimlib internal flags used when writing resources. 
*/ #define WRITE_RESOURCE_FLAG_RECOMPRESS 0x00000001 #define WRITE_RESOURCE_FLAG_PIPABLE 0x00000002 -#define WRITE_RESOURCE_FLAG_PACK_STREAMS 0x00000004 +#define WRITE_RESOURCE_FLAG_SOLID 0x00000004 #define WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE 0x00000008 +#define WRITE_RESOURCE_FLAG_SOLID_SORT 0x00000010 static inline int write_flags_to_resource_flags(int write_flags) @@ -75,10 +75,14 @@ write_flags_to_resource_flags(int write_flags) write_resource_flags |= WRITE_RESOURCE_FLAG_RECOMPRESS; if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE) write_resource_flags |= WRITE_RESOURCE_FLAG_PIPABLE; - if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) - write_resource_flags |= WRITE_RESOURCE_FLAG_PACK_STREAMS; + if (write_flags & WIMLIB_WRITE_FLAG_SOLID) + write_resource_flags |= WRITE_RESOURCE_FLAG_SOLID; if (write_flags & WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES) write_resource_flags |= WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE; + if ((write_flags & (WIMLIB_WRITE_FLAG_SOLID | + WIMLIB_WRITE_FLAG_NO_SOLID_SORT)) == + WIMLIB_WRITE_FLAG_SOLID) + write_resource_flags |= WRITE_RESOURCE_FLAG_SOLID_SORT; return write_resource_flags; } @@ -183,15 +187,15 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, rspec->chunk_size == out_chunk_size); } - if ((rspec->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && - (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) + if ((rspec->flags & WIM_RESHDR_FLAG_SOLID) && + (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)) { - /* Packed resource: Such resources may contain multiple streams, + /* Solid resource: Such resources may contain multiple streams, * and in general only a subset of them need to be written. As * a heuristic, re-use the raw data if more than two-thirds the * uncompressed size is being written. 
*/ - /* Note: packed resources contain a header that specifies the + /* Note: solid resources contain a header that specifies the * compression type and chunk size; therefore we don't need to * check if they are compatible with @out_ctype and * @out_chunk_size. */ @@ -212,7 +216,7 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, static u8 filter_resource_flags(u8 flags) { - return (flags & ~(WIM_RESHDR_FLAG_PACKED_STREAMS | + return (flags & ~(WIM_RESHDR_FLAG_SOLID | WIM_RESHDR_FLAG_COMPRESSED | WIM_RESHDR_FLAG_SPANNED | WIM_RESHDR_FLAG_FREE)); @@ -226,9 +230,9 @@ stream_set_out_reshdr_for_reuse(struct wim_lookup_table_entry *lte) wimlib_assert(lte->resource_location == RESOURCE_IN_WIM); rspec = lte->rspec; - if (rspec->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { + if (rspec->flags & WIM_RESHDR_FLAG_SOLID) { - wimlib_assert(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS); + wimlib_assert(lte->flags & WIM_RESHDR_FLAG_SOLID); lte->out_reshdr.offset_in_wim = lte->offset_in_res; lte->out_reshdr.uncompressed_size = 0; @@ -238,7 +242,7 @@ stream_set_out_reshdr_for_reuse(struct wim_lookup_table_entry *lte) lte->out_res_size_in_wim = rspec->size_in_wim; lte->out_res_uncompressed_size = rspec->uncompressed_size; } else { - wimlib_assert(!(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS)); + wimlib_assert(!(lte->flags & WIM_RESHDR_FLAG_SOLID)); lte->out_reshdr.offset_in_wim = rspec->offset_in_wim; lte->out_reshdr.uncompressed_size = rspec->uncompressed_size; @@ -375,19 +379,20 @@ struct write_streams_ctx { * uncompressed. */ struct chunk_compressor *compressor; - /* Buffer for dividing the read data into chunks of size - * @out_chunk_size. */ - u8 *chunk_buf; + /* A buffer of size @out_chunk_size that has been loaned out from the + * chunk compressor and is currently being filled with the uncompressed + * data of the next chunk. */ + u8 *cur_chunk_buf; - /* Number of bytes in @chunk_buf that are currently filled. 
*/ - size_t chunk_buf_filled; + /* Number of bytes in @cur_chunk_buf that are currently filled. */ + size_t cur_chunk_buf_filled; /* List of streams that currently have chunks being compressed. */ struct list_head pending_streams; - /* List of streams in the resource pack. Streams are moved here after - * @pending_streams only when writing a packed resource. */ - struct list_head pack_streams; + /* List of streams in the solid resource. Streams are moved here after + * @pending_streams only when writing a solid resource. */ + struct list_head solid_streams; /* Current uncompressed offset in the stream being read. */ u64 cur_read_stream_offset; @@ -428,12 +433,12 @@ begin_chunk_table(struct write_streams_ctx *ctx, u64 res_expected_size) /* Calculate the number of chunks and chunk entries that should be * needed for the resource. These normally will be the final values, - * but in PACKED_STREAMS mode some of the streams we're planning to - * write into the resource may be duplicates, and therefore discarded, - * potentially decreasing the number of chunk entries needed. */ + * but in SOLID mode some of the streams we're planning to write into + * the resource may be duplicates, and therefore discarded, potentially + * decreasing the number of chunk entries needed. */ expected_num_chunks = DIV_ROUND_UP(res_expected_size, ctx->out_chunk_size); expected_num_chunk_entries = expected_num_chunks; - if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) + if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)) expected_num_chunk_entries--; /* Make sure the chunk_csizes array is long enough to store the @@ -460,16 +465,16 @@ begin_chunk_table(struct write_streams_ctx *ctx, u64 res_expected_size) if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE)) { /* Reserve space for the chunk table in the output file. 
In the - * case of packed resources this reserves the upper bound for - * the needed space, not necessarily the exact space which will + * case of solid resources this reserves the upper bound for the + * needed space, not necessarily the exact space which will * prove to be needed. At this point, we just use @chunk_csizes * for a buffer of 0's because the actual compressed chunk sizes * are unknown. */ reserve_size = expected_num_chunk_entries * get_chunk_entry_size(res_expected_size, 0 != (ctx->write_resource_flags & - WRITE_RESOURCE_FLAG_PACK_STREAMS)); - if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) + WRITE_RESOURCE_FLAG_SOLID)); + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) reserve_size += sizeof(struct alt_chunk_table_header_disk); memset(ctx->chunk_csizes, 0, reserve_size); ret = full_write(ctx->out_fd, ctx->chunk_csizes, reserve_size); @@ -511,12 +516,12 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, actual_num_chunks = ctx->chunk_index; actual_num_chunk_entries = actual_num_chunks; - if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) + if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)) actual_num_chunk_entries--; chunk_entry_size = get_chunk_entry_size(res_actual_size, 0 != (ctx->write_resource_flags & - WRITE_RESOURCE_FLAG_PACK_STREAMS)); + WRITE_RESOURCE_FLAG_SOLID)); typedef le64 _may_alias_attribute aliased_le64_t; typedef le32 _may_alias_attribute aliased_le32_t; @@ -524,7 +529,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, if (chunk_entry_size == 4) { aliased_le32_t *entries = (aliased_le32_t*)ctx->chunk_csizes; - if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { for (size_t i = 0; i < actual_num_chunk_entries; i++) entries[i] = cpu_to_le32(ctx->chunk_csizes[i]); } else { @@ -538,7 +543,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, 
} else { aliased_le64_t *entries = (aliased_le64_t*)ctx->chunk_csizes; - if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { for (size_t i = 0; i < actual_num_chunk_entries; i++) entries[i] = cpu_to_le64(ctx->chunk_csizes[i]); } else { @@ -568,7 +573,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, chunk_table_offset = ctx->chunks_start_offset - chunk_table_size; - if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { struct alt_chunk_table_header_disk hdr; hdr.res_usize = cpu_to_le64(res_actual_size); @@ -615,7 +620,7 @@ end_write_resource(struct write_streams_ctx *ctx, struct wim_reshdr *out_reshdr) u64 res_offset_in_wim; wimlib_assert(ctx->cur_write_stream_offset == ctx->cur_write_res_size || - (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)); + (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)); res_uncompressed_size = ctx->cur_write_res_size; if (ctx->compressor) { @@ -773,7 +778,7 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx) list_del(&lte->lookup_table_list); if (lte_new->will_be_in_output_wim) lte_new->out_refcnt += lte->out_refcnt; - if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) ctx->cur_write_res_size -= lte->size; if (!ret) ret = done_with_stream(lte, ctx); @@ -850,7 +855,7 @@ write_stream_uncompressed(struct wim_lookup_table_entry *lte, lte->out_reshdr.size_in_wim = lte->size; lte->out_reshdr.flags &= ~(WIM_RESHDR_FLAG_COMPRESSED | - WIM_RESHDR_FLAG_PACKED_STREAMS); + WIM_RESHDR_FLAG_SOLID); return 0; } @@ -877,7 +882,7 @@ should_rewrite_stream_uncompressed(const struct write_streams_ctx *ctx, return false; /* If the stream that would need to be re-read is located in a solid - * block in another WIM file, then re-reading it would be costly.
So + * resource in another WIM file, then re-reading it would be costly. So * don't do it. * * Exception: if the compressed size happens to be *exactly* the same as @@ -892,7 +897,7 @@ should_rewrite_stream_uncompressed(const struct write_streams_ctx *ctx, * obtain the uncompressed data by decompressing the compressed data we * wrote to the output file. */ - if ((lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && + if ((lte->flags & WIM_RESHDR_FLAG_SOLID) && (lte->out_reshdr.size_in_wim != lte->out_reshdr.uncompressed_size)) return false; @@ -937,9 +942,9 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, struct wim_lookup_table_entry, write_streams_list); if (ctx->cur_write_stream_offset == 0 && - !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) + !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)) { - /* Starting to write a new stream in non-packed mode. */ + /* Starting to write a new stream in non-solid mode. */ if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { int additional_reshdr_flags = 0; @@ -987,8 +992,8 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, completed_size = usize; completed_stream_count = 0; - if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { - /* Wrote chunk in packed mode. It may have finished multiple + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { + /* Wrote chunk in solid mode. It may have finished multiple * streams. */ struct wim_lookup_table_entry *next_lte; @@ -1006,13 +1011,13 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, ret = done_with_stream(lte, ctx); if (ret) return ret; - list_move_tail(&lte->write_streams_list, &ctx->pack_streams); + list_move_tail(&lte->write_streams_list, &ctx->solid_streams); completed_stream_count++; lte = next_lte; } } else { - /* Wrote chunk in non-packed mode. It may have finished a + /* Wrote chunk in non-solid mode. It may have finished a * stream.
*/ if (ctx->cur_write_stream_offset == lte->size) { @@ -1053,22 +1058,23 @@ write_error: } static int -submit_chunk_for_compression(struct write_streams_ctx *ctx, - const void *chunk, size_t size) +prepare_chunk_buffer(struct write_streams_ctx *ctx) { - /* While we are unable to submit the chunk for compression (due to too - * many chunks already outstanding), retrieve and write the next - * compressed chunk. */ - while (!ctx->compressor->submit_chunk(ctx->compressor, chunk, size)) { + /* While we are unable to get a new chunk buffer due to too many chunks + * already outstanding, retrieve and write the next compressed chunk. */ + while (!(ctx->cur_chunk_buf = + ctx->compressor->get_chunk_buffer(ctx->compressor))) + { const void *cchunk; u32 csize; u32 usize; bool bret; int ret; - bret = ctx->compressor->get_chunk(ctx->compressor, - &cchunk, &csize, &usize); - + bret = ctx->compressor->get_compression_result(ctx->compressor, + &cchunk, + &csize, + &usize); wimlib_assert(bret); ret = write_chunk(ctx, cchunk, csize, usize); @@ -1103,55 +1109,40 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) chunkptr = chunk; chunkend = chunkptr + size; do { - const u8 *resized_chunk; size_t needed_chunk_size; + size_t bytes_consumed; - if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (!ctx->cur_chunk_buf) { + ret = prepare_chunk_buffer(ctx); + if (ret) + return ret; + } + + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { needed_chunk_size = ctx->out_chunk_size; } else { - u64 res_bytes_remaining; - - res_bytes_remaining = ctx->cur_read_stream_size - - ctx->cur_read_stream_offset; needed_chunk_size = min(ctx->out_chunk_size, - ctx->chunk_buf_filled + - res_bytes_remaining); + ctx->cur_chunk_buf_filled + + (ctx->cur_read_stream_size - + ctx->cur_read_stream_offset)); } - if (ctx->chunk_buf_filled == 0 && - chunkend - chunkptr >= needed_chunk_size) - { - /* No intermediate buffering needed. 
*/ - resized_chunk = chunkptr; - chunkptr += needed_chunk_size; - ctx->cur_read_stream_offset += needed_chunk_size; - } else { - /* Intermediate buffering needed. */ - size_t bytes_consumed; + bytes_consumed = min(chunkend - chunkptr, + needed_chunk_size - ctx->cur_chunk_buf_filled); - bytes_consumed = min(chunkend - chunkptr, - needed_chunk_size - ctx->chunk_buf_filled); + memcpy(&ctx->cur_chunk_buf[ctx->cur_chunk_buf_filled], + chunkptr, bytes_consumed); - memcpy(&ctx->chunk_buf[ctx->chunk_buf_filled], - chunkptr, bytes_consumed); - - chunkptr += bytes_consumed; - ctx->cur_read_stream_offset += bytes_consumed; - ctx->chunk_buf_filled += bytes_consumed; - if (ctx->chunk_buf_filled == needed_chunk_size) { - resized_chunk = ctx->chunk_buf; - ctx->chunk_buf_filled = 0; - } else { - break; - } + chunkptr += bytes_consumed; + ctx->cur_read_stream_offset += bytes_consumed; + ctx->cur_chunk_buf_filled += bytes_consumed; + if (ctx->cur_chunk_buf_filled == needed_chunk_size) { + ctx->compressor->signal_chunk_filled(ctx->compressor, + ctx->cur_chunk_buf_filled); + ctx->cur_chunk_buf = NULL; + ctx->cur_chunk_buf_filled = 0; } - - ret = submit_chunk_for_compression(ctx, resized_chunk, - needed_chunk_size); - if (ret) - return ret; - } while (chunkptr != chunkend); return 0; } @@ -1318,7 +1309,7 @@ write_raw_copy_resource(struct wim_resource_spec *in_rspec, list_for_each_entry(lte, &in_rspec->stream_list, rspec_node) { if (lte->will_be_in_output_wim) { stream_set_out_reshdr_for_reuse(lte); - if (in_rspec->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) + if (in_rspec->flags & WIM_RESHDR_FLAG_SOLID) lte->out_res_offset_in_wim = out_offset_in_wim; else lte->out_reshdr.offset_in_wim = out_offset_in_wim; @@ -1343,7 +1334,7 @@ write_raw_copy_resources(struct list_head *raw_copy_streams, list_for_each_entry(lte, raw_copy_streams, write_streams_list) { if (lte->rspec->raw_copy_ok) { - /* Write each packed resource only one time, no matter + /* Write each solid resource only one time, no 
matter * how many streams reference it. */ ret = write_raw_copy_resource(lte->rspec, out_fd); if (ret) @@ -1370,14 +1361,14 @@ finish_remaining_chunks(struct write_streams_ctx *ctx) if (ctx->compressor == NULL) return 0; - if (ctx->chunk_buf_filled != 0) { - ret = submit_chunk_for_compression(ctx, ctx->chunk_buf, - ctx->chunk_buf_filled); - if (ret) - return ret; + if (ctx->cur_chunk_buf_filled != 0) { + ctx->compressor->signal_chunk_filled(ctx->compressor, + ctx->cur_chunk_buf_filled); } - while (ctx->compressor->get_chunk(ctx->compressor, &cdata, &csize, &usize)) { + while (ctx->compressor->get_compression_result(ctx->compressor, &cdata, + &csize, &usize)) + { ret = write_chunk(ctx, cdata, csize, usize); if (ret) return ret; @@ -1444,11 +1435,11 @@ init_done_with_file_info(struct list_head *stream_list) * furthermore do so in such a way that no seeking backwards in * @out_fd will be performed (so it may be a pipe). * - * WRITE_RESOURCE_FLAG_PACK_STREAMS: - * Pack all the streams into a single resource rather than writing - * them in separate resources. This flag is only valid if the WIM - * version number has been, or will be, set to - * WIM_VERSION_PACKED_STREAMS. This flag may not be combined with + * WRITE_RESOURCE_FLAG_SOLID: + * Combine all the streams into a single resource rather than + * writing them in separate resources. This flag is only valid if + * the WIM version number has been, or will be, set to + * WIM_VERSION_SOLID. This flag may not be combined with * WRITE_RESOURCE_FLAG_PIPABLE. * * @out_ctype @@ -1480,15 +1471,15 @@ init_done_with_file_info(struct list_head *stream_list) * can be NULL. * * This function will write the streams in @stream_list to resources in - * consecutive positions in the output WIM file, or to a single packed resource - * if WRITE_RESOURCE_FLAG_PACK_STREAMS was specified in @write_resource_flags. 
- * In both cases, the @out_reshdr of the `struct wim_lookup_table_entry' for - * each stream written will be updated to specify its location, size, and flags - * in the output WIM. In the packed resource case, - * WIM_RESHDR_FLAG_PACKED_STREAMS will be set in the @flags field of each - * @out_reshdr, and furthermore @out_res_offset_in_wim and @out_res_size_in_wim - * of each @out_reshdr will be set to the offset and size, respectively, in the - * output WIM of the packed resource containing the corresponding stream. + * consecutive positions in the output WIM file, or to a single solid resource + * if WRITE_RESOURCE_FLAG_SOLID was specified in @write_resource_flags. In both + * cases, the @out_reshdr of the `struct wim_lookup_table_entry' for each stream + * written will be updated to specify its location, size, and flags in the + * output WIM. In the solid resource case, WIM_RESHDR_FLAG_SOLID will be set in + * the @flags field of each @out_reshdr, and furthermore @out_res_offset_in_wim + * and @out_res_size_in_wim of each @out_reshdr will be set to the offset and + * size, respectively, in the output WIM of the solid resource containing the + * corresponding stream. * * Each of the streams to write may be in any location supported by the * resource-handling code (specifically, read_stream_list()), such as the @@ -1504,7 +1495,7 @@ init_done_with_file_info(struct list_head *stream_list) * @will_be_in_output_wim member be set to 1 on all streams in @stream_list as * well as any other streams not in @stream_list that will be in the output WIM * file, but set to 0 on any other streams in the output WIM's lookup table or - * sharing a packed resource with a stream in @stream_list. Still furthermore, + * sharing a solid resource with a stream in @stream_list. 
Still furthermore, * if on-the-fly deduplication of streams is possible, then all streams in * @stream_list must also be linked by @lookup_table_list along with any other * streams that have @will_be_in_output_wim set. @@ -1544,9 +1535,9 @@ write_stream_list(struct list_head *stream_list, struct list_head raw_copy_streams; wimlib_assert((write_resource_flags & - (WRITE_RESOURCE_FLAG_PACK_STREAMS | + (WRITE_RESOURCE_FLAG_SOLID | WRITE_RESOURCE_FLAG_PIPABLE)) != - (WRITE_RESOURCE_FLAG_PACK_STREAMS | + (WRITE_RESOURCE_FLAG_SOLID | WRITE_RESOURCE_FLAG_PIPABLE)); remove_zero_length_streams(stream_list); @@ -1563,14 +1554,6 @@ write_stream_list(struct list_head *stream_list, memset(&ctx, 0, sizeof(ctx)); - /* Pre-sorting the streams is required for compute_stream_list_stats(). - * Afterwards, read_stream_list() need not sort them again. */ - ret = sort_stream_list_by_sequential_order(stream_list, - offsetof(struct wim_lookup_table_entry, - write_streams_list)); - if (ret) - return ret; - ctx.out_fd = out_fd; ctx.lookup_table = lookup_table; ctx.out_ctype = out_ctype; @@ -1578,22 +1561,33 @@ write_stream_list(struct list_head *stream_list, ctx.write_resource_flags = write_resource_flags; ctx.filter_ctx = filter_ctx; - if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) { - wimlib_assert(out_chunk_size != 0); - if (out_chunk_size <= STACK_MAX) { - ctx.chunk_buf = alloca(out_chunk_size); - } else { - ctx.chunk_buf = MALLOC(out_chunk_size); - if (ctx.chunk_buf == NULL) { - ret = WIMLIB_ERR_NOMEM; - goto out_destroy_context; - } - } - } - ctx.chunk_buf_filled = 0; + /* + * We normally sort the streams to write by a "sequential" order that is + * optimized for reading. But when using solid compression, we instead + * sort the streams by file extension and file name (when applicable; + * and we don't do this for streams from solid resources) so that + * similar files are grouped together, which improves the compression + * ratio. 
This is somewhat of a hack since a stream does not + * necessarily correspond one-to-one with a filename, nor is there any + * guarantee that two files with similar names or extensions are + * actually similar in content. A potential TODO is to sort the streams + * based on some measure of similarity of their actual contents. + */ + + ret = sort_stream_list_by_sequential_order(stream_list, + offsetof(struct wim_lookup_table_entry, + write_streams_list)); + if (ret) + return ret; compute_stream_list_stats(stream_list, &ctx); + if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID_SORT) { + ret = sort_stream_list_for_solid_compression(stream_list); + if (unlikely(ret)) + WARNING("Failed to sort streams for solid compression. Continuing anyways."); + } + ctx.progress_data.progfunc = progfunc; ctx.progress_data.progctx = progctx; @@ -1655,7 +1649,7 @@ write_stream_list(struct list_head *stream_list, ctx.progress_data.progress.write_streams.num_threads); INIT_LIST_HEAD(&ctx.pending_streams); - INIT_LIST_HEAD(&ctx.pack_streams); + INIT_LIST_HEAD(&ctx.solid_streams); ret = call_progress(ctx.progress_data.progfunc, WIMLIB_PROGRESS_MSG_WRITE_STREAMS, @@ -1664,7 +1658,7 @@ write_stream_list(struct list_head *stream_list, if (ret) goto out_destroy_context; - if (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { ret = begin_write_resource(&ctx, ctx.num_bytes_to_compress); if (ret) goto out_destroy_context; @@ -1696,7 +1690,7 @@ write_stream_list(struct list_head *stream_list, if (ret) goto out_destroy_context; - if (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { struct wim_reshdr reshdr; struct wim_lookup_table_entry *lte; u64 offset_in_res; @@ -1705,16 +1699,16 @@ write_stream_list(struct list_head *stream_list, if (ret) goto out_destroy_context; - DEBUG("Ending packed resource: %lu %lu %lu.", + DEBUG("Ending solid resource: %lu %lu %lu.", 
reshdr.offset_in_wim, reshdr.size_in_wim, reshdr.uncompressed_size); offset_in_res = 0; - list_for_each_entry(lte, &ctx.pack_streams, write_streams_list) { + list_for_each_entry(lte, &ctx.solid_streams, write_streams_list) { lte->out_reshdr.size_in_wim = lte->size; lte->out_reshdr.flags = filter_resource_flags(lte->flags); - lte->out_reshdr.flags |= WIM_RESHDR_FLAG_PACKED_STREAMS; + lte->out_reshdr.flags |= WIM_RESHDR_FLAG_SOLID; lte->out_reshdr.uncompressed_size = 0; lte->out_reshdr.offset_in_wim = offset_in_res; lte->out_res_offset_in_wim = reshdr.offset_in_wim; @@ -1732,8 +1726,6 @@ out_write_raw_copy_resources: &ctx.progress_data); out_destroy_context: - if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE && out_chunk_size > STACK_MAX) - FREE(ctx.chunk_buf); FREE(ctx.chunk_csizes); if (ctx.compressor) ctx.compressor->destroy(ctx.compressor); @@ -1742,15 +1734,16 @@ out_destroy_context: } static int -is_stream_packed(struct wim_lookup_table_entry *lte, void *_ignore) +is_stream_in_solid_resource(struct wim_lookup_table_entry *lte, void *_ignore) { return lte_is_partial(lte); } static bool -wim_has_packed_streams(WIMStruct *wim) +wim_has_solid_resources(WIMStruct *wim) { - return for_lookup_table_entry(wim->lookup_table, is_stream_packed, NULL); + return for_lookup_table_entry(wim->lookup_table, + is_stream_in_solid_resource, NULL); } static int @@ -1766,19 +1759,19 @@ wim_write_stream_list(WIMStruct *wim, write_resource_flags = write_flags_to_resource_flags(write_flags); - /* wimlib v1.7.0: pack streams by default if the WIM version has been - * set to WIM_VERSION_PACKED_STREAMS and at least one stream in the - * WIM's lookup table is located in a packed resource (may be the same + /* wimlib v1.7.0: create a solid WIM file by default if the WIM version + * has been set to WIM_VERSION_SOLID and at least one stream in the + * WIM's lookup table is located in a solid resource (may be the same * WIM, or a different one in the case of export). 
*/ - if (wim->hdr.wim_version == WIM_VERSION_PACKED_STREAMS && - wim_has_packed_streams(wim)) + if (wim->hdr.wim_version == WIM_VERSION_SOLID && + wim_has_solid_resources(wim)) { - write_resource_flags |= WRITE_RESOURCE_FLAG_PACK_STREAMS; + write_resource_flags |= WRITE_RESOURCE_FLAG_SOLID; } - if (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { - out_chunk_size = wim->out_pack_chunk_size; - out_ctype = wim->out_pack_compression_type; + if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) { + out_chunk_size = wim->out_solid_chunk_size; + out_ctype = wim->out_solid_compression_type; } else { out_chunk_size = wim->out_chunk_size; out_ctype = wim->out_compression_type; @@ -1808,7 +1801,7 @@ write_wim_resource(struct wim_lookup_table_entry *lte, lte->will_be_in_output_wim = 1; return write_stream_list(&stream_list, out_fd, - write_resource_flags & ~WRITE_RESOURCE_FLAG_PACK_STREAMS, + write_resource_flags & ~WRITE_RESOURCE_FLAG_SOLID, out_ctype, out_chunk_size, 1, @@ -2270,7 +2263,7 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags) write_resource_flags = write_flags_to_resource_flags(write_flags); - write_resource_flags &= ~WRITE_RESOURCE_FLAG_PACK_STREAMS; + write_resource_flags &= ~WRITE_RESOURCE_FLAG_SOLID; DEBUG("Writing metadata resources (offset=%"PRIu64")", wim->out_fd.offset); @@ -2362,8 +2355,8 @@ cmp_streams_by_out_rspec(const void *p1, const void *p2) lte1 = *(const struct wim_lookup_table_entry**)p1; lte2 = *(const struct wim_lookup_table_entry**)p2; - if (lte1->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { - if (lte2->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) { + if (lte1->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) { + if (lte2->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) { if (lte1->out_res_offset_in_wim != lte2->out_res_offset_in_wim) return cmp_u64(lte1->out_res_offset_in_wim, lte2->out_res_offset_in_wim); @@ -2371,7 +2364,7 @@ cmp_streams_by_out_rspec(const void *p1, const void *p2) return 1; } } else { 
- if (lte2->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) + if (lte2->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) return -1; } return cmp_u64(lte1->out_reshdr.offset_in_wim, @@ -2843,8 +2836,8 @@ write_wim_part(WIMStruct *wim, if (write_flags & WIMLIB_WRITE_FLAG_RETAIN_GUID) DEBUG("\tRETAIN_GUID"); - if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) - DEBUG("\tPACK_STREAMS"); + if (write_flags & WIMLIB_WRITE_FLAG_SOLID) + DEBUG("\tSOLID"); if (write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR) DEBUG("\tFILE_DESCRIPTOR"); @@ -2898,7 +2891,7 @@ write_wim_part(WIMStruct *wim, /* Save previous header, then start initializing the new one. */ memcpy(&hdr_save, &wim->hdr, sizeof(struct wim_header)); - /* Set default integrity, pipable, and packed stream flags. */ + /* Set default integrity, pipable, and solid flags. */ if (!(write_flags & (WIMLIB_WRITE_FLAG_PIPABLE | WIMLIB_WRITE_FLAG_NOT_PIPABLE))) if (wim_is_pipable(wim)) { @@ -2914,11 +2907,11 @@ write_wim_part(WIMStruct *wim, } if ((write_flags & (WIMLIB_WRITE_FLAG_PIPABLE | - WIMLIB_WRITE_FLAG_PACK_STREAMS)) + WIMLIB_WRITE_FLAG_SOLID)) == (WIMLIB_WRITE_FLAG_PIPABLE | - WIMLIB_WRITE_FLAG_PACK_STREAMS)) + WIMLIB_WRITE_FLAG_SOLID)) { - ERROR("Cannot specify both PIPABLE and PACK_STREAMS!"); + ERROR("Cannot specify both PIPABLE and SOLID!"); return WIMLIB_ERR_INVALID_PARAM; } @@ -2929,9 +2922,9 @@ write_wim_part(WIMStruct *wim, wim->hdr.magic = WIM_MAGIC; /* Set appropriate version number. 
*/ - if ((write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) || + if ((write_flags & WIMLIB_WRITE_FLAG_SOLID) || wim->out_compression_type == WIMLIB_COMPRESSION_TYPE_LZMS) - wim->hdr.wim_version = WIM_VERSION_PACKED_STREAMS; + wim->hdr.wim_version = WIM_VERSION_SOLID; else wim->hdr.wim_version = WIM_VERSION_DEFAULT; @@ -3216,9 +3209,9 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, unsigned num_threads) if (wim_has_integrity_table(wim)) write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY; - /* Set WIM version if adding packed streams. */ - if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) - wim->hdr.wim_version = WIM_VERSION_PACKED_STREAMS; + /* Set WIM version if writing solid resources. */ + if (write_flags & WIMLIB_WRITE_FLAG_SOLID) + wim->hdr.wim_version = WIM_VERSION_SOLID; /* Set additional flags for overwrite. */ write_flags |= WIMLIB_WRITE_FLAG_OVERWRITE |