X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fwrite.c;h=411fed9a349baa6f9c1411c9cae0cb8833bc4a14;hp=09b74ec16c8b273ef9c94360c75eaea428fdf36f;hb=f36b35733a7e0bdb6b3e1920d7e6893bd0dc4d55;hpb=c4f28b5ca129af1050dee36690d01e4941efa18f diff --git a/src/write.c b/src/write.c index 09b74ec1..411fed9a 100644 --- a/src/write.c +++ b/src/write.c @@ -34,7 +34,7 @@ # include #endif -#include "wimlib/compress_chunks.h" +#include "wimlib/chunk_compressor.h" #include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/file_io.h" @@ -58,17 +58,22 @@ # include #endif +/* wimlib internal flags used when writing resources. */ +#define WRITE_RESOURCE_FLAG_RECOMPRESS 0x00000001 +#define WRITE_RESOURCE_FLAG_PIPABLE 0x00000002 +#define WRITE_RESOURCE_FLAG_PACK_STREAMS 0x00000004 + static inline int write_flags_to_resource_flags(int write_flags) { int write_resource_flags = 0; if (write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS) - write_resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS; + write_resource_flags |= WRITE_RESOURCE_FLAG_RECOMPRESS; if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE) - write_resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE; + write_resource_flags |= WRITE_RESOURCE_FLAG_PIPABLE; if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) - write_resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS; + write_resource_flags |= WRITE_RESOURCE_FLAG_PACK_STREAMS; return write_resource_flags; } @@ -149,7 +154,7 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, { const struct wim_resource_spec *rspec; - if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS) + if (write_resource_flags & WRITE_RESOURCE_FLAG_RECOMPRESS) return false; if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE) @@ -160,7 +165,7 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, rspec = lte->rspec; - if (rspec->is_pipable != !!(write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE)) + if (rspec->is_pipable != !!(write_resource_flags & 
WRITE_RESOURCE_FLAG_PIPABLE)) return false; @@ -171,8 +176,11 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, rspec->wim->chunk_size == out_chunk_size); } + /* XXX: For compatibility, we can't allow multiple packed resources per + * WIM. */ +#if 0 if ((rspec->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && - (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS)) + (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) { /* Packed resource: Such resources may contain multiple streams, * and in general only a subset of them need to be written. As @@ -193,6 +201,8 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, return (write_size > rspec->uncompressed_size / 2); } +#endif + return false; } @@ -263,20 +273,6 @@ write_pwm_stream_header(const struct wim_lookup_table_entry *lte, return ret; } -#if 0 -static int -seek_and_truncate(struct filedes *out_fd, off_t offset) -{ - if (filedes_seek(out_fd, offset) == -1 || - ftruncate(out_fd->fd, offset)) - { - ERROR_WITH_ERRNO("Failed to truncate output WIM file"); - return WIMLIB_ERR_WRITE; - } - return 0; -} -#endif - struct write_streams_progress_data { wimlib_progress_func_t progress_func; union wimlib_progress_info progress; @@ -286,23 +282,27 @@ struct write_streams_progress_data { static void do_write_streams_progress(struct write_streams_progress_data *progress_data, - u64 size, - bool discarded, - struct wim_lookup_table_entry *cur_stream) + struct wim_lookup_table_entry *cur_stream, + u64 complete_size, + u32 complete_count, + bool discarded) { union wimlib_progress_info *progress = &progress_data->progress; bool new_wim_part; if (discarded) { - progress->write_streams.total_bytes -= size; + progress->write_streams.total_bytes -= complete_size; + progress->write_streams.total_streams -= complete_count; if (progress_data->next_progress != ~(uint64_t)0 && progress_data->next_progress > progress->write_streams.total_bytes) { progress_data->next_progress = progress->write_streams.total_bytes; } 
} else { - progress->write_streams.completed_bytes += size; + progress->write_streams.completed_bytes += complete_size; + progress->write_streams.completed_streams += complete_count; } + new_wim_part = false; if (cur_stream->resource_location == RESOURCE_IN_WIM && cur_stream->rspec->wim != progress_data->prev_wim_part) @@ -313,7 +313,7 @@ do_write_streams_progress(struct write_streams_progress_data *progress_data, } progress_data->prev_wim_part = cur_stream->rspec->wim; } - progress->write_streams.completed_streams++; + if (progress_data->progress_func && (progress->write_streams.completed_bytes >= progress_data->next_progress || new_wim_part)) @@ -372,20 +372,24 @@ struct write_streams_ctx { /* List of streams that currently have chunks being compressed. */ struct list_head pending_streams; + /* List of streams in the resource pack. Streams are moved here after + * @pending_streams only when writing a packed resource. */ + struct list_head pack_streams; + /* Set to true if the stream currently being read was a duplicate, and * therefore the corresponding stream entry needs to be freed once the * read finishes. (In this case we add the duplicate entry to * pending_streams rather than the entry being read.) */ bool stream_was_duplicate; - /* Current uncompressed offset in the resource being read. */ - u64 cur_read_res_offset; + /* Current uncompressed offset in the stream being read. */ + u64 cur_read_stream_offset; - /* Uncompressed size of the resource currently being read. */ - u64 cur_read_res_size; + /* Uncompressed size of the stream currently being read. */ + u64 cur_read_stream_size; - /* Current uncompressed offset in the resource being written. */ - u64 cur_write_res_offset; + /* Current uncompressed offset in the stream being written. */ + u64 cur_write_stream_offset; /* Uncompressed size of resource currently being written. 
*/ u64 cur_write_res_size; @@ -405,16 +409,6 @@ struct write_streams_ctx { u64 chunks_start_offset; }; -static u64 -get_chunk_entry_size(u64 res_size, int write_resource_flags) -{ - if (res_size <= UINT32_MAX || - (write_resource_flags & WIM_RESHDR_FLAG_PACKED_STREAMS)) - return 4; - else - return 8; -} - /* Reserve space for the chunk table and prepare to accumulate the chunk table * in memory. */ static int @@ -432,7 +426,7 @@ begin_chunk_table(struct write_streams_ctx *ctx, u64 res_expected_size) * potentially decreasing the number of chunk entries needed. */ expected_num_chunks = DIV_ROUND_UP(res_expected_size, ctx->out_chunk_size); expected_num_chunk_entries = expected_num_chunks; - if (!(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS)) + if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) expected_num_chunk_entries--; /* Make sure the chunk_csizes array is long enough to store the @@ -457,7 +451,7 @@ begin_chunk_table(struct write_streams_ctx *ctx, u64 res_expected_size) ctx->chunk_index = 0; - if (!(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE)) { + if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE)) { /* Reserve space for the chunk table in the output file. In the * case of packed resources this reserves the upper bound for * the needed space, not necessarily the exact space which will @@ -466,8 +460,9 @@ begin_chunk_table(struct write_streams_ctx *ctx, u64 res_expected_size) * are unknown. 
*/ reserve_size = expected_num_chunk_entries * get_chunk_entry_size(res_expected_size, - ctx->write_resource_flags); - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) + 0 != (ctx->write_resource_flags & + WIM_RESHDR_FLAG_PACKED_STREAMS)); + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) reserve_size += sizeof(struct alt_chunk_table_header_disk); memset(ctx->chunk_csizes, 0, reserve_size); ret = full_write(ctx->out_fd, ctx->chunk_csizes, reserve_size); @@ -493,7 +488,7 @@ begin_write_resource(struct write_streams_ctx *ctx, u64 res_expected_size) /* Output file descriptor is now positioned at the offset at which to * write the first chunk of the resource. */ ctx->chunks_start_offset = ctx->out_fd->offset; - ctx->cur_write_res_offset = 0; + ctx->cur_write_stream_offset = 0; ctx->cur_write_res_size = res_expected_size; return 0; } @@ -509,11 +504,12 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, actual_num_chunks = ctx->chunk_index; actual_num_chunk_entries = actual_num_chunks; - if (!(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS)) + if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) actual_num_chunk_entries--; chunk_entry_size = get_chunk_entry_size(res_actual_size, - ctx->write_resource_flags); + 0 != (ctx->write_resource_flags & + WIM_RESHDR_FLAG_PACKED_STREAMS)); typedef le64 __attribute__((may_alias)) aliased_le64_t; typedef le32 __attribute__((may_alias)) aliased_le32_t; @@ -521,7 +517,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, if (chunk_entry_size == 4) { aliased_le32_t *entries = (aliased_le32_t*)ctx->chunk_csizes; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { for (size_t i = 0; i < actual_num_chunk_entries; i++) entries[i] = cpu_to_le32(ctx->chunk_csizes[i]); } else { @@ -535,7 +531,7 @@ end_chunk_table(struct 
write_streams_ctx *ctx, u64 res_actual_size, } else { aliased_le64_t *entries = (aliased_le64_t*)ctx->chunk_csizes; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { for (size_t i = 0; i < actual_num_chunk_entries; i++) entries[i] = cpu_to_le64(ctx->chunk_csizes[i]); } else { @@ -552,7 +548,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, u64 res_start_offset; u64 res_end_offset; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { ret = full_write(ctx->out_fd, ctx->chunk_csizes, chunk_table_size); if (ret) goto error; @@ -565,7 +561,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, chunk_table_offset = ctx->chunks_start_offset - chunk_table_size; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { struct alt_chunk_table_header_disk hdr; hdr.res_usize = cpu_to_le64(res_actual_size); @@ -611,7 +607,8 @@ end_write_resource(struct write_streams_ctx *ctx, struct wim_reshdr *out_reshdr) u64 res_uncompressed_size; u64 res_offset_in_wim; - wimlib_assert(ctx->cur_write_res_size == ctx->cur_write_res_offset); + wimlib_assert(ctx->cur_write_stream_offset == ctx->cur_write_res_size || + (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)); res_uncompressed_size = ctx->cur_write_res_size; if (ctx->compressor) { @@ -641,8 +638,8 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, wimlib_assert(lte->size > 0); - ctx->cur_read_res_offset = 0; - ctx->cur_read_res_size = lte->size; + ctx->cur_read_stream_offset = 0; + ctx->cur_read_stream_size = lte->size; /* As an optimization, we allow some streams to be "unhashed", meaning * their SHA1 message digests are unknown. 
This is the case with @@ -680,12 +677,13 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, DEBUG("Discarding duplicate stream of " "length %"PRIu64, lte->size); do_write_streams_progress(&ctx->progress_data, - lte->size, true, lte); + lte, lte->size, + 1, true); list_del(<e->write_streams_list); list_del(<e->lookup_table_list); if (lte_new->will_be_in_output_wim) lte_new->out_refcnt += lte->out_refcnt; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) ctx->cur_write_res_size -= lte->size; free_lookup_table_entry(lte); return BEGIN_STREAM_STATUS_SKIP_STREAM; @@ -713,6 +711,54 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, return 0; } +/* Rewrite a stream that was just written compressed as uncompressed instead. + * This function is optional, but if a stream did not compress to less than its + * original size, it might as well be written uncompressed. */ +static int +write_stream_uncompressed(struct wim_lookup_table_entry *lte, + struct filedes *out_fd) +{ + int ret; + u64 begin_offset = lte->out_reshdr.offset_in_wim; + u64 end_offset = out_fd->offset; + + if (filedes_seek(out_fd, begin_offset) == -1) + return 0; + + ret = extract_full_stream_to_fd(lte, out_fd); + if (ret) { + /* Error reading the uncompressed data. */ + if (out_fd->offset == begin_offset && + filedes_seek(out_fd, end_offset) != -1) + { + /* Nothing was actually written yet, and we successfully + * seeked to the end of the compressed resource, so + * don't issue a hard error; just keep the compressed + * resource instead. 
*/ + WARNING("Recovered compressed stream of " + "size %"PRIu64", continuing on.", + lte->size); + return 0; + } + return ret; + } + + wimlib_assert(out_fd->offset - begin_offset == lte->size); + + if (out_fd->offset < end_offset && + 0 != ftruncate(out_fd->fd, out_fd->offset)) + { + ERROR_WITH_ERRNO("Can't truncate output file to " + "offset %"PRIu64, out_fd->offset); + return WIMLIB_ERR_WRITE; + } + + lte->out_reshdr.size_in_wim = lte->size; + lte->out_reshdr.flags &= ~(WIM_RESHDR_FLAG_COMPRESSED | + WIM_RESHDR_FLAG_PACKED_STREAMS); + return 0; +} + /* Write the next chunk of (typically compressed) data to the output WIM, * handling the writing of the chunk table. */ static int @@ -722,16 +768,18 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, int ret; struct wim_lookup_table_entry *lte; + u32 completed_stream_count; + u32 completed_size; lte = list_entry(ctx->pending_streams.next, struct wim_lookup_table_entry, write_streams_list); - if (ctx->cur_write_res_offset == 0 && - !(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS)) + if (ctx->cur_write_stream_offset == 0 && + !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) { /* Starting to write a new stream in non-packed mode. */ - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { int additional_reshdr_flags = 0; if (ctx->compressor != NULL) additional_reshdr_flags |= WIM_RESHDR_FLAG_COMPRESSED; @@ -757,7 +805,7 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, /* If writing a pipable WIM, before the chunk data write a chunk * header that provides the compressed chunk size. 
*/ - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { struct pwm_chunk_hdr chunk_hdr = { .compressed_size = cpu_to_le32(csize), }; @@ -773,36 +821,83 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, if (ret) goto error; - ctx->cur_write_res_offset += usize; + ctx->cur_write_stream_offset += usize; - do_write_streams_progress(&ctx->progress_data, - usize, false, lte); + completed_size = usize; + completed_stream_count = 0; + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { + /* Wrote chunk in packed mode. It may have finished multiple + * streams. */ + while (ctx->cur_write_stream_offset > lte->size) { + struct wim_lookup_table_entry *next; - if (ctx->cur_write_res_offset == ctx->cur_write_res_size && - !(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS)) - { - struct wim_lookup_table_entry *lte; + ctx->cur_write_stream_offset -= lte->size; - lte = list_entry(ctx->pending_streams.next, - struct wim_lookup_table_entry, write_streams_list); - wimlib_assert(ctx->cur_write_res_offset == lte->size); + wimlib_assert(!list_is_singular(&ctx->pending_streams) && + !list_empty(&ctx->pending_streams)); + next = list_entry(lte->write_streams_list.next, + struct wim_lookup_table_entry, + write_streams_list); + list_move_tail(<e->write_streams_list, + &ctx->pack_streams); + lte = next; + completed_stream_count++; + } + if (ctx->cur_write_stream_offset == lte->size) { + ctx->cur_write_stream_offset = 0; + list_move_tail(<e->write_streams_list, + &ctx->pack_streams); + completed_stream_count++; + } + } else { + /* Wrote chunk in non-packed mode. It may have finished a + * stream. */ + if (ctx->cur_write_stream_offset == lte->size) { - /* Finished writing a stream in non-packed mode. 
*/ + completed_stream_count++; - ret = end_write_resource(ctx, <e->out_reshdr); - if (ret) - return ret; + list_del(<e->write_streams_list); - wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size); + wimlib_assert(ctx->cur_write_stream_offset == + ctx->cur_write_res_size); - lte->out_reshdr.flags = filter_resource_flags(lte->flags); - if (ctx->compressor != NULL) - lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED; + ret = end_write_resource(ctx, <e->out_reshdr); + if (ret) + return ret; - list_del(<e->write_streams_list); - ctx->cur_write_res_offset = 0; + lte->out_reshdr.flags = filter_resource_flags(lte->flags); + if (ctx->compressor != NULL) + lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED; + + if (ctx->compressor != NULL && + lte->out_reshdr.size_in_wim >= lte->out_reshdr.uncompressed_size && + !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) && + !(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS)) + { + /* Stream did not compress to less than its original + * size. If we're not writing a pipable WIM (which + * could mean the output file descriptor is + * non-seekable), and the stream isn't located in a + * resource pack (which would make reading it again + * costly), truncate the file to the start of the stream + * and write it uncompressed instead. 
*/ + DEBUG("Stream of size %"PRIu64" did not compress to " + "less than original size; writing uncompressed.", + lte->size); + ret = write_stream_uncompressed(lte, ctx->out_fd); + if (ret) + return ret; + } + wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size); + + ctx->cur_write_stream_offset = 0; + } } + do_write_streams_progress(&ctx->progress_data, lte, + completed_size, completed_stream_count, + false); + return 0; error: @@ -851,7 +946,7 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) ret = write_chunk(ctx, chunk, size, size); if (ret) return ret; - ctx->cur_read_res_offset += size; + ctx->cur_read_stream_offset += size; return 0; } @@ -864,13 +959,13 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) const u8 *resized_chunk; size_t needed_chunk_size; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { needed_chunk_size = ctx->out_chunk_size; } else { u64 res_bytes_remaining; - res_bytes_remaining = ctx->cur_read_res_size - - ctx->cur_read_res_offset; + res_bytes_remaining = ctx->cur_read_stream_size - + ctx->cur_read_stream_offset; needed_chunk_size = min(ctx->out_chunk_size, ctx->chunk_buf_filled + res_bytes_remaining); @@ -882,7 +977,7 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) /* No intermediate buffering needed. */ resized_chunk = chunkptr; chunkptr += needed_chunk_size; - ctx->cur_read_res_offset += needed_chunk_size; + ctx->cur_read_stream_offset += needed_chunk_size; } else { /* Intermediate buffering needed. 
*/ size_t bytes_consumed; @@ -893,10 +988,8 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) memcpy(&ctx->chunk_buf[ctx->chunk_buf_filled], chunkptr, bytes_consumed); - resized_chunk = ctx->chunk_buf; - chunkptr += bytes_consumed; - ctx->cur_read_res_offset += bytes_consumed; + ctx->cur_read_stream_offset += bytes_consumed; ctx->chunk_buf_filled += bytes_consumed; if (ctx->chunk_buf_filled == needed_chunk_size) { resized_chunk = ctx->chunk_buf; @@ -924,7 +1017,7 @@ write_stream_end_read(struct wim_lookup_table_entry *lte, int status, void *_ctx { struct write_streams_ctx *ctx = _ctx; if (status == 0) - wimlib_assert(ctx->cur_read_res_offset == ctx->cur_read_res_size); + wimlib_assert(ctx->cur_read_stream_offset == ctx->cur_read_stream_size); if (ctx->stream_was_duplicate) { free_lookup_table_entry(lte); } else if (lte->unhashed && ctx->lookup_table != NULL) { @@ -1042,7 +1135,8 @@ write_raw_copy_resource(struct wim_resource_spec *in_rspec, out_offset_in_wim += sizeof(struct pwm_stream_hdr); } in_fd = &in_rspec->wim->in_fd; - while (cur_read_offset != end_read_offset) { + wimlib_assert(cur_read_offset != end_read_offset); + do { bytes_to_read = min(sizeof(buf), end_read_offset - cur_read_offset); @@ -1055,7 +1149,8 @@ write_raw_copy_resource(struct wim_resource_spec *in_rspec, return ret; cur_read_offset += bytes_to_read; - } + + } while (cur_read_offset != end_read_offset); list_for_each_entry(lte, &in_rspec->stream_list, rspec_node) { if (lte->will_be_in_output_wim) { @@ -1074,7 +1169,8 @@ write_raw_copy_resource(struct wim_resource_spec *in_rspec, * file being written. 
*/ static int write_raw_copy_resources(struct list_head *raw_copy_resources, - struct filedes *out_fd) + struct filedes *out_fd, + struct write_streams_progress_data *progress_data) { struct wim_lookup_table_entry *lte; int ret; @@ -1083,6 +1179,8 @@ write_raw_copy_resources(struct list_head *raw_copy_resources, ret = write_raw_copy_resource(lte->rspec, out_fd); if (ret) return ret; + do_write_streams_progress(progress_data, lte, lte->size, + 1, false); } return 0; } @@ -1131,6 +1229,120 @@ remove_zero_length_streams(struct list_head *stream_list) } } +/* + * Write a list of streams to the output WIM file. + * + * @stream_list + * The list of streams to write, specifies a list of `struct + * wim_lookup_table_entry's linked by the 'write_streams_list' member. + * + * @out_fd + * The file descriptor, opened for writing, to which to write the streams. + * + * @write_resource_flags + * Flags to modify how the streams are written: + * + * WRITE_RESOURCE_FLAG_RECOMPRESS: + * Force compression of all resources, even if they could otherwise + * be re-used by copying the raw data, due to being located in a WIM + * file with compatible compression parameters. + * + * WRITE_RESOURCE_FLAG_PIPABLE: + * Write the resources in the wimlib-specific pipable format, and + * furthermore do so in such a way that no seeking backwards in + * @out_fd will be performed (so it may be a pipe, contrary to the + * default behavior). + * + * WRITE_RESOURCE_FLAG_PACK_STREAMS: + * Pack all the streams into a single resource rather than writing + * them in separate resources. This format is only valid if the + * WIM version number is WIM_VERSION_PACKED_STREAMS. This flag + * currently may not be combined with WRITE_RESOURCE_FLAG_PIPABLE. + * + * @out_ctype + * Compression format to use to write the output streams, specified as one + * of the WIMLIB_COMPRESSION_TYPE_* constants, excepting + * WIMLIB_COMPRESSION_TYPE_INVALID but including + * WIMLIB_COMPRESSION_TYPE_NONE.
+ * + * @out_chunk_size + * Chunk size to use to write the streams. It must be a valid chunk size + * for the specified compression format @out_ctype, unless @out_ctype is + * WIMLIB_COMPRESSION_TYPE_NONE, in which case this parameter is ignored. + * + * @num_threads + * Number of threads to use to compress data. If 0, a default number of + * threads will be chosen. The number of threads still may be decreased + * from the specified value if insufficient memory is detected. + * + * @lookup_table + * If on-the-fly deduplication of unhashed streams is desired, this + * parameter must be a pointer to the lookup table for the WIMStruct on whose + * behalf the streams are being written. Otherwise, this parameter can be + * NULL. + * + * @filter_ctx + * If on-the-fly deduplication of unhashed streams is desired, this + * parameter can be a pointer to a context for stream filtering used to + * detect whether the duplicate stream has been hard-filtered or not. If + * no streams are hard-filtered or no streams are unhashed, this parameter + * can be NULL. + * + * @progress_func + * If non-NULL, a progress function that will be called periodically with + * WIMLIB_PROGRESS_MSG_WRITE_STREAMS messages. Note that on-the-fly + * deduplication of unhashed streams may cause the total bytes provided + * in the progress data to decrease from one message to the next. + * + * This function will write the streams in @stream_list to resources in + * consecutive positions in the output WIM file, or to a single packed resource + * if WRITE_RESOURCE_FLAG_PACK_STREAMS was specified in @write_resource_flags. + * In both cases, the @out_reshdr of the `struct wim_lookup_table_entry' for + * each stream written will be updated to specify its location, size, and flags + * in the output WIM.
In the packed resource case, + * WIM_RESHDR_FLAG_PACKED_STREAMS shall be set in the @flags field of the + * @out_reshdr, and @out_res_offset_in_wim and @out_res_size_in_wim will also + * be set to the offset and size, respectively, in the output WIM of the full + * packed resource containing the corresponding stream. + * + * Each of the streams to write may be in any location supported by the + * resource-handling code (specifically, read_stream_list()), such as the + * contents of an external file that has been logically added to the output WIM, or + * a stream in another WIM file that has been imported, or even a stream in the + * "same" WIM file of which a modified copy is being written. In the case that + * a stream is already in a WIM file and uses compatible compression parameters, + * by default this function will re-use the raw data instead of decompressing + * it, then recompressing it; however, with WRITE_RESOURCE_FLAG_RECOMPRESS + * specified in @write_resource_flags, this is not done. + * + * As a further requirement, this function requires that the + * @will_be_in_output_wim member be set on all streams in @stream_list as well + * as any other streams not in @stream_list that will be in the output WIM file, + * but not on any other streams in the output WIM's lookup table or sharing a + * packed resource with a stream in @stream_list. Still furthermore, if + * on-the-fly deduplication of streams is possible, then all streams in + * @stream_list must also be linked by @lookup_table_list along with any other + * streams that have @will_be_in_output_wim set. + * + * This function handles on-the-fly deduplication of streams for which SHA1 + * message digests have not yet been calculated and it is therefore not known + * whether such streams are already in @stream_list or in the WIM's lookup table + * at all.
If @lookup_table is non-NULL, then each stream in @stream_list that + * has @unhashed set but not @unique_size set is checksummed immediately before + * it would otherwise be read for writing in order to determine if it is + * identical to another stream already being written or one that would be + * filtered out of the output WIM using stream_filtered() with the context + * @filter_ctx. Each such duplicate stream will be removed from @stream_list, its + * reference count transferred to the pre-existing duplicate stream, its memory + * freed, and will not be written. Alternatively, if a stream in @stream_list + * is a duplicate of any stream in @lookup_table that has not been marked for + * writing or would not be hard-filtered, it is freed and the pre-existing + * duplicate is written instead, taking ownership of the reference count and + * slot in the @lookup_table_list. + * + * Returns 0 if all streams were written successfully (or did not need to be + * written); otherwise a non-zero error code.
+ */ static int write_stream_list(struct list_head *stream_list, struct filedes *out_fd, @@ -1140,13 +1352,18 @@ write_stream_list(struct list_head *stream_list, unsigned num_threads, struct wim_lookup_table *lookup_table, struct filter_context *filter_ctx, - struct wimlib_lzx_context **comp_ctx, wimlib_progress_func_t progress_func) { int ret; struct write_streams_ctx ctx; struct list_head raw_copy_resources; + wimlib_assert((write_resource_flags & + (WRITE_RESOURCE_FLAG_PACK_STREAMS | + WRITE_RESOURCE_FLAG_PIPABLE)) != + (WRITE_RESOURCE_FLAG_PACK_STREAMS | + WRITE_RESOURCE_FLAG_PIPABLE)); + remove_zero_length_streams(stream_list); if (list_empty(stream_list)) { @@ -1171,13 +1388,16 @@ write_stream_list(struct list_head *stream_list, ctx.write_resource_flags = write_resource_flags; ctx.filter_ctx = filter_ctx; - if (out_chunk_size <= STACK_MAX) { - ctx.chunk_buf = alloca(out_chunk_size); - } else { - ctx.chunk_buf = MALLOC(out_chunk_size); - if (ctx.chunk_buf == NULL) { - ret = WIMLIB_ERR_NOMEM; - goto out_destroy_context; + if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) { + wimlib_assert(out_chunk_size != 0); + if (out_chunk_size <= STACK_MAX) { + ctx.chunk_buf = alloca(out_chunk_size); + } else { + ctx.chunk_buf = MALLOC(out_chunk_size); + if (ctx.chunk_buf == NULL) { + ret = WIMLIB_ERR_NOMEM; + goto out_destroy_context; + } } } ctx.chunk_buf_filled = 0; @@ -1213,12 +1433,7 @@ write_stream_list(struct list_head *stream_list, * bytes needing to be compressed is less 2000000 (heuristic value). 
*/ if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) { - if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZMS && - ctx.lookup_table != NULL) { - WARNING("LZMS compression not implemented; data will " - "actually be written uncompressed."); - } - + #ifdef ENABLE_MULTITHREADED_COMPRESSION if (ctx.num_bytes_to_compress >= 2000000) { ret = new_parallel_chunk_compressor(out_ctype, out_chunk_size, @@ -1229,17 +1444,11 @@ write_stream_list(struct list_head *stream_list, "(status %d)", ret); } } + #endif if (ctx.compressor == NULL) { - if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZX) { - ret = wimlib_lzx_alloc_context(out_chunk_size, - NULL, - comp_ctx); - if (ret) - goto out_destroy_context; - } ret = new_serial_chunk_compressor(out_ctype, out_chunk_size, - *comp_ctx, &ctx.compressor); + &ctx.compressor); if (ret) goto out_destroy_context; } @@ -1254,13 +1463,14 @@ write_stream_list(struct list_head *stream_list, ctx.progress_data.progress.write_streams.num_threads); INIT_LIST_HEAD(&ctx.pending_streams); + INIT_LIST_HEAD(&ctx.pack_streams); if (ctx.progress_data.progress_func) { (*ctx.progress_data.progress_func)(WIMLIB_PROGRESS_MSG_WRITE_STREAMS, &ctx.progress_data.progress); } - if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { ret = begin_write_resource(&ctx, ctx.num_bytes_to_compress); if (ret) goto out_destroy_context; @@ -1292,7 +1502,7 @@ write_stream_list(struct list_head *stream_list, if (ret) goto out_destroy_context; - if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { struct wim_reshdr reshdr; struct wim_lookup_table_entry *lte; u64 offset_in_res; @@ -1307,7 +1517,7 @@ write_stream_list(struct list_head *stream_list, reshdr.uncompressed_size); offset_in_res = 0; - list_for_each_entry(lte, &ctx.pending_streams, write_streams_list) { + list_for_each_entry(lte, &ctx.pack_streams, write_streams_list) { 
lte->out_reshdr.size_in_wim = lte->size; lte->out_reshdr.flags = filter_resource_flags(lte->flags); lte->out_reshdr.flags |= WIM_RESHDR_FLAG_PACKED_STREAMS; @@ -1324,10 +1534,11 @@ write_stream_list(struct list_head *stream_list, out_write_raw_copy_resources: /* Copy any compressed resources for which the raw data can be reused * without decompression. */ - ret = write_raw_copy_resources(&raw_copy_resources, ctx.out_fd); + ret = write_raw_copy_resources(&raw_copy_resources, ctx.out_fd, + &ctx.progress_data); out_destroy_context: - if (out_chunk_size > STACK_MAX) + if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE && out_chunk_size > STACK_MAX) FREE(ctx.chunk_buf); FREE(ctx.chunk_csizes); if (ctx.compressor) @@ -1336,26 +1547,57 @@ out_destroy_context: return ret; } +static int +wim_write_stream_list(WIMStruct *wim, + struct list_head *stream_list, + int write_flags, + unsigned num_threads, + struct filter_context *filter_ctx, + wimlib_progress_func_t progress_func) +{ + int out_ctype; + u32 out_chunk_size; + int write_resource_flags; + + write_resource_flags = write_flags_to_resource_flags(write_flags); + + if (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { + out_chunk_size = wim->out_pack_chunk_size; + out_ctype = wim->out_pack_compression_type; + } else { + out_chunk_size = wim->out_chunk_size; + out_ctype = wim->out_compression_type; + } + + return write_stream_list(stream_list, + &wim->out_fd, + write_resource_flags, + out_ctype, + out_chunk_size, + num_threads, + wim->lookup_table, + filter_ctx, + progress_func); +} + static int write_wim_resource(struct wim_lookup_table_entry *lte, struct filedes *out_fd, int out_ctype, u32 out_chunk_size, - int write_resource_flags, - struct wimlib_lzx_context **comp_ctx) + int write_resource_flags) { LIST_HEAD(stream_list); list_add(<e->write_streams_list, &stream_list); lte->will_be_in_output_wim = 1; return write_stream_list(&stream_list, out_fd, - write_resource_flags & ~WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS, + 
write_resource_flags & ~WRITE_RESOURCE_FLAG_PACK_STREAMS, out_ctype, out_chunk_size, 1, NULL, NULL, - comp_ctx, NULL); } @@ -1366,8 +1608,7 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size, u32 out_chunk_size, struct wim_reshdr *out_reshdr, u8 *hash, - int write_resource_flags, - struct wimlib_lzx_context **comp_ctx) + int write_resource_flags) { int ret; struct wim_lookup_table_entry *lte; @@ -1384,7 +1625,7 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size, lte->size = buf_size; lte->flags = reshdr_flags; - if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) { + if (write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { sha1_buffer(buf, buf_size, lte->hash); lte->unhashed = 0; } else { @@ -1392,7 +1633,7 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size, } ret = write_wim_resource(lte, out_fd, out_ctype, out_chunk_size, - write_resource_flags, comp_ctx); + write_resource_flags); if (ret) goto out_free_lte; @@ -1791,16 +2032,12 @@ write_wim_streams(WIMStruct *wim, int image, int write_flags, } } - return write_stream_list(stream_list, - &wim->out_fd, - write_flags_to_resource_flags(write_flags), - wim->out_compression_type, - wim->out_chunk_size, - num_threads, - wim->lookup_table, - filter_ctx, - &wim->lzx_context, - progress_func); + return wim_write_stream_list(wim, + stream_list, + write_flags, + num_threads, + filter_ctx, + progress_func); } static int @@ -1819,7 +2056,7 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags, write_resource_flags = write_flags_to_resource_flags(write_flags); - write_resource_flags &= ~WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS; + write_resource_flags &= ~WRITE_RESOURCE_FLAG_PACK_STREAMS; DEBUG("Writing metadata resources (offset=%"PRIu64")", wim->out_fd.offset); @@ -1859,8 +2096,7 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags, &wim->out_fd, wim->out_compression_type, wim->out_chunk_size, - write_resource_flags, - 
&wim->lzx_context); + write_resource_flags); } if (ret) return ret; @@ -1980,8 +2216,7 @@ write_wim_lookup_table(WIMStruct *wim, int image, int write_flags, &wim->out_fd, wim->hdr.part_number, out_reshdr, - write_flags_to_resource_flags(write_flags), - &wim->lzx_context); + write_flags_to_resource_flags(write_flags)); } /* @@ -2280,7 +2515,7 @@ write_pipable_wim(WIMStruct *wim, int image, int write_flags, /* Write extra copy of the XML data. */ ret = write_wim_xml_data(wim, image, WIM_TOTALBYTES_OMIT, &xml_reshdr, - WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE); + WRITE_RESOURCE_FLAG_PIPABLE); if (ret) return ret; @@ -2360,9 +2595,6 @@ write_wim_part(WIMStruct *wim, if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) DEBUG("\tPACK_STREAMS"); - if (write_flags & WIMLIB_WRITE_FLAG_NO_PACK_STREAMS) - DEBUG("\tNO_PACK_STREAMS"); - if (write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR) DEBUG("\tFILE_DESCRIPTOR"); @@ -2411,12 +2643,6 @@ write_wim_part(WIMStruct *wim, WIMLIB_WRITE_FLAG_NOT_PIPABLE)) return WIMLIB_ERR_INVALID_PARAM; - if ((write_flags & (WIMLIB_WRITE_FLAG_PACK_STREAMS | - WIMLIB_WRITE_FLAG_NO_PACK_STREAMS)) - == (WIMLIB_WRITE_FLAG_PACK_STREAMS | - WIMLIB_WRITE_FLAG_NO_PACK_STREAMS)) - return WIMLIB_ERR_INVALID_PARAM; - /* Save previous header, then start initializing the new one. 
*/ memcpy(&hdr_save, &wim->hdr, sizeof(struct wim_header)); @@ -2435,18 +2661,14 @@ write_wim_part(WIMStruct *wim, write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY; } - if (!(write_flags & (WIMLIB_WRITE_FLAG_PACK_STREAMS | - WIMLIB_WRITE_FLAG_NO_PACK_STREAMS))) - if (wim->hdr.wim_version == WIM_VERSION_PACKED_STREAMS) { - DEBUG("WIM version 3584; default to PACK_STREAMS."); - write_flags |= WIMLIB_WRITE_FLAG_PACK_STREAMS; - } - if ((write_flags & (WIMLIB_WRITE_FLAG_PIPABLE | WIMLIB_WRITE_FLAG_PACK_STREAMS)) == (WIMLIB_WRITE_FLAG_PIPABLE | WIMLIB_WRITE_FLAG_PACK_STREAMS)) + { + ERROR("Cannot specify both PIPABLE and PACK_STREAMS!"); return WIMLIB_ERR_INVALID_PARAM; + } /* Set appropriate magic number. */ if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE) @@ -2455,7 +2677,8 @@ write_wim_part(WIMStruct *wim, wim->hdr.magic = WIM_MAGIC; /* Set appropriate version number. */ - if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) + if ((write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) || + wim->out_compression_type == WIMLIB_COMPRESSION_TYPE_LZMS) wim->hdr.wim_version = WIM_VERSION_PACKED_STREAMS; else wim->hdr.wim_version = WIM_VERSION_DEFAULT; @@ -2745,15 +2968,9 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, if (wim_has_integrity_table(wim)) write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY; - /* Set default packed flag. */ - if (!(write_flags & (WIMLIB_WRITE_FLAG_PACK_STREAMS | - WIMLIB_WRITE_FLAG_NO_PACK_STREAMS))) - { - if (wim->hdr.wim_version == WIM_VERSION_PACKED_STREAMS) - write_flags |= WIMLIB_WRITE_FLAG_PACK_STREAMS; - } else if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) { + /* Set WIM version if adding packed streams. */ + if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) wim->hdr.wim_version = WIM_VERSION_PACKED_STREAMS; - } /* Set additional flags for overwrite. 
*/ write_flags |= WIMLIB_WRITE_FLAG_OVERWRITE | @@ -2838,16 +3055,12 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, goto out_restore_physical_hdr; } - ret = write_stream_list(&stream_list, - &wim->out_fd, - write_flags_to_resource_flags(write_flags), - wim->compression_type, - wim->chunk_size, - num_threads, - wim->lookup_table, - &filter_ctx, - &wim->lzx_context, - progress_func); + ret = wim_write_stream_list(wim, + &stream_list, + write_flags, + num_threads, + &filter_ctx, + progress_func); if (ret) goto out_truncate; @@ -2938,18 +3151,35 @@ overwrite_wim_via_tmpfile(WIMStruct *wim, int write_flags, return 0; } +/* Determine if the specified WIM file may be updated by appending in-place + * rather than writing and replacing it with an entirely new file. */ static bool can_overwrite_wim_inplace(const WIMStruct *wim, int write_flags) { + /* REBUILD flag forces full rebuild. */ if (write_flags & WIMLIB_WRITE_FLAG_REBUILD) return false; + /* Deletions cause full rebuild by default. */ if (wim->deletion_occurred && !(write_flags & WIMLIB_WRITE_FLAG_SOFT_DELETE)) return false; + /* Pipable WIMs cannot be updated in place, nor can a non-pipable WIM be + * turned into a pipable WIM in-place. */ if (wim_is_pipable(wim) || (write_flags & WIMLIB_WRITE_FLAG_PIPABLE)) return false; + /* wimlib allows multiple packs in a single WIM, but they don't seem to + * be compatible with WIMGAPI, so force all streams to be repacked if + * the WIM already may have contained a pack and PACK_STREAMS was + * requested. */ + if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS && + wim->hdr.wim_version == WIM_VERSION_PACKED_STREAMS) + return false; + + /* The default compression type and compression chunk size selected for + * the output WIM must be the same as those currently used for the WIM. + */ if (wim->compression_type != wim->out_compression_type) return false; if (wim->chunk_size != wim->out_chunk_size)