X-Git-Url: https://wimlib.net/git/?p=wimlib;a=blobdiff_plain;f=src%2Fwrite.c;h=bcf282ba3fe882126e929b208baa593c426d402a;hp=95365744d9b435bf754fb08c3c528fabea77697e;hb=1562bea589e9ef4039a04223bc9059f349316e3d;hpb=26c7f8bb32e4a32001d409f1693e0df016270ed5 diff --git a/src/write.c b/src/write.c index 95365744..bcf282ba 100644 --- a/src/write.c +++ b/src/write.c @@ -34,7 +34,7 @@ # include #endif -#include "wimlib/compress_chunks.h" +#include "wimlib/chunk_compressor.h" #include "wimlib/endianness.h" #include "wimlib/error.h" #include "wimlib/file_io.h" @@ -43,6 +43,9 @@ #include "wimlib/lookup_table.h" #include "wimlib/metadata.h" #include "wimlib/resource.h" +#ifdef __WIN32__ +# include "wimlib/win32.h" /* win32_rename_replacement() */ +#endif #include "wimlib/write.h" #include "wimlib/xml.h" @@ -55,17 +58,22 @@ # include #endif +/* wimlib internal flags used when writing resources. */ +#define WRITE_RESOURCE_FLAG_RECOMPRESS 0x00000001 +#define WRITE_RESOURCE_FLAG_PIPABLE 0x00000002 +#define WRITE_RESOURCE_FLAG_PACK_STREAMS 0x00000004 + static inline int write_flags_to_resource_flags(int write_flags) { int write_resource_flags = 0; if (write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS) - write_resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS; + write_resource_flags |= WRITE_RESOURCE_FLAG_RECOMPRESS; if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE) - write_resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE; + write_resource_flags |= WRITE_RESOURCE_FLAG_PIPABLE; if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) - write_resource_flags |= WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS; + write_resource_flags |= WRITE_RESOURCE_FLAG_PACK_STREAMS; return write_resource_flags; } @@ -146,7 +154,7 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, { const struct wim_resource_spec *rspec; - if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_RECOMPRESS) + if (write_resource_flags & WRITE_RESOURCE_FLAG_RECOMPRESS) return false; if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE) @@ 
-157,7 +165,7 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, rspec = lte->rspec; - if (rspec->is_pipable != !!(write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE)) + if (rspec->is_pipable != !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE)) return false; @@ -168,8 +176,11 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, rspec->wim->chunk_size == out_chunk_size); } + /* XXX: For compatibility, we can't allow multiple packed resources per + * WIM. */ +#if 0 if ((rspec->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && - (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS)) + (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) { /* Packed resource: Such resources may contain multiple streams, * and in general only a subset of them need to be written. As @@ -190,6 +201,8 @@ can_raw_copy(const struct wim_lookup_table_entry *lte, return (write_size > rspec->uncompressed_size / 2); } +#endif + return false; } @@ -220,7 +233,7 @@ stream_set_out_reshdr_for_reuse(struct wim_lookup_table_entry *lte) lte->out_res_offset_in_wim = rspec->offset_in_wim; lte->out_res_size_in_wim = rspec->size_in_wim; - lte->out_res_uncompressed_size = rspec->uncompressed_size; + /*lte->out_res_uncompressed_size = rspec->uncompressed_size;*/ } else { wimlib_assert(!(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS)); @@ -242,7 +255,7 @@ write_pwm_stream_header(const struct wim_lookup_table_entry *lte, u32 reshdr_flags; int ret; - stream_hdr.magic = PWM_STREAM_MAGIC; + stream_hdr.magic = cpu_to_le64(PWM_STREAM_MAGIC); stream_hdr.uncompressed_size = cpu_to_le64(lte->size); if (additional_reshdr_flags & PWM_RESHDR_FLAG_UNHASHED) { zero_out_hash(stream_hdr.hash); @@ -260,20 +273,6 @@ write_pwm_stream_header(const struct wim_lookup_table_entry *lte, return ret; } -#if 0 -static int -seek_and_truncate(struct filedes *out_fd, off_t offset) -{ - if (filedes_seek(out_fd, offset) == -1 || - ftruncate(out_fd->fd, offset)) - { - ERROR_WITH_ERRNO("Failed to truncate 
output WIM file"); - return WIMLIB_ERR_WRITE; - } - return 0; -} -#endif - struct write_streams_progress_data { wimlib_progress_func_t progress_func; union wimlib_progress_info progress; @@ -283,31 +282,32 @@ struct write_streams_progress_data { static void do_write_streams_progress(struct write_streams_progress_data *progress_data, - struct wim_lookup_table_entry *lte, - bool stream_discarded) + u64 size, + bool discarded, + struct wim_lookup_table_entry *cur_stream) { union wimlib_progress_info *progress = &progress_data->progress; bool new_wim_part; - if (stream_discarded) { - progress->write_streams.total_bytes -= lte->size; + if (discarded) { + progress->write_streams.total_bytes -= size; if (progress_data->next_progress != ~(uint64_t)0 && progress_data->next_progress > progress->write_streams.total_bytes) { progress_data->next_progress = progress->write_streams.total_bytes; } } else { - progress->write_streams.completed_bytes += lte->size; + progress->write_streams.completed_bytes += size; } new_wim_part = false; - if (lte->resource_location == RESOURCE_IN_WIM && - lte->rspec->wim != progress_data->prev_wim_part) + if (cur_stream->resource_location == RESOURCE_IN_WIM && + cur_stream->rspec->wim != progress_data->prev_wim_part) { if (progress_data->prev_wim_part) { new_wim_part = true; progress->write_streams.completed_parts++; } - progress_data->prev_wim_part = lte->rspec->wim; + progress_data->prev_wim_part = cur_stream->rspec->wim; } progress->write_streams.completed_streams++; if (progress_data->progress_func @@ -402,9 +402,10 @@ struct write_streams_ctx { }; static u64 -get_chunk_entry_size(u64 res_size) +get_chunk_entry_size(u64 res_size, int write_resource_flags) { - if (res_size <= UINT32_MAX) + if (res_size <= UINT32_MAX || + (write_resource_flags & WIM_RESHDR_FLAG_PACKED_STREAMS)) return 4; else return 8; @@ -427,7 +428,7 @@ begin_chunk_table(struct write_streams_ctx *ctx, u64 res_expected_size) * potentially decreasing the number of chunk entries 
needed. */ expected_num_chunks = DIV_ROUND_UP(res_expected_size, ctx->out_chunk_size); expected_num_chunk_entries = expected_num_chunks; - if (!(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS)) + if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) expected_num_chunk_entries--; /* Make sure the chunk_csizes array is long enough to store the @@ -452,15 +453,17 @@ begin_chunk_table(struct write_streams_ctx *ctx, u64 res_expected_size) ctx->chunk_index = 0; - if (!(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE)) { + if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE)) { /* Reserve space for the chunk table in the output file. In the * case of packed resources this reserves the upper bound for * the needed space, not necessarily the exact space which will * prove to be needed. At this point, we just use @chunk_csizes * for a buffer of 0's because the actual compressed chunk sizes * are unknown. */ - reserve_size = expected_num_chunk_entries * get_chunk_entry_size(res_expected_size); - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) + reserve_size = expected_num_chunk_entries * + get_chunk_entry_size(res_expected_size, + ctx->write_resource_flags); + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) reserve_size += sizeof(struct alt_chunk_table_header_disk); memset(ctx->chunk_csizes, 0, reserve_size); ret = full_write(ctx->out_fd, ctx->chunk_csizes, reserve_size); @@ -502,10 +505,11 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, actual_num_chunks = ctx->chunk_index; actual_num_chunk_entries = actual_num_chunks; - if (!(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS)) + if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) actual_num_chunk_entries--; - chunk_entry_size = get_chunk_entry_size(res_actual_size); + chunk_entry_size = get_chunk_entry_size(res_actual_size, + ctx->write_resource_flags); typedef 
le64 __attribute__((may_alias)) aliased_le64_t; typedef le32 __attribute__((may_alias)) aliased_le32_t; @@ -513,7 +517,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, if (chunk_entry_size == 4) { aliased_le32_t *entries = (aliased_le32_t*)ctx->chunk_csizes; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { for (size_t i = 0; i < actual_num_chunk_entries; i++) entries[i] = cpu_to_le32(ctx->chunk_csizes[i]); } else { @@ -527,7 +531,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, } else { aliased_le64_t *entries = (aliased_le64_t*)ctx->chunk_csizes; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { for (size_t i = 0; i < actual_num_chunk_entries; i++) entries[i] = cpu_to_le64(ctx->chunk_csizes[i]); } else { @@ -542,9 +546,9 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, size_t chunk_table_size = actual_num_chunk_entries * chunk_entry_size; u64 res_start_offset; - u64 res_end_offset = ctx->out_fd->offset; + u64 res_end_offset; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { ret = full_write(ctx->out_fd, ctx->chunk_csizes, chunk_table_size); if (ret) goto error; @@ -557,7 +561,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, chunk_table_offset = ctx->chunks_start_offset - chunk_table_size; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { struct alt_chunk_table_header_disk hdr; hdr.res_usize = cpu_to_le64(res_actual_size); @@ -651,7 +655,6 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, if (ctx->lookup_table != NULL && lte->unhashed && !lte->unique_size) { 
wimlib_assert(!is_partial_res); - wimlib_assert(ctx->lookup_table != NULL); struct wim_lookup_table_entry *lte_new; @@ -673,12 +676,12 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, DEBUG("Discarding duplicate stream of " "length %"PRIu64, lte->size); do_write_streams_progress(&ctx->progress_data, - lte, true); + lte->size, true, lte); list_del(<e->write_streams_list); list_del(<e->lookup_table_list); if (lte_new->will_be_in_output_wim) lte_new->out_refcnt += lte->out_refcnt; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) ctx->cur_write_res_size -= lte->size; free_lookup_table_entry(lte); return BEGIN_STREAM_STATUS_SKIP_STREAM; @@ -706,6 +709,54 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, return 0; } +/* Rewrite a stream that was just written compressed as uncompressed instead. + * This function is optional, but if a stream did not compress to less than its + * original size, it might as well be written uncompressed. */ +static int +write_stream_uncompressed(struct wim_lookup_table_entry *lte, + struct filedes *out_fd) +{ + int ret; + u64 begin_offset = lte->out_reshdr.offset_in_wim; + u64 end_offset = out_fd->offset; + + if (filedes_seek(out_fd, begin_offset) == -1) + return 0; + + ret = extract_full_stream_to_fd(lte, out_fd); + if (ret) { + /* Error reading the uncompressed data. */ + if (out_fd->offset == begin_offset && + filedes_seek(out_fd, end_offset) != -1) + { + /* Nothing was actually written yet, and we successfully + * seeked to the end of the compressed resource, so + * don't issue a hard error; just keep the compressed + * resource instead. 
*/ + WARNING("Recovered compressed stream of " + "size %"PRIu64", continuing on.", + lte->size); + return 0; + } + return ret; + } + + wimlib_assert(out_fd->offset - begin_offset == lte->size); + + if (out_fd->offset < end_offset && + 0 != ftruncate(out_fd->fd, out_fd->offset)) + { + ERROR_WITH_ERRNO("Can't truncate output file to " + "offset %"PRIu64, out_fd->offset); + return WIMLIB_ERR_WRITE; + } + + lte->out_reshdr.size_in_wim = lte->size; + lte->out_reshdr.flags &= ~(WIM_RESHDR_FLAG_COMPRESSED | + WIM_RESHDR_FLAG_PACKED_STREAMS); + return 0; +} + /* Write the next chunk of (typically compressed) data to the output WIM, * handling the writing of the chunk table. */ static int @@ -714,17 +765,17 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, { int ret; + struct wim_lookup_table_entry *lte; + + lte = list_entry(ctx->pending_streams.next, + struct wim_lookup_table_entry, write_streams_list); + if (ctx->cur_write_res_offset == 0 && - !(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS)) + !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) { /* Starting to write a new stream in non-packed mode. */ - struct wim_lookup_table_entry *lte; - - lte = list_entry(ctx->pending_streams.next, - struct wim_lookup_table_entry, write_streams_list); - - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { int additional_reshdr_flags = 0; if (ctx->compressor != NULL) additional_reshdr_flags |= WIM_RESHDR_FLAG_COMPRESSED; @@ -750,7 +801,7 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, /* If writing a pipable WIM, before the chunk data write a chunk * header that provides the compressed chunk size. 
*/ - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { struct pwm_chunk_hdr chunk_hdr = { .compressed_size = cpu_to_le32(csize), }; @@ -768,29 +819,46 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, ctx->cur_write_res_offset += usize; + do_write_streams_progress(&ctx->progress_data, + usize, false, lte); + if (ctx->cur_write_res_offset == ctx->cur_write_res_size && - !(ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS)) + !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) { - struct wim_lookup_table_entry *lte; - - lte = list_entry(ctx->pending_streams.next, - struct wim_lookup_table_entry, write_streams_list); wimlib_assert(ctx->cur_write_res_offset == lte->size); /* Finished writing a stream in non-packed mode. */ - do_write_streams_progress(&ctx->progress_data, lte, false); - ret = end_write_resource(ctx, <e->out_reshdr); if (ret) return ret; - wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size); - lte->out_reshdr.flags = filter_resource_flags(lte->flags); if (ctx->compressor != NULL) lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED; + if (ctx->compressor != NULL && + lte->out_reshdr.size_in_wim >= lte->out_reshdr.uncompressed_size && + !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) && + !(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS)) + { + /* Stream did not compress to less than its original + * size. If we're not writing a pipable WIM (which + * could mean the output file descriptor is + * non-seekable), and the stream isn't located in a + * resource pack (which would make reading it again + * costly), truncate the file to the start of the stream + * and write it uncompressed instead. 
*/ + DEBUG("Stream of size %"PRIu64" did not compress to " + "less than original size; writing uncompressed.", + lte->size); + ret = write_stream_uncompressed(lte, ctx->out_fd); + if (ret) + return ret; + } + + wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size); + list_del(<e->write_streams_list); ctx->cur_write_res_offset = 0; } @@ -856,7 +924,7 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) const u8 *resized_chunk; size_t needed_chunk_size; - if (ctx->write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { needed_chunk_size = ctx->out_chunk_size; } else { u64 res_bytes_remaining; @@ -885,8 +953,6 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) memcpy(&ctx->chunk_buf[ctx->chunk_buf_filled], chunkptr, bytes_consumed); - resized_chunk = ctx->chunk_buf; - chunkptr += bytes_consumed; ctx->cur_read_res_offset += bytes_consumed; ctx->chunk_buf_filled += bytes_consumed; @@ -1034,7 +1100,8 @@ write_raw_copy_resource(struct wim_resource_spec *in_rspec, out_offset_in_wim += sizeof(struct pwm_stream_hdr); } in_fd = &in_rspec->wim->in_fd; - while (cur_read_offset != end_read_offset) { + wimlib_assert(cur_read_offset != end_read_offset); + do { bytes_to_read = min(sizeof(buf), end_read_offset - cur_read_offset); @@ -1047,7 +1114,8 @@ write_raw_copy_resource(struct wim_resource_spec *in_rspec, return ret; cur_read_offset += bytes_to_read; - } + + } while (cur_read_offset != end_read_offset); list_for_each_entry(lte, &in_rspec->stream_list, rspec_node) { if (lte->will_be_in_output_wim) { @@ -1066,7 +1134,8 @@ write_raw_copy_resource(struct wim_resource_spec *in_rspec, * file being written. 
*/ static int write_raw_copy_resources(struct list_head *raw_copy_resources, - struct filedes *out_fd) + struct filedes *out_fd, + struct write_streams_progress_data *progress_data) { struct wim_lookup_table_entry *lte; int ret; @@ -1075,6 +1144,7 @@ write_raw_copy_resources(struct list_head *raw_copy_resources, ret = write_raw_copy_resource(lte->rspec, out_fd); if (ret) return ret; + do_write_streams_progress(progress_data, lte->size, false, lte); } return 0; } @@ -1123,6 +1193,120 @@ remove_zero_length_streams(struct list_head *stream_list) } } +/* + * Write a list of streams to the output WIM file. + * + * @stream_list + * The list of streams to write, specified as a list of `struct + * wim_lookup_table_entry's linked by the 'write_streams_list' member. + * + * @out_fd + * The file descriptor, opened for writing, to which to write the streams. + * + * @write_resource_flags + * Flags to modify how the streams are written: + * + * WRITE_RESOURCE_FLAG_RECOMPRESS: + * Force compression of all resources, even if they could otherwise + * be re-used by copying the raw data, due to being located in a WIM + * file with compatible compression parameters. + * + * WRITE_RESOURCE_FLAG_PIPABLE: + * Write the resources in the wimlib-specific pipable format, and + * furthermore do so in such a way that no seeking backwards in + * @out_fd will be performed (so it may be a pipe, contrary to the + * default behavior). + * + * WRITE_RESOURCE_FLAG_PACK_STREAMS: + * Pack all the streams into a single resource rather than writing + * them in separate resources. This format is only valid if the + * WIM version number is WIM_VERSION_PACKED_STREAMS. This flag + * currently may not be combined with WRITE_RESOURCE_FLAG_PIPABLE. + * + * @out_ctype + * Compression format to use to write the output streams, specified as one + * of the WIMLIB_COMPRESSION_TYPE_* constants, excepting + * WIMLIB_COMPRESSION_TYPE_INVALID but including + * WIMLIB_COMPRESSION_TYPE_NONE. 
+ * + * @out_chunk_size + * Chunk size to use to write the streams. It must be a valid chunk size + * for the specified compression format @out_ctype, unless @out_ctype is + * WIMLIB_COMPRESSION_TYPE_NONE, in which case this parameter is ignored. + * + * @num_threads + * Number of threads to use to compress data. If 0, a default number of + * threads will be chosen. The number of threads still may be decreased + * from the specified value if insufficient memory is detected. + * + * @lookup_table + * If on-the-fly deduplication of unhashed streams is desired, this + * parameter must be a pointer to the lookup table for the WIMStruct on whose + * behalf the streams are being written. Otherwise, this parameter can be + * NULL. + * + * @filter_ctx + * If on-the-fly deduplication of unhashed streams is desired, this + * parameter can be a pointer to a context for stream filtering used to + * detect whether the duplicate stream has been hard-filtered or not. If + * no streams are hard-filtered or no streams are unhashed, this parameter + * can be NULL. + * + * @progress_func + * If non-NULL, a progress function that will be called periodically with + * WIMLIB_PROGRESS_MSG_WRITE_STREAMS messages. Note that on-the-fly + * deduplication of unhashed streams may cause the total bytes provided + * in the progress data to decrease from one message to the next. + * + * This function will write the streams in @stream_list to resources in + * consecutive positions in the output WIM file, or to a single packed resource + * if WRITE_RESOURCE_FLAG_PACK_STREAMS was specified in @write_resource_flags. + * In both cases, the @out_reshdr of the `struct wim_lookup_table_entry' for + * each stream written will be updated to specify its location, size, and flags + * in the output WIM. 
In the packed resource case, + * WIM_RESHDR_FLAG_PACKED_STREAMS shall be set in the @flags field of the + * @out_reshdr, and @out_res_offset_in_wim and @out_res_size_in_wim will also + * be set to the offset and size, respectively, in the output WIM of the full + * packed resource containing the corresponding stream. + * + * Each of the streams to write may be in any location supported by the + * resource-handling code (specifically, read_stream_list()), such as the + * contents of an external file that has been logically added to the output WIM, or + * a stream in another WIM file that has been imported, or even a stream in the + * "same" WIM file of which a modified copy is being written. In the case that + * a stream is already in a WIM file and uses compatible compression parameters, + * by default this function will re-use the raw data instead of decompressing + * it, then recompressing it; however, with WRITE_RESOURCE_FLAG_RECOMPRESS + * specified in @write_resource_flags, this is not done. + * + * As a further requirement, this function requires that the + * @will_be_in_output_wim member be set on all streams in @stream_list as well + * as any other streams not in @stream_list that will be in the output WIM file, + * but not on any other streams in the output WIM's lookup table or sharing a + * packed resource with a stream in @stream_list. Still furthermore, if + * on-the-fly deduplication of streams is possible, then all streams in + * @stream_list must also be linked by @lookup_table_list along with any other + * streams that have @will_be_in_output_wim set. + * + * This function handles on-the-fly deduplication of streams for which SHA1 + * message digests have not yet been calculated and it is therefore unknown + * whether such streams are already in @stream_list or in the WIM's lookup table + * at all. 
If @lookup_table is non-NULL, then each stream in @stream_list that + * has @unhashed set but not @unique_size set is checksummed immediately before + * it would otherwise be read for writing in order to determine if it is + * identical to another stream already being written or one that would be + * filtered out of the output WIM using stream_filtered() with the context + * @filter_ctx. Each such duplicate stream will be removed from @stream_list, its + * reference count transferred to the pre-existing duplicate stream, its memory + * freed, and will not be written. Alternatively, if a stream in @stream_list + * is a duplicate of any stream in @lookup_table that has not been marked for + * writing or would not be hard-filtered, it is freed and the pre-existing + * duplicate is written instead, taking ownership of the reference count and + * slot in the @lookup_table_list. + * + * Returns 0 if all streams were written successfully (or did not need to be + * written); otherwise a non-zero error code. 
+ */ static int write_stream_list(struct list_head *stream_list, struct filedes *out_fd, @@ -1132,13 +1316,18 @@ write_stream_list(struct list_head *stream_list, unsigned num_threads, struct wim_lookup_table *lookup_table, struct filter_context *filter_ctx, - struct wimlib_lzx_context **comp_ctx, wimlib_progress_func_t progress_func) { int ret; struct write_streams_ctx ctx; struct list_head raw_copy_resources; + wimlib_assert((write_resource_flags & + (WRITE_RESOURCE_FLAG_PACK_STREAMS | + WRITE_RESOURCE_FLAG_PIPABLE)) != + (WRITE_RESOURCE_FLAG_PACK_STREAMS | + WRITE_RESOURCE_FLAG_PIPABLE)); + remove_zero_length_streams(stream_list); if (list_empty(stream_list)) { @@ -1163,13 +1352,16 @@ write_stream_list(struct list_head *stream_list, ctx.write_resource_flags = write_resource_flags; ctx.filter_ctx = filter_ctx; - if (out_chunk_size <= STACK_MAX) { - ctx.chunk_buf = alloca(out_chunk_size); - } else { - ctx.chunk_buf = MALLOC(out_chunk_size); - if (ctx.chunk_buf == NULL) { - ret = WIMLIB_ERR_NOMEM; - goto out_destroy_context; + if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) { + wimlib_assert(out_chunk_size != 0); + if (out_chunk_size <= STACK_MAX) { + ctx.chunk_buf = alloca(out_chunk_size); + } else { + ctx.chunk_buf = MALLOC(out_chunk_size); + if (ctx.chunk_buf == NULL) { + ret = WIMLIB_ERR_NOMEM; + goto out_destroy_context; + } } } ctx.chunk_buf_filled = 0; @@ -1205,6 +1397,7 @@ write_stream_list(struct list_head *stream_list, * bytes needing to be compressed is less 2000000 (heuristic value). 
*/ if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) { + #ifdef ENABLE_MULTITHREADED_COMPRESSION if (ctx.num_bytes_to_compress >= 2000000) { ret = new_parallel_chunk_compressor(out_ctype, out_chunk_size, @@ -1215,17 +1408,11 @@ write_stream_list(struct list_head *stream_list, "(status %d)", ret); } } + #endif if (ctx.compressor == NULL) { - if (out_ctype == WIMLIB_COMPRESSION_TYPE_LZX) { - ret = wimlib_lzx_alloc_context(out_chunk_size, - NULL, - comp_ctx); - if (ret) - goto out_destroy_context; - } ret = new_serial_chunk_compressor(out_ctype, out_chunk_size, - *comp_ctx, &ctx.compressor); + &ctx.compressor); if (ret) goto out_destroy_context; } @@ -1241,7 +1428,12 @@ write_stream_list(struct list_head *stream_list, INIT_LIST_HEAD(&ctx.pending_streams); - if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (ctx.progress_data.progress_func) { + (*ctx.progress_data.progress_func)(WIMLIB_PROGRESS_MSG_WRITE_STREAMS, + &ctx.progress_data.progress); + } + + if (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { ret = begin_write_resource(&ctx, ctx.num_bytes_to_compress); if (ret) goto out_destroy_context; @@ -1273,7 +1465,7 @@ write_stream_list(struct list_head *stream_list, if (ret) goto out_destroy_context; - if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS) { + if (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { struct wim_reshdr reshdr; struct wim_lookup_table_entry *lte; u64 offset_in_res; @@ -1296,7 +1488,7 @@ write_stream_list(struct list_head *stream_list, lte->out_reshdr.offset_in_wim = offset_in_res; lte->out_res_offset_in_wim = reshdr.offset_in_wim; lte->out_res_size_in_wim = reshdr.size_in_wim; - lte->out_res_uncompressed_size = reshdr.uncompressed_size; + /*lte->out_res_uncompressed_size = reshdr.uncompressed_size;*/ offset_in_res += lte->size; } wimlib_assert(offset_in_res == reshdr.uncompressed_size); @@ -1305,10 +1497,11 @@ write_stream_list(struct list_head *stream_list, 
out_write_raw_copy_resources: /* Copy any compressed resources for which the raw data can be reused * without decompression. */ - ret = write_raw_copy_resources(&raw_copy_resources, ctx.out_fd); + ret = write_raw_copy_resources(&raw_copy_resources, ctx.out_fd, + &ctx.progress_data); out_destroy_context: - if (out_chunk_size > STACK_MAX) + if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE && out_chunk_size > STACK_MAX) FREE(ctx.chunk_buf); FREE(ctx.chunk_csizes); if (ctx.compressor) @@ -1317,26 +1510,57 @@ out_destroy_context: return ret; } +static int +wim_write_stream_list(WIMStruct *wim, + struct list_head *stream_list, + int write_flags, + unsigned num_threads, + struct filter_context *filter_ctx, + wimlib_progress_func_t progress_func) +{ + int out_ctype; + u32 out_chunk_size; + int write_resource_flags; + + write_resource_flags = write_flags_to_resource_flags(write_flags); + + if (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { + out_chunk_size = wim->out_pack_chunk_size; + out_ctype = wim->out_pack_compression_type; + } else { + out_chunk_size = wim->out_chunk_size; + out_ctype = wim->out_compression_type; + } + + return write_stream_list(stream_list, + &wim->out_fd, + write_resource_flags, + out_ctype, + out_chunk_size, + num_threads, + wim->lookup_table, + filter_ctx, + progress_func); +} + static int write_wim_resource(struct wim_lookup_table_entry *lte, struct filedes *out_fd, int out_ctype, u32 out_chunk_size, - int write_resource_flags, - struct wimlib_lzx_context **comp_ctx) + int write_resource_flags) { LIST_HEAD(stream_list); list_add(<e->write_streams_list, &stream_list); lte->will_be_in_output_wim = 1; return write_stream_list(&stream_list, out_fd, - write_resource_flags & ~WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS, + write_resource_flags & ~WRITE_RESOURCE_FLAG_PACK_STREAMS, out_ctype, out_chunk_size, 1, NULL, NULL, - comp_ctx, NULL); } @@ -1347,8 +1571,7 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size, u32 out_chunk_size, 
struct wim_reshdr *out_reshdr, u8 *hash, - int write_resource_flags, - struct wimlib_lzx_context **comp_ctx) + int write_resource_flags) { int ret; struct wim_lookup_table_entry *lte; @@ -1365,7 +1588,7 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size, lte->size = buf_size; lte->flags = reshdr_flags; - if (write_resource_flags & WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE) { + if (write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { sha1_buffer(buf, buf_size, lte->hash); lte->unhashed = 0; } else { @@ -1373,7 +1596,7 @@ write_wim_resource_from_buffer(const void *buf, size_t buf_size, } ret = write_wim_resource(lte, out_fd, out_ctype, out_chunk_size, - write_resource_flags, comp_ctx); + write_resource_flags); if (ret) goto out_free_lte; @@ -1772,16 +1995,12 @@ write_wim_streams(WIMStruct *wim, int image, int write_flags, } } - return write_stream_list(stream_list, - &wim->out_fd, - write_flags_to_resource_flags(write_flags), - wim->out_compression_type, - wim->out_chunk_size, - num_threads, - wim->lookup_table, - filter_ctx, - &wim->lzx_context, - progress_func); + return wim_write_stream_list(wim, + stream_list, + write_flags, + num_threads, + filter_ctx, + progress_func); } static int @@ -1800,7 +2019,7 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags, write_resource_flags = write_flags_to_resource_flags(write_flags); - write_resource_flags &= ~WIMLIB_WRITE_RESOURCE_FLAG_PACK_STREAMS; + write_resource_flags &= ~WRITE_RESOURCE_FLAG_PACK_STREAMS; DEBUG("Writing metadata resources (offset=%"PRIu64")", wim->out_fd.offset); @@ -1840,8 +2059,7 @@ write_wim_metadata_resources(WIMStruct *wim, int image, int write_flags, &wim->out_fd, wim->out_compression_type, wim->out_chunk_size, - write_resource_flags, - &wim->lzx_context); + write_resource_flags); } if (ret) return ret; @@ -1901,7 +2119,7 @@ cmp_streams_by_out_rspec(const void *p1, const void *p2) if (lte2->out_reshdr.flags & WIM_RESHDR_FLAG_PACKED_STREAMS) return -1; } - return 
cmp_u64(lte1->out_reshdr.offset_in_wim, + return cmp_u64(lte1->out_reshdr.offset_in_wim, lte2->out_reshdr.offset_in_wim); } @@ -1961,8 +2179,7 @@ write_wim_lookup_table(WIMStruct *wim, int image, int write_flags, &wim->out_fd, wim->hdr.part_number, out_reshdr, - write_flags_to_resource_flags(write_flags), - &wim->lzx_context); + write_flags_to_resource_flags(write_flags)); } /* @@ -2261,7 +2478,7 @@ write_pipable_wim(WIMStruct *wim, int image, int write_flags, /* Write extra copy of the XML data. */ ret = write_wim_xml_data(wim, image, WIM_TOTALBYTES_OMIT, &xml_reshdr, - WIMLIB_WRITE_RESOURCE_FLAG_PIPABLE); + WRITE_RESOURCE_FLAG_PIPABLE); if (ret) return ret; @@ -2341,9 +2558,6 @@ write_wim_part(WIMStruct *wim, if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) DEBUG("\tPACK_STREAMS"); - if (write_flags & WIMLIB_WRITE_FLAG_NO_PACK_STREAMS) - DEBUG("\tNO_PACK_STREAMS"); - if (write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR) DEBUG("\tFILE_DESCRIPTOR"); @@ -2392,12 +2606,6 @@ write_wim_part(WIMStruct *wim, WIMLIB_WRITE_FLAG_NOT_PIPABLE)) return WIMLIB_ERR_INVALID_PARAM; - if ((write_flags & (WIMLIB_WRITE_FLAG_PACK_STREAMS | - WIMLIB_WRITE_FLAG_NO_PACK_STREAMS)) - == (WIMLIB_WRITE_FLAG_PACK_STREAMS | - WIMLIB_WRITE_FLAG_NO_PACK_STREAMS)) - return WIMLIB_ERR_INVALID_PARAM; - /* Save previous header, then start initializing the new one. 
*/ memcpy(&hdr_save, &wim->hdr, sizeof(struct wim_header)); @@ -2416,18 +2624,14 @@ write_wim_part(WIMStruct *wim, write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY; } - if (!(write_flags & (WIMLIB_WRITE_FLAG_PACK_STREAMS | - WIMLIB_WRITE_FLAG_NO_PACK_STREAMS))) - if (wim->hdr.wim_version == WIM_VERSION_PACKED_STREAMS) { - DEBUG("WIM version 3584; default to PACK_STREAMS."); - write_flags |= WIMLIB_WRITE_FLAG_PACK_STREAMS; - } - if ((write_flags & (WIMLIB_WRITE_FLAG_PIPABLE | WIMLIB_WRITE_FLAG_PACK_STREAMS)) == (WIMLIB_WRITE_FLAG_PIPABLE | WIMLIB_WRITE_FLAG_PACK_STREAMS)) + { + ERROR("Cannot specify both PIPABLE and PACK_STREAMS!"); return WIMLIB_ERR_INVALID_PARAM; + } /* Set appropriate magic number. */ if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE) @@ -2436,7 +2640,8 @@ write_wim_part(WIMStruct *wim, wim->hdr.magic = WIM_MAGIC; /* Set appropriate version number. */ - if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) + if ((write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) || + wim->out_compression_type == WIMLIB_COMPRESSION_TYPE_LZMS) wim->hdr.wim_version = WIM_VERSION_PACKED_STREAMS; else wim->hdr.wim_version = WIM_VERSION_DEFAULT; @@ -2717,17 +2922,18 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, DEBUG("Overwriting `%"TS"' in-place", wim->filename); + /* Save original header so it can be restored in case of error */ + memcpy(&hdr_save, &wim->hdr, sizeof(struct wim_header)); + /* Set default integrity flag. */ if (!(write_flags & (WIMLIB_WRITE_FLAG_CHECK_INTEGRITY | WIMLIB_WRITE_FLAG_NO_CHECK_INTEGRITY))) if (wim_has_integrity_table(wim)) write_flags |= WIMLIB_WRITE_FLAG_CHECK_INTEGRITY; - /* Set default packed flag. */ - if (!(write_flags & (WIMLIB_WRITE_FLAG_PACK_STREAMS | - WIMLIB_WRITE_FLAG_NO_PACK_STREAMS))) - if (wim->hdr.wim_version == WIM_VERSION_PACKED_STREAMS) - write_flags |= WIMLIB_WRITE_FLAG_PACK_STREAMS; + /* Set WIM version if adding packed streams. 
*/ + if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS) + wim->hdr.wim_version = WIM_VERSION_PACKED_STREAMS; /* Set additional flags for overwrite. */ write_flags |= WIMLIB_WRITE_FLAG_OVERWRITE | @@ -2744,12 +2950,14 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, if (wim->hdr.integrity_table_reshdr.offset_in_wim != 0 && wim->hdr.integrity_table_reshdr.offset_in_wim < old_xml_end) { WARNING("Didn't expect the integrity table to be before the XML data"); - return WIMLIB_ERR_RESOURCE_ORDER; + ret = WIMLIB_ERR_RESOURCE_ORDER; + goto out_restore_memory_hdr; } if (old_lookup_table_end > old_xml_begin) { WARNING("Didn't expect the lookup table to be after the XML data"); - return WIMLIB_ERR_RESOURCE_ORDER; + ret = WIMLIB_ERR_RESOURCE_ORDER; + goto out_restore_memory_hdr; } /* Set @old_wim_end, which indicates the point beyond which we don't @@ -2780,31 +2988,28 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, ret = check_resource_offsets(wim, old_wim_end); if (ret) - return ret; + goto out_restore_memory_hdr; ret = prepare_stream_list_for_write(wim, WIMLIB_ALL_IMAGES, write_flags, &stream_list, &lookup_table_list, &filter_ctx); if (ret) - return ret; + goto out_restore_memory_hdr; ret = open_wim_writable(wim, wim->filename, O_RDWR); if (ret) - return ret; + goto out_restore_memory_hdr; ret = lock_wim(wim, wim->out_fd.fd); if (ret) goto out_close_wim; - /* Save original header so it can be restored in case of error */ - memcpy(&hdr_save, &wim->hdr, sizeof(struct wim_header)); - /* Set WIM_HDR_FLAG_WRITE_IN_PROGRESS flag in header. 
*/ wim->hdr.flags |= WIM_HDR_FLAG_WRITE_IN_PROGRESS; ret = write_wim_header_flags(wim->hdr.flags, &wim->out_fd); if (ret) { ERROR_WITH_ERRNO("Error updating WIM header flags"); - goto out_restore_memory_hdr; + goto out_unlock_wim; } if (filedes_seek(&wim->out_fd, old_wim_end) == -1) { @@ -2813,16 +3018,12 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, goto out_restore_physical_hdr; } - ret = write_stream_list(&stream_list, - &wim->out_fd, - write_flags_to_resource_flags(write_flags), - wim->compression_type, - wim->chunk_size, - num_threads, - wim->lookup_table, - &filter_ctx, - &wim->lzx_context, - progress_func); + ret = wim_write_stream_list(wim, + &stream_list, + write_flags, + num_threads, + &filter_ctx, + progress_func); if (ret) goto out_truncate; @@ -2837,7 +3038,8 @@ overwrite_wim_inplace(WIMStruct *wim, int write_flags, if (ret) goto out_truncate; - goto out_unlock_wim; + wim->wim_locked = 0; + return 0; out_truncate: if (!(write_flags & WIMLIB_WRITE_FLAG_NO_LOOKUP_TABLE)) { @@ -2849,12 +3051,12 @@ out_truncate: } out_restore_physical_hdr: (void)write_wim_header_flags(hdr_save.flags, &wim->out_fd); -out_restore_memory_hdr: - memcpy(&wim->hdr, &hdr_save, sizeof(struct wim_header)); -out_close_wim: - (void)close_wim_writable(wim, write_flags); out_unlock_wim: wim->wim_locked = 0; +out_close_wim: + (void)close_wim_writable(wim, write_flags); +out_restore_memory_hdr: + memcpy(&wim->hdr, &hdr_save, sizeof(struct wim_header)); return ret; } @@ -2912,24 +3114,39 @@ overwrite_wim_via_tmpfile(WIMStruct *wim, int write_flags, return 0; } +/* Determine if the specified WIM file may be updated by appending in-place + * rather than writing and replacing it with an entirely new file. */ static bool can_overwrite_wim_inplace(const WIMStruct *wim, int write_flags) { + /* REBUILD flag forces full rebuild. */ if (write_flags & WIMLIB_WRITE_FLAG_REBUILD) return false; + /* Deletions cause full rebuild by default. 
*/ if (wim->deletion_occurred && !(write_flags & WIMLIB_WRITE_FLAG_SOFT_DELETE)) return false; + /* Pipable WIMs cannot be updated in place, nor can a non-pipable WIM be + * turned into a pipable WIM in-place. */ if (wim_is_pipable(wim) || (write_flags & WIMLIB_WRITE_FLAG_PIPABLE)) return false; - if (wim->hdr.wim_version != WIM_VERSION_PACKED_STREAMS) { - if (wim->compression_type != wim->out_compression_type) - return false; - if (wim->chunk_size != wim->out_chunk_size) - return false; - } + /* wimlib allows multiple packs in a single WIM, but they don't seem to + * be compatible with WIMGAPI, so force all streams to be repacked if + * the WIM already may have contained a pack and PACK_STREAMS was + * requested. */ + if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS && + wim->hdr.wim_version == WIM_VERSION_PACKED_STREAMS) + return false; + + /* The default compression type and compression chunk size selected for + * the output WIM must be the same as those currently used for the WIM. + */ + if (wim->compression_type != wim->out_compression_type) + return false; + if (wim->chunk_size != wim->out_chunk_size) + return false; return true; }