+/* wimlib internal flags used when writing resources. */
+#define WRITE_RESOURCE_FLAG_RECOMPRESS 0x00000001
+#define WRITE_RESOURCE_FLAG_PIPABLE 0x00000002
+#define WRITE_RESOURCE_FLAG_PACK_STREAMS 0x00000004
+
+static inline int
+write_flags_to_resource_flags(int write_flags)
+{
+ int write_resource_flags = 0;
+
+ if (write_flags & WIMLIB_WRITE_FLAG_RECOMPRESS)
+ write_resource_flags |= WRITE_RESOURCE_FLAG_RECOMPRESS;
+ if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE)
+ write_resource_flags |= WRITE_RESOURCE_FLAG_PIPABLE;
+ if (write_flags & WIMLIB_WRITE_FLAG_PACK_STREAMS)
+ write_resource_flags |= WRITE_RESOURCE_FLAG_PACK_STREAMS;
+ return write_resource_flags;
+}
+
/* Context needed to decide whether a stream should be filtered out of a write
 * operation (see stream_filtered() below). */
struct filter_context {
	int write_flags;	/* WIMLIB_WRITE_FLAG_* bitmask for this write */
	WIMStruct *wim;		/* WIM on whose behalf the write is being done */
};
+
+/* Determine specified stream should be filtered out from the write.
+ *
+ * Return values:
+ *
+ * < 0 : The stream should be hard-filtered; that is, not included in the
+ * output WIM at all.
+ * 0 : The stream should not be filtered out.
+ * > 0 : The stream should be soft-filtered; that is, it already exists in the
+ * WIM file and may not need to be written again.
+ */
+static int
+stream_filtered(const struct wim_lookup_table_entry *lte,
+ const struct filter_context *ctx)
+{
+ int write_flags = ctx->write_flags;
+ WIMStruct *wim = ctx->wim;
+
+ if (ctx == NULL)
+ return 0;
+
+ if (write_flags & WIMLIB_WRITE_FLAG_OVERWRITE &&
+ lte->resource_location == RESOURCE_IN_WIM &&
+ lte->rspec->wim == wim)
+ return 1;
+
+ if (write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS &&
+ lte->resource_location == RESOURCE_IN_WIM &&
+ lte->rspec->wim != wim)
+ return -1;
+
+ return 0;
+}
+
+static bool
+stream_hard_filtered(const struct wim_lookup_table_entry *lte,
+ struct filter_context *ctx)
+{
+ return stream_filtered(lte, ctx) < 0;
+}
+
+static inline int
+may_soft_filter_streams(const struct filter_context *ctx)
+{
+ if (ctx == NULL)
+ return 0;
+ return ctx->write_flags & WIMLIB_WRITE_FLAG_OVERWRITE;
+}
+
+static inline int
+may_hard_filter_streams(const struct filter_context *ctx)
+{
+ if (ctx == NULL)
+ return 0;
+ return ctx->write_flags & WIMLIB_WRITE_FLAG_SKIP_EXTERNAL_WIMS;
+}
+
/* Return nonzero iff any stream filtering (soft or hard) could occur in this
 * write.  Both helpers are pure, so evaluation order does not matter. */
static inline int
may_filter_streams(const struct filter_context *ctx)
{
	return may_hard_filter_streams(ctx) || may_soft_filter_streams(ctx);
}
+
+
/* Return true if the specified resource is compressed and the compressed data
 * can be reused with the specified output parameters (i.e. copied raw, with
 * no decompression/recompression cycle). */
static bool
can_raw_copy(const struct wim_lookup_table_entry *lte,
	     int write_resource_flags, int out_ctype, u32 out_chunk_size)
{
	const struct wim_resource_spec *rspec;

	/* Explicit recompression was requested: never reuse raw data. */
	if (write_resource_flags & WRITE_RESOURCE_FLAG_RECOMPRESS)
		return false;

	/* Output is uncompressed, so there is no compressed form to reuse. */
	if (out_ctype == WIMLIB_COMPRESSION_TYPE_NONE)
		return false;

	/* Raw copy only makes sense for data already stored in a WIM. */
	if (lte->resource_location != RESOURCE_IN_WIM)
		return false;

	rspec = lte->rspec;

	/* Pipable and non-pipable resources have different on-disk layouts, so
	 * the source must match the requested output mode exactly. */
	if (rspec->is_pipable != !!(write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE))
		return false;


	if (rspec->flags & WIM_RESHDR_FLAG_COMPRESSED) {
		/* Normal compressed resource: Must use same compression type
		 * and chunk size. */
		return (rspec->wim->compression_type == out_ctype &&
			rspec->wim->chunk_size == out_chunk_size);
	}

	/* XXX: For compatibility, we can't allow multiple packed resources per
	 * WIM, so raw reuse of packed resources is disabled below. */
#if 0
	if ((rspec->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) &&
	    (write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS))
	{
		/* Packed resource: Such resources may contain multiple streams,
		 * and in general only a subset of them need to be written.  As
		 * a heuristic, re-use the raw data if at least half the
		 * uncompressed size is being written. */

		/* Note: packed resources contain a header that specifies the
		 * compression type and chunk size; therefore we don't need to
		 * check if they are compatible with @out_ctype and
		 * @out_chunk_size. */

		struct wim_lookup_table_entry *res_stream;
		u64 write_size = 0;

		list_for_each_entry(res_stream, &rspec->stream_list, rspec_node)
			if (res_stream->will_be_in_output_wim)
				write_size += res_stream->size;

		return (write_size > rspec->uncompressed_size / 2);
	}
#endif

	return false;
}
+
+static u8
+filter_resource_flags(u8 flags)
+{
+ return (flags & ~(WIM_RESHDR_FLAG_PACKED_STREAMS |
+ WIM_RESHDR_FLAG_COMPRESSED |
+ WIM_RESHDR_FLAG_SPANNED |
+ WIM_RESHDR_FLAG_FREE));
+}
+
/* Fill in @lte->out_reshdr (and, for packed streams, the out_res_* fields)
 * from the stream's existing location in a WIM, so the on-disk data can be
 * reused without rewriting it. */
static void
stream_set_out_reshdr_for_reuse(struct wim_lookup_table_entry *lte)
{
	const struct wim_resource_spec *rspec;

	wimlib_assert(lte->resource_location == RESOURCE_IN_WIM);
	rspec = lte->rspec;

	if (rspec->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) {

		wimlib_assert(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS);

		/* For a stream inside a packed resource, the reshdr fields are
		 * reinterpreted: offset is the offset *within* the resource,
		 * and the resource's own location is carried separately in the
		 * out_res_* fields below. */
		lte->out_reshdr.offset_in_wim = lte->offset_in_res;
		lte->out_reshdr.uncompressed_size = 0;
		lte->out_reshdr.size_in_wim = lte->size;

		lte->out_res_offset_in_wim = rspec->offset_in_wim;
		lte->out_res_size_in_wim = rspec->size_in_wim;
		/* NOTE(review): out_res_uncompressed_size intentionally not
		 * set here — commented-out line kept for reference. */
		/*lte->out_res_uncompressed_size = rspec->uncompressed_size;*/
	} else {
		wimlib_assert(!(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS));

		/* Non-packed: the output header points directly at the
		 * existing resource. */
		lte->out_reshdr.offset_in_wim = rspec->offset_in_wim;
		lte->out_reshdr.uncompressed_size = rspec->uncompressed_size;
		lte->out_reshdr.size_in_wim = rspec->size_in_wim;
	}
	lte->out_reshdr.flags = lte->flags;
}
+
+
/* Write the header for a stream in a pipable WIM.
 *
 * @lte: stream whose header to write (provides size, hash, and flags)
 * @out_fd: file descriptor to write the header to
 * @additional_reshdr_flags: extra PWM_RESHDR_FLAG_* bits to OR into the
 *	on-disk flags; if PWM_RESHDR_FLAG_UNHASHED is included, a zeroed hash
 *	is written instead of the stream's hash.
 *
 * Returns 0 on success, or a nonzero full_write() error code on write failure
 * (an error message is printed here). */
static int
write_pwm_stream_header(const struct wim_lookup_table_entry *lte,
			struct filedes *out_fd,
			int additional_reshdr_flags)
{
	struct pwm_stream_hdr stream_hdr;
	u32 reshdr_flags;
	int ret;

	stream_hdr.magic = cpu_to_le64(PWM_STREAM_MAGIC);
	stream_hdr.uncompressed_size = cpu_to_le64(lte->size);
	if (additional_reshdr_flags & PWM_RESHDR_FLAG_UNHASHED) {
		/* Hash not known yet; write zeroes as a placeholder. */
		zero_out_hash(stream_hdr.hash);
	} else {
		wimlib_assert(!lte->unhashed);
		copy_hash(stream_hdr.hash, lte->hash);
	}

	/* Storage-specific flag bits are stripped; only the caller-supplied
	 * additional flags are added back in. */
	reshdr_flags = filter_resource_flags(lte->flags);
	reshdr_flags |= additional_reshdr_flags;
	stream_hdr.flags = cpu_to_le32(reshdr_flags);
	ret = full_write(out_fd, &stream_hdr, sizeof(stream_hdr));
	if (ret)
		ERROR_WITH_ERRNO("Write error");
	return ret;
}
+
/* Bookkeeping for issuing WIMLIB_PROGRESS_MSG_WRITE_STREAMS messages while
 * streams are being written. */
struct write_streams_progress_data {
	wimlib_progress_func_t progress_func;	/* user callback, may be NULL */
	union wimlib_progress_info progress;	/* progress counters reported */
	/* Completed-bytes threshold at which the next progress message fires;
	 * ~0 is used as a "no further messages" sentinel. */
	uint64_t next_progress;
	/* Source WIM part of the previously written stream, used to detect
	 * transitions between parts of a split WIM. */
	WIMStruct *prev_wim_part;
};
+
/* Update the write-streams progress counters after a stream of @size bytes
 * has been processed, and invoke the user's progress callback when the next
 * ~1% threshold is crossed or a new split-WIM part is entered.
 *
 * @discarded is true if the stream turned out not to need writing (e.g. it
 * was a duplicate); its size is then subtracted from the total rather than
 * added to the completed count.  Note the completed_streams counter is
 * incremented either way. */
static void
do_write_streams_progress(struct write_streams_progress_data *progress_data,
			  u64 size,
			  bool discarded,
			  struct wim_lookup_table_entry *cur_stream)
{
	union wimlib_progress_info *progress = &progress_data->progress;
	bool new_wim_part;

	if (discarded) {
		progress->write_streams.total_bytes -= size;
		/* Pull the next threshold down if it now exceeds the (shrunk)
		 * total, unless it is the ~0 "done" sentinel. */
		if (progress_data->next_progress != ~(uint64_t)0 &&
		    progress_data->next_progress > progress->write_streams.total_bytes)
		{
			progress_data->next_progress = progress->write_streams.total_bytes;
		}
	} else {
		progress->write_streams.completed_bytes += size;
	}
	/* Detect a transition to a different source WIM part (split WIMs);
	 * the first part seen does not count as a transition. */
	new_wim_part = false;
	if (cur_stream->resource_location == RESOURCE_IN_WIM &&
	    cur_stream->rspec->wim != progress_data->prev_wim_part)
	{
		if (progress_data->prev_wim_part) {
			new_wim_part = true;
			progress->write_streams.completed_parts++;
		}
		progress_data->prev_wim_part = cur_stream->rspec->wim;
	}
	progress->write_streams.completed_streams++;
	if (progress_data->progress_func
	    && (progress->write_streams.completed_bytes >= progress_data->next_progress
		|| new_wim_part))
	{
		progress_data->progress_func(WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
					     progress);
		if (progress_data->next_progress == progress->write_streams.total_bytes) {
			/* Final message delivered; disable further messages. */
			progress_data->next_progress = ~(uint64_t)0;
		} else {
			/* Schedule the next message roughly 1% of the total
			 * further along, capped at the total. */
			progress_data->next_progress =
				min(progress->write_streams.total_bytes,
				    progress->write_streams.completed_bytes +
					progress->write_streams.total_bytes / 100);
		}
	}
}
+
/* State shared across the callbacks that implement writing a set of streams
 * to an output WIM, including compression, chunk-table accumulation, and
 * progress reporting. */
struct write_streams_ctx {
	/* File descriptor the streams are being written to. */
	struct filedes *out_fd;

	/* Lookup table for the WIMStruct on whose behalf the streams are being
	 * written. */
	struct wim_lookup_table *lookup_table;

	/* Compression format to use. */
	int out_ctype;

	/* Maximum uncompressed chunk size in compressed resources to use. */
	u32 out_chunk_size;

	/* Flags that affect how the streams will be written
	 * (WRITE_RESOURCE_FLAG_*). */
	int write_resource_flags;

	/* Data used for issuing WRITE_STREAMS progress. */
	struct write_streams_progress_data progress_data;

	/* Optional filter context; NULL means no stream filtering. */
	struct filter_context *filter_ctx;

	/* Upper bound on the total number of bytes that need to be compressed.
	 * */
	u64 num_bytes_to_compress;

	/* Pointer to the chunk_compressor implementation being used for
	 * compressing chunks of data, or NULL if chunks are being written
	 * uncompressed. */
	struct chunk_compressor *compressor;

	/* Buffer for dividing the read data into chunks of size
	 * @out_chunk_size. */
	u8 *chunk_buf;

	/* Number of bytes in @chunk_buf that are currently filled. */
	size_t chunk_buf_filled;

	/* List of streams that currently have chunks being compressed. */
	struct list_head pending_streams;

	/* Set to true if the stream currently being read was a duplicate, and
	 * therefore the corresponding stream entry needs to be freed once the
	 * read finishes.  (In this case we add the duplicate entry to
	 * pending_streams rather than the entry being read.) */
	bool stream_was_duplicate;

	/* Current uncompressed offset in the resource being read. */
	u64 cur_read_res_offset;

	/* Uncompressed size of the resource currently being read. */
	u64 cur_read_res_size;

	/* Current uncompressed offset in the resource being written. */
	u64 cur_write_res_offset;

	/* Uncompressed size of resource currently being written. */
	u64 cur_write_res_size;

	/* Array that is filled in with compressed chunk sizes as a resource is
	 * being written. */
	u64 *chunk_csizes;

	/* Index of next entry in @chunk_csizes to fill in. */
	size_t chunk_index;

	/* Number of entries in @chunk_csizes currently allocated. */
	size_t num_alloc_chunks;

	/* Offset in the output file of the start of the chunks of the resource
	 * currently being written. */
	u64 chunks_start_offset;
};
+
+static u64
+get_chunk_entry_size(u64 res_size, int write_resource_flags)
+{
+ if (res_size <= UINT32_MAX ||
+ (write_resource_flags & WIM_RESHDR_FLAG_PACKED_STREAMS))
+ return 4;
+ else
+ return 8;
+}
+
/* Reserve space for the chunk table and prepare to accumulate the chunk table
 * in memory.
 *
 * Returns 0 on success, WIMLIB_ERR_NOMEM on allocation failure, or a
 * full_write() error code on write failure. */
static int
begin_chunk_table(struct write_streams_ctx *ctx, u64 res_expected_size)
{
	u64 expected_num_chunks;
	u64 expected_num_chunk_entries;
	size_t reserve_size;
	int ret;

	/* Calculate the number of chunks and chunk entries that should be
	 * needed for the resource.  These normally will be the final values,
	 * but in PACKED_STREAMS mode some of the streams we're planning to
	 * write into the resource may be duplicates, and therefore discarded,
	 * potentially decreasing the number of chunk entries needed. */
	expected_num_chunks = DIV_ROUND_UP(res_expected_size, ctx->out_chunk_size);
	expected_num_chunk_entries = expected_num_chunks;
	if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS))
		/* Non-packed chunk tables omit the first chunk's entry (its
		 * offset is implicitly 0). */
		expected_num_chunk_entries--;

	/* Make sure the chunk_csizes array is long enough to store the
	 * compressed size of each chunk. */
	if (expected_num_chunks > ctx->num_alloc_chunks) {
		/* Over-allocate by 50 entries to reduce reallocations. */
		u64 new_length = expected_num_chunks + 50;

		/* Guard against truncation when size_t is narrower than u64
		 * (32-bit builds). */
		if ((size_t)new_length != new_length) {
			ERROR("Resource size too large (%"PRIu64" bytes!",
			      res_expected_size);
			return WIMLIB_ERR_NOMEM;
		}

		FREE(ctx->chunk_csizes);
		ctx->chunk_csizes = MALLOC(new_length * sizeof(ctx->chunk_csizes[0]));
		if (ctx->chunk_csizes == NULL) {
			ctx->num_alloc_chunks = 0;
			return WIMLIB_ERR_NOMEM;
		}
		ctx->num_alloc_chunks = new_length;
	}

	ctx->chunk_index = 0;

	if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE)) {
		/* Reserve space for the chunk table in the output file.  In the
		 * case of packed resources this reserves the upper bound for
		 * the needed space, not necessarily the exact space which will
		 * prove to be needed.  At this point, we just use @chunk_csizes
		 * for a buffer of 0's because the actual compressed chunk sizes
		 * are unknown.  (The 50-entry allocation slack above covers the
		 * extra alt_chunk_table_header_disk bytes — presumably; the
		 * header is far smaller than 50 * sizeof(u64).) */
		reserve_size = expected_num_chunk_entries *
			       get_chunk_entry_size(res_expected_size,
						    ctx->write_resource_flags);
		if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)
			reserve_size += sizeof(struct alt_chunk_table_header_disk);
		memset(ctx->chunk_csizes, 0, reserve_size);
		ret = full_write(ctx->out_fd, ctx->chunk_csizes, reserve_size);
		if (ret)
			return ret;
	}
	return 0;
}
+
+static int
+begin_write_resource(struct write_streams_ctx *ctx, u64 res_expected_size)
+{
+ int ret;
+
+ wimlib_assert(res_expected_size != 0);
+
+ if (ctx->compressor != NULL) {
+ ret = begin_chunk_table(ctx, res_expected_size);
+ if (ret)
+ return ret;
+ }
+
+ /* Output file descriptor is now positioned at the offset at which to
+ * write the first chunk of the resource. */
+ ctx->chunks_start_offset = ctx->out_fd->offset;
+ ctx->cur_write_res_offset = 0;
+ ctx->cur_write_res_size = res_expected_size;
+ return 0;
+}
+
/* Finalize the chunk table of the resource just written: convert the
 * accumulated per-chunk compressed sizes in @ctx->chunk_csizes to their
 * on-disk little-endian form and write the table to the output file.
 *
 * On success, returns 0 and sets *res_start_offset_ret / *res_store_size_ret
 * to where the resource starts in the file and how many bytes it occupies. */
static int
end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size,
		u64 *res_start_offset_ret, u64 *res_store_size_ret)
{
	size_t actual_num_chunks;
	size_t actual_num_chunk_entries;
	size_t chunk_entry_size;
	int ret;

	actual_num_chunks = ctx->chunk_index;
	actual_num_chunk_entries = actual_num_chunks;
	if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS))
		/* Non-packed tables omit the first chunk's entry. */
		actual_num_chunk_entries--;

	chunk_entry_size = get_chunk_entry_size(res_actual_size,
						ctx->write_resource_flags);

	/* The le entries are written in place over the u64 chunk_csizes array;
	 * the may_alias typedefs prevent strict-aliasing miscompilation of
	 * that in-place reuse. */
	typedef le64 __attribute__((may_alias)) aliased_le64_t;
	typedef le32 __attribute__((may_alias)) aliased_le32_t;

	if (chunk_entry_size == 4) {
		aliased_le32_t *entries = (aliased_le32_t*)ctx->chunk_csizes;

		if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) {
			/* Packed format: entries are raw compressed chunk
			 * sizes.  Entry i (4 bytes) never overwrites a u64
			 * slot not yet read, so in-place conversion is safe. */
			for (size_t i = 0; i < actual_num_chunk_entries; i++)
				entries[i] = cpu_to_le32(ctx->chunk_csizes[i]);
		} else {
			/* Normal format: entry i holds the cumulative offset
			 * of chunk i+1 (chunk 0 starts at offset 0
			 * implicitly).  Each u64 is read before the smaller
			 * le32 at a lower address is written. */
			u32 offset = ctx->chunk_csizes[0];
			for (size_t i = 0; i < actual_num_chunk_entries; i++) {
				u32 next_size = ctx->chunk_csizes[i + 1];
				entries[i] = cpu_to_le32(offset);
				offset += next_size;
			}
		}
	} else {
		aliased_le64_t *entries = (aliased_le64_t*)ctx->chunk_csizes;

		if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) {
			for (size_t i = 0; i < actual_num_chunk_entries; i++)
				entries[i] = cpu_to_le64(ctx->chunk_csizes[i]);
		} else {
			/* Same cumulative-offset scheme as above, 8-byte
			 * entries; chunk_csizes[i + 1] is read before
			 * entries[i] is stored. */
			u64 offset = ctx->chunk_csizes[0];
			for (size_t i = 0; i < actual_num_chunk_entries; i++) {
				u64 next_size = ctx->chunk_csizes[i + 1];
				entries[i] = cpu_to_le64(offset);
				offset += next_size;
			}
		}
	}

	size_t chunk_table_size = actual_num_chunk_entries * chunk_entry_size;
	u64 res_start_offset;
	u64 res_end_offset;

	if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) {
		/* Pipable: the chunk table is simply appended after the
		 * chunks (no seeking back). */
		ret = full_write(ctx->out_fd, ctx->chunk_csizes, chunk_table_size);
		if (ret)
			goto error;
		res_end_offset = ctx->out_fd->offset;
		res_start_offset = ctx->chunks_start_offset;
	} else {
		res_end_offset = ctx->out_fd->offset;

		u64 chunk_table_offset;

		/* Non-pipable: write the table back into the space reserved
		 * before the chunks.  The reservation may have been an upper
		 * bound (packed mode), so the table is placed at the end of
		 * the reserved gap, directly adjacent to the chunks. */
		chunk_table_offset = ctx->chunks_start_offset - chunk_table_size;

		if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) {
			/* Packed resources carry an extra header, written
			 * immediately before the chunk table, recording the
			 * uncompressed size, chunk size, and compression
			 * format. */
			struct alt_chunk_table_header_disk hdr;

			hdr.res_usize = cpu_to_le64(res_actual_size);
			hdr.chunk_size = cpu_to_le32(ctx->out_chunk_size);
			hdr.compression_format = cpu_to_le32(ctx->out_ctype);

			/* The header stores the WIMLIB_COMPRESSION_TYPE_*
			 * value directly; these assertions pin the on-disk
			 * numbering. */
			BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 1);
			BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 2);
			BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);

			ret = full_pwrite(ctx->out_fd, &hdr, sizeof(hdr),
					  chunk_table_offset - sizeof(hdr));
			if (ret)
				goto error;
			res_start_offset = chunk_table_offset - sizeof(hdr);
		} else {
			res_start_offset = chunk_table_offset;
		}

		ret = full_pwrite(ctx->out_fd, ctx->chunk_csizes,
				  chunk_table_size, chunk_table_offset);
		if (ret)
			goto error;
	}

	*res_start_offset_ret = res_start_offset;
	*res_store_size_ret = res_end_offset - res_start_offset;

	return 0;

error:
	ERROR_WITH_ERRNO("Write error");
	return ret;
}
+
+/* Finish writing a WIM resource by writing or updating the chunk table (if not
+ * writing the data uncompressed) and loading its metadata into @out_reshdr. */
+static int
+end_write_resource(struct write_streams_ctx *ctx, struct wim_reshdr *out_reshdr)
+{
+ int ret;
+ u64 res_size_in_wim;
+ u64 res_uncompressed_size;
+ u64 res_offset_in_wim;
+
+ wimlib_assert(ctx->cur_write_res_size == ctx->cur_write_res_offset);
+ res_uncompressed_size = ctx->cur_write_res_size;
+
+ if (ctx->compressor) {
+ ret = end_chunk_table(ctx, res_uncompressed_size,
+ &res_offset_in_wim, &res_size_in_wim);
+ if (ret)
+ return ret;
+ } else {
+ res_offset_in_wim = ctx->chunks_start_offset;
+ res_size_in_wim = ctx->out_fd->offset - res_offset_in_wim;
+ }
+ out_reshdr->uncompressed_size = res_uncompressed_size;
+ out_reshdr->size_in_wim = res_size_in_wim;
+ out_reshdr->offset_in_wim = res_offset_in_wim;
+ DEBUG("Finished writing resource: %"PRIu64" => %"PRIu64" @ %"PRIu64"",
+ res_uncompressed_size, res_size_in_wim, res_offset_in_wim);
+ return 0;
+}
+
+/* Begin processing a stream for writing. */
+static int
+write_stream_begin_read(struct wim_lookup_table_entry *lte,
+ bool is_partial_res, void *_ctx)