+ ret = full_pwrite(ctx->out_fd, &hdr, sizeof(hdr),
+ chunk_table_offset - sizeof(hdr));
+ if (ret)
+ goto error;
+ res_start_offset = chunk_table_offset - sizeof(hdr);
+ } else {
+ res_start_offset = chunk_table_offset;
+ }
+
+ ret = full_pwrite(ctx->out_fd, ctx->chunk_csizes,
+ chunk_table_size, chunk_table_offset);
+ if (ret)
+ goto error;
+ }
+
+ *res_start_offset_ret = res_start_offset;
+ *res_store_size_ret = res_end_offset - res_start_offset;
+
+ return 0;
+
+error:
+ ERROR_WITH_ERRNO("Write error");
+ return ret;
+}
+
+ /* Complete the WIM resource currently being written and describe it in
+  * @out_reshdr.
+  *
+  * When a compressor is in use, the chunk table is written or updated through
+  * end_chunk_table(), which also reports the offset at which the resource
+  * starts and the number of bytes it occupies.  Without a compressor, the
+  * resource spans from the saved chunk start offset to the current offset of
+  * the output file descriptor.
+  *
+  * Returns 0 on success, or the nonzero error code from end_chunk_table(). */
+ static int
+ end_write_resource(struct write_streams_ctx *ctx, struct wim_reshdr *out_reshdr)
+ {
+     u64 uncompressed_size;
+     u64 stored_offset;
+     u64 stored_size;
+
+     /* Outside of packed mode, the whole stream must have been written. */
+     wimlib_assert(ctx->cur_write_stream_offset == ctx->cur_write_res_size ||
+                   (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS));
+
+     uncompressed_size = ctx->cur_write_res_size;
+
+     if (!ctx->compressor) {
+         /* Raw data: the resource is simply everything written since
+          * the recorded start offset. */
+         stored_offset = ctx->chunks_start_offset;
+         stored_size = ctx->out_fd->offset - stored_offset;
+     } else {
+         int status;
+
+         status = end_chunk_table(ctx, uncompressed_size,
+                                  &stored_offset, &stored_size);
+         if (status)
+             return status;
+     }
+
+     out_reshdr->uncompressed_size = uncompressed_size;
+     out_reshdr->size_in_wim = stored_size;
+     out_reshdr->offset_in_wim = stored_offset;
+
+     DEBUG("Finished writing resource: %"PRIu64" => %"PRIu64" @ %"PRIu64"",
+           uncompressed_size, stored_size, stored_offset);
+     return 0;
+ }
+
+ /* Begin processing a stream for writing.
+  *
+  * @lte is the stream about to be read, @is_partial_res is forwarded by the
+  * reader, and @_ctx is the struct write_streams_ctx for this write.
+  *
+  * Returns 0 if the stream should be read and written,
+  * BEGIN_STREAM_STATUS_SKIP_STREAM if it turned out to be a duplicate that
+  * need not be written, or the nonzero error code from
+  * hash_unhashed_stream(). */
+ static int
+ write_stream_begin_read(struct wim_lookup_table_entry *lte,
+                         bool is_partial_res, void *_ctx)
+ {
+     struct write_streams_ctx *ctx = _ctx;
+     int ret;
+
+     wimlib_assert(lte->size > 0);
+
+     ctx->cur_read_stream_offset = 0;
+     ctx->cur_read_stream_size = lte->size;
+
+     /* As an optimization, we allow some streams to be "unhashed", meaning
+      * their SHA1 message digests are unknown.  This is the case with
+      * streams that are added by scanning a directory tree with
+      * wimlib_add_image(), for example.  Since WIM uses single-instance
+      * streams, we don't know whether each such stream really needs to be
+      * written until it is actually checksummed, unless it has a unique
+      * size.  In such cases we read and checksum the stream in this
+      * function, thereby advancing ahead of read_stream_list(), which will
+      * still provide the data again to write_stream_process_chunk().  This
+      * is okay because an unhashed stream cannot be in a WIM resource, which
+      * might be costly to decompress.  */
+     ctx->stream_was_duplicate = false;
+     if (ctx->lookup_table != NULL && lte->unhashed && !lte->unique_size) {
+
+         wimlib_assert(!is_partial_res);
+
+         struct wim_lookup_table_entry *lte_new;
+
+         ret = hash_unhashed_stream(lte, ctx->lookup_table, &lte_new);
+         if (ret)
+             return ret;
+         if (lte_new != lte) {
+             /* Duplicate stream detected.  */
+
+             if (lte_new->will_be_in_output_wim ||
+                 stream_filtered(lte_new, ctx->filter_ctx))
+             {
+                 /* The duplicate stream is already being
+                  * included in the output WIM, or it would be
+                  * filtered out if it had been.  Skip writing
+                  * this stream (and reading it again) entirely,
+                  * passing its output reference count to the
+                  * duplicate stream in the former case.  */
+                 DEBUG("Discarding duplicate stream of "
+                       "length %"PRIu64, lte->size);
+                 do_write_streams_progress(&ctx->progress_data,
+                                           lte, lte->size,
+                                           1, true);
+                 list_del(&lte->write_streams_list);
+                 list_del(&lte->lookup_table_list);
+                 if (lte_new->will_be_in_output_wim)
+                     lte_new->out_refcnt += lte->out_refcnt;
+                 /* In packed mode the skipped stream no longer
+                  * contributes to the size of the resource being
+                  * written.  */
+                 if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)
+                     ctx->cur_write_res_size -= lte->size;
+                 free_lookup_table_entry(lte);
+                 return BEGIN_STREAM_STATUS_SKIP_STREAM;
+             } else {
+                 /* The duplicate stream can validly be written,
+                  * but was not marked as such.  Discard the
+                  * current stream entry and use the duplicate,
+                  * but actually freeing the current entry must
+                  * wait until read_stream_list() has finished
+                  * reading its data.  */
+                 DEBUG("Stream duplicate, but not already "
+                       "selected for writing.");
+                 list_replace(&lte->write_streams_list,
+                              &lte_new->write_streams_list);
+                 list_replace(&lte->lookup_table_list,
+                              &lte_new->lookup_table_list);
+                 lte_new->out_refcnt = lte->out_refcnt;
+                 lte_new->will_be_in_output_wim = 1;
+                 ctx->stream_was_duplicate = true;
+                 lte = lte_new;
+             }
+         }
+     }
+     /* Queue the entry that will actually be written (possibly the
+      * duplicate substituted above) behind the streams still pending.  */
+     list_move_tail(&lte->write_streams_list, &ctx->pending_streams);
+     return 0;
+ }
+
+ /* Replace the just-written compressed copy of @lte with an uncompressed one.
+  *
+  * This is purely an optimization for streams that did not shrink when
+  * compressed.  If the output cannot be repositioned to the start of the
+  * resource, or if reading the stream fails before anything has been
+  * overwritten and the end of the compressed data can be seeked to again, the
+  * compressed copy is kept and 0 is returned.
+  *
+  * Returns 0 on success (or when the compressed copy is kept), the error code
+  * from extract_full_stream_to_fd() if the data could not be rewritten, or
+  * WIMLIB_ERR_WRITE if the output file could not be truncated. */
+ static int
+ write_stream_uncompressed(struct wim_lookup_table_entry *lte,
+                           struct filedes *out_fd)
+ {
+     const u64 res_start = lte->out_reshdr.offset_in_wim;
+     const u64 compressed_end = out_fd->offset;
+     int status;
+
+     /* Cannot rewind: silently keep the compressed resource. */
+     if (filedes_seek(out_fd, res_start) == -1)
+         return 0;
+
+     status = extract_full_stream_to_fd(lte, out_fd);
+     if (status) {
+         /* Reading the uncompressed data failed.  This is only
+          * recoverable if nothing has been overwritten yet and we can
+          * get back to the end of the compressed resource. */
+         if (out_fd->offset != res_start)
+             return status;
+         if (filedes_seek(out_fd, compressed_end) == -1)
+             return status;
+
+         WARNING("Recovered compressed stream of "
+                 "size %"PRIu64", continuing on.",
+                 lte->size);
+         return 0;
+     }
+
+     wimlib_assert(out_fd->offset - res_start == lte->size);
+
+     /* Drop any leftover bytes of the (longer) compressed copy. */
+     if (out_fd->offset < compressed_end) {
+         if (ftruncate(out_fd->fd, out_fd->offset) != 0) {
+             ERROR_WITH_ERRNO("Can't truncate output file to "
+                              "offset %"PRIu64, out_fd->offset);
+             return WIMLIB_ERR_WRITE;
+         }
+     }
+
+     lte->out_reshdr.size_in_wim = lte->size;
+     lte->out_reshdr.flags &= ~(WIM_RESHDR_FLAG_COMPRESSED |
+                                WIM_RESHDR_FLAG_PACKED_STREAMS);
+     return 0;
+ }
+
+ /* Write the next chunk of (typically compressed) data to the output WIM,
+  * handling the writing of the chunk table.
+  *
+  * @cchunk points to @csize bytes to be written; @usize is the number of
+  * uncompressed bytes those represent and is what advances the position in the
+  * stream(s) being written.  When a stream is finished by this chunk, it is
+  * taken off the pending list (non-packed mode, where its resource is also
+  * finalized) or moved to ctx->pack_streams (packed mode).
+  *
+  * Returns 0 on success or a nonzero error code.  */
+ static int
+ write_chunk(struct write_streams_ctx *ctx, const void *cchunk,
+             size_t csize, size_t usize)
+ {
+     int ret;
+
+     struct wim_lookup_table_entry *lte;
+     u32 completed_stream_count;
+     u32 completed_size;
+
+     /* The chunk belongs to the stream at the head of the pending list.  */
+     lte = list_entry(ctx->pending_streams.next,
+                      struct wim_lookup_table_entry, write_streams_list);
+
+     if (ctx->cur_write_stream_offset == 0 &&
+         !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS))
+     {
+         /* Starting to write a new stream in non-packed mode.  */
+
+         if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) {
+             int additional_reshdr_flags = 0;
+             if (ctx->compressor != NULL)
+                 additional_reshdr_flags |= WIM_RESHDR_FLAG_COMPRESSED;
+
+             DEBUG("Writing pipable WIM stream header "
+                   "(offset=%"PRIu64")", ctx->out_fd->offset);
+
+             ret = write_pwm_stream_header(lte, ctx->out_fd,
+                                           additional_reshdr_flags);
+             if (ret)
+                 return ret;
+         }
+
+         ret = begin_write_resource(ctx, lte->size);
+         if (ret)
+             return ret;
+     }
+
+     if (ctx->compressor != NULL) {
+         /* Record the compressed chunk size.  */
+         wimlib_assert(ctx->chunk_index < ctx->num_alloc_chunks);
+         ctx->chunk_csizes[ctx->chunk_index++] = csize;
+
+         /* If writing a pipable WIM, before the chunk data write a chunk
+          * header that provides the compressed chunk size.  */
+         if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) {
+             struct pwm_chunk_hdr chunk_hdr = {
+                 .compressed_size = cpu_to_le32(csize),
+             };
+             ret = full_write(ctx->out_fd, &chunk_hdr,
+                              sizeof(chunk_hdr));
+             if (ret)
+                 goto error;
+         }
+     }
+
+     /* Write the chunk data.  */
+     ret = full_write(ctx->out_fd, cchunk, csize);
+     if (ret)
+         goto error;
+
+     ctx->cur_write_stream_offset += usize;
+
+     completed_size = usize;
+     completed_stream_count = 0;
+     if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) {
+         /* Wrote chunk in packed mode.  It may have finished multiple
+          * streams.  */
+         while (ctx->cur_write_stream_offset > lte->size) {
+             struct wim_lookup_table_entry *next;
+
+             ctx->cur_write_stream_offset -= lte->size;
+
+             wimlib_assert(!list_is_singular(&ctx->pending_streams) &&
+                           !list_empty(&ctx->pending_streams));
+             /* Fetch the successor before @lte is moved to the
+              * other list.  */
+             next = list_entry(lte->write_streams_list.next,
+                               struct wim_lookup_table_entry,
+                               write_streams_list);
+             list_move_tail(&lte->write_streams_list,
+                            &ctx->pack_streams);
+             lte = next;
+             completed_stream_count++;
+         }
+         if (ctx->cur_write_stream_offset == lte->size) {
+             ctx->cur_write_stream_offset = 0;
+             list_move_tail(&lte->write_streams_list,
+                            &ctx->pack_streams);
+             completed_stream_count++;
+         }
+     } else {
+         /* Wrote chunk in non-packed mode.  It may have finished a
+          * stream.  */
+         if (ctx->cur_write_stream_offset == lte->size) {
+
+             completed_stream_count++;
+
+             list_del(&lte->write_streams_list);
+
+             wimlib_assert(ctx->cur_write_stream_offset ==
+                           ctx->cur_write_res_size);
+
+             ret = end_write_resource(ctx, &lte->out_reshdr);
+             if (ret)
+                 return ret;
+
+             lte->out_reshdr.flags = filter_resource_flags(lte->flags);
+             if (ctx->compressor != NULL)
+                 lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED;
+
+             if (ctx->compressor != NULL &&
+                 lte->out_reshdr.size_in_wim >= lte->out_reshdr.uncompressed_size &&
+                 !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) &&
+                 !(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS))
+             {
+                 /* Stream did not compress to less than its original
+                  * size.  If we're not writing a pipable WIM (which
+                  * could mean the output file descriptor is
+                  * non-seekable), and the stream isn't located in a
+                  * resource pack (which would make reading it again
+                  * costly), truncate the file to the start of the stream
+                  * and write it uncompressed instead.  */
+                 DEBUG("Stream of size %"PRIu64" did not compress to "
+                       "less than original size; writing uncompressed.",
+                       lte->size);
+                 ret = write_stream_uncompressed(lte, ctx->out_fd);
+                 if (ret)
+                     return ret;
+             }
+             wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size);
+
+             ctx->cur_write_stream_offset = 0;
+         }
+     }
+
+     do_write_streams_progress(&ctx->progress_data, lte,
+                               completed_size, completed_stream_count,
+                               false);
+
+     return 0;
+
+ error:
+     ERROR_WITH_ERRNO("Write error");
+     return ret;
+ }
+
+ /* Hand one uncompressed chunk to the chunk compressor.
+  *
+  * Whenever the compressor declines the chunk, one finished chunk is pulled
+  * out of it with get_chunk() and written with write_chunk() before the
+  * submission is retried.
+  *
+  * Returns 0 once the chunk has been accepted, or the nonzero error code from
+  * write_chunk(). */
+ static int
+ submit_chunk_for_compression(struct write_streams_ctx *ctx,
+                              const void *chunk, size_t size)
+ {
+     for (;;) {
+         const void *done_data;
+         unsigned done_csize;
+         unsigned done_usize;
+         bool have_chunk;
+         int status;
+
+         if (ctx->compressor->submit_chunk(ctx->compressor, chunk, size))
+             return 0;
+
+         /* Too many chunks outstanding: drain one result first. */
+         have_chunk = ctx->compressor->get_chunk(ctx->compressor,
+                                                 &done_data,
+                                                 &done_csize,
+                                                 &done_usize);
+
+         wimlib_assert(have_chunk);
+
+         status = write_chunk(ctx, done_data, done_csize, done_usize);
+         if (status)
+             return status;
+     }
+ }
+
+ /* Accept the next piece of stream data that is to go into a WIM resource.
+  *
+  * Without a compressor the data is passed directly to write_chunk().  With a
+  * compressor, the incoming data is re-cut into pieces of the size needed for
+  * compression, since @size need not match ctx->out_chunk_size.  A piece that
+  * cannot be completed from the current input is accumulated in
+  * ctx->chunk_buf and finished by a later call.
+  *
+  * Returns 0 on success, or the nonzero error code from write_chunk() or
+  * submit_chunk_for_compression(). */
+ static int
+ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx)
+ {
+     struct write_streams_ctx *ctx = _ctx;
+     const u8 *in_pos;
+     const u8 *in_end;
+     int status;
+
+     wimlib_assert(size != 0);
+
+     if (ctx->compressor == NULL) {
+         /* No compression: the data is written as provided. */
+         status = write_chunk(ctx, chunk, size, size);
+         if (status)
+             return status;
+         ctx->cur_read_stream_offset += size;
+         return 0;
+     }
+
+     in_pos = chunk;
+     in_end = in_pos + size;
+     do {
+         const u8 *piece;
+         size_t piece_size;
+
+         if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) {
+             /* The last piece of a stream may be shorter than the
+              * output chunk size. */
+             u64 stream_bytes_left;
+
+             stream_bytes_left = ctx->cur_read_stream_size -
+                                 ctx->cur_read_stream_offset;
+             piece_size = min(ctx->out_chunk_size,
+                              ctx->chunk_buf_filled +
+                              stream_bytes_left);
+         } else {
+             piece_size = ctx->out_chunk_size;
+         }
+
+         if (ctx->chunk_buf_filled != 0 ||
+             in_end - in_pos < piece_size)
+         {
+             /* Go through the intermediate buffer. */
+             size_t copy_len;
+
+             copy_len = min(in_end - in_pos,
+                            piece_size - ctx->chunk_buf_filled);
+
+             memcpy(&ctx->chunk_buf[ctx->chunk_buf_filled],
+                    in_pos, copy_len);
+
+             in_pos += copy_len;
+             ctx->cur_read_stream_offset += copy_len;
+             ctx->chunk_buf_filled += copy_len;
+
+             /* Still incomplete: wait for more input. */
+             if (ctx->chunk_buf_filled != piece_size)
+                 break;
+
+             piece = ctx->chunk_buf;
+             ctx->chunk_buf_filled = 0;
+         } else {
+             /* The input already holds a whole piece; use it in
+              * place. */
+             piece = in_pos;
+             in_pos += piece_size;
+             ctx->cur_read_stream_offset += piece_size;
+         }
+
+         status = submit_chunk_for_compression(ctx, piece, piece_size);
+         if (status)
+             return status;
+
+     } while (in_pos != in_end);
+     return 0;
+ }
+
+ /* Finish processing a stream for writing.  It may not have been completely
+  * written yet, as the chunk_compressor implementation may still have chunks
+  * buffered or being compressed.
+  *
+  * If the stream was replaced by a duplicate in write_stream_begin_read(), the
+  * superseded entry @lte is freed here.  Otherwise, a still-unhashed stream is
+  * removed from the unhashed list and inserted into the lookup table.
+  *
+  * Returns @status unchanged.  */
+ static int
+ write_stream_end_read(struct wim_lookup_table_entry *lte, int status, void *_ctx)
+ {
+     struct write_streams_ctx *ctx = _ctx;
+
+     if (status == 0) {
+         wimlib_assert(ctx->cur_read_stream_offset == ctx->cur_read_stream_size);
+     }
+
+     if (ctx->stream_was_duplicate) {
+         free_lookup_table_entry(lte);
+     } else if (lte->unhashed && ctx->lookup_table != NULL) {
+         list_del(&lte->unhashed_list);
+         lookup_table_insert(ctx->lookup_table, lte);
+         lte->unhashed = 0;
+     }
+     return status;
+ }
+
+ /* Initialize the write-streams progress data in @ctx from @stream_list.
+  *
+  * The list is walked once to count the streams, sum their sizes, and count
+  * how many times the containing WIM changes between consecutive WIM-resident
+  * streams.  The latter only equals the number of distinct WIMs if the list is
+  * sorted so that all streams located in the same WIM (identified by its
+  * WIMStruct) are adjacent.  All "completed" counters are reset to zero. */
+ static void
+ compute_stream_list_stats(struct list_head *stream_list,
+                           struct write_streams_ctx *ctx)
+ {
+     struct wim_lookup_table_entry *cur;
+     WIMStruct *last_wim = NULL;
+     u64 stream_count = 0;
+     u64 byte_count = 0;
+     u64 part_count = 0;
+
+     list_for_each_entry(cur, stream_list, write_streams_list) {
+         stream_count++;
+         byte_count += cur->size;
+
+         /* Only streams stored in a WIM contribute to the part count. */
+         if (cur->resource_location != RESOURCE_IN_WIM)
+             continue;
+         if (cur->rspec->wim == last_wim)
+             continue;
+
+         last_wim = cur->rspec->wim;
+         part_count++;
+     }
+
+     /* Totals gathered above. */
+     ctx->progress_data.progress.write_streams.total_streams = stream_count;
+     ctx->progress_data.progress.write_streams.total_bytes = byte_count;
+     ctx->progress_data.progress.write_streams.total_parts = part_count;
+     ctx->progress_data.progress.write_streams.compression_type = ctx->out_ctype;
+
+     /* Nothing has been written yet. */
+     ctx->progress_data.progress.write_streams.completed_streams = 0;
+     ctx->progress_data.progress.write_streams.completed_bytes = 0;
+     ctx->progress_data.progress.write_streams.completed_parts = 0;
+     ctx->progress_data.next_progress = 0;
+     ctx->progress_data.prev_wim_part = NULL;
+ }
+
+/* Find streams in @stream_list that can be copied to the output WIM in raw form
+ * rather than compressed. Delete these streams from @stream_list, and move one
+ * per resource to @raw_copy_resources. Return the total uncompressed size of
+ * the streams that need to be compressed. */
+static u64
+find_raw_copy_resources(struct list_head *stream_list,
+ int write_resource_flags,
+ int out_ctype,
+ u32 out_chunk_size,
+ struct list_head *raw_copy_resources)
+{
+ struct wim_lookup_table_entry *lte, *tmp;
+ u64 num_bytes_to_compress = 0;
+
+ INIT_LIST_HEAD(raw_copy_resources);
+
+ /* Initialize temporary raw_copy_ok flag. */
+ list_for_each_entry(lte, stream_list, write_streams_list)
+ if (lte->resource_location == RESOURCE_IN_WIM)
+ lte->rspec->raw_copy_ok = 0;
+
+ list_for_each_entry_safe(lte, tmp, stream_list, write_streams_list) {
+ if (lte->resource_location == RESOURCE_IN_WIM &&
+ lte->rspec->raw_copy_ok)
+ {
+ list_del(<e->write_streams_list);
+ } else if (can_raw_copy(lte, write_resource_flags,
+ out_ctype, out_chunk_size))
+ {
+ lte->rspec->raw_copy_ok = 1;
+ list_move_tail(<e->write_streams_list,
+ raw_copy_resources);
+ } else {
+ num_bytes_to_compress += lte->size;