+ return call_progress(progfunc, WIMLIB_PROGRESS_MSG_DONE_WITH_FILE,
+ &info, progctx);
+}
+
/*
 * Decrement the count of not-yet-fully-written streams for the inode backing
 * @blob, and fire the DONE_WITH_FILE progress message once that count reaches
 * zero --- i.e. once no stream of the on-disk file remains to be written.
 *
 * Returns 0, or a nonzero error code propagated from the progress callback.
 */
static int
do_done_with_blob(struct blob_descriptor *blob,
		  wimlib_progress_func_t progfunc, void *progctx)
{
	int ret;
	struct wim_inode *inode;
	tchar *cookie1;
	tchar *cookie2;

	/* Only blobs sourced from external files participate in
	 * DONE_WITH_FILE accounting (see init_done_with_file_info()). */
	if (!blob->may_send_done_with_file)
		return 0;

	inode = blob->file_inode;

	wimlib_assert(inode != NULL);
	wimlib_assert(inode->i_num_remaining_streams > 0);
	if (--inode->i_num_remaining_streams > 0)
		return 0;

	/* Temporarily adjust the path presented to the progress callback; the
	 * returned cookies are used to restore the original strings after the
	 * callback returns. */
	cookie1 = progress_get_streamless_path(blob->file_on_disk);
	cookie2 = progress_get_win32_path(blob->file_on_disk);

	ret = done_with_file(blob->file_on_disk, progfunc, progctx);

	/* Restore in reverse order of acquisition. */
	progress_put_win32_path(cookie2);
	progress_put_streamless_path(cookie1);

	return ret;
}
+
+/* Handle WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES mode. */
+static inline int
+done_with_blob(struct blob_descriptor *blob, struct write_blobs_ctx *ctx)
+{
+ if (likely(!(ctx->write_resource_flags &
+ WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE)))
+ return 0;
+ return do_done_with_blob(blob, ctx->progress_data.progfunc,
+ ctx->progress_data.progctx);
+}
+
+/* Begin processing a blob for writing. */
/* Begin processing a blob for writing.  Called by read_blob_list() before any
 * of the blob's data is provided.  Returns 0, BEGIN_BLOB_STATUS_SKIP_BLOB if
 * the blob turned out to be a duplicate that need not be read, or an error
 * code. */
static int
write_blob_begin_read(struct blob_descriptor *blob, void *_ctx)
{
	struct write_blobs_ctx *ctx = _ctx;
	int ret;

	/* Zero-length blobs are never written as resources and must have been
	 * excluded by the caller. */
	wimlib_assert(blob->size > 0);

	/* Reset the per-blob read position. */
	ctx->cur_read_blob_offset = 0;
	ctx->cur_read_blob_size = blob->size;

	/* As an optimization, we allow some blobs to be "unhashed", meaning
	 * their SHA-1 message digests are unknown.  This is the case with blobs
	 * that are added by scanning a directory tree with wimlib_add_image(),
	 * for example.  Since WIM uses single-instance blobs, we don't know
	 * whether each such blob really needs to be written until it is
	 * actually checksummed, unless it has a unique size.  In such cases we
	 * read and checksum the blob in this function, thereby advancing ahead
	 * of read_blob_list(), which will still provide the data again to
	 * write_blob_process_chunk().  This is okay because an unhashed blob
	 * cannot be in a WIM resource, which might be costly to decompress. */
	if (ctx->blob_table != NULL && blob->unhashed && !blob->unique_size) {

		struct blob_descriptor *new_blob;

		ret = hash_unhashed_blob(blob, ctx->blob_table, &new_blob);
		if (ret)
			return ret;
		if (new_blob != blob) {
			/* Duplicate blob detected. */

			if (new_blob->will_be_in_output_wim ||
			    blob_filtered(new_blob, ctx->filter_ctx))
			{
				/* The duplicate blob is already being included
				 * in the output WIM, or it would be filtered
				 * out if it had been.  Skip writing this blob
				 * (and reading it again) entirely, passing its
				 * output reference count to the duplicate blob
				 * in the former case. */
				ret = do_write_blobs_progress(&ctx->progress_data,
							      blob->size, 1, true);
				list_del(&blob->write_blobs_list);
				list_del(&blob->blob_table_list);
				if (new_blob->will_be_in_output_wim)
					new_blob->out_refcnt += blob->out_refcnt;
				/* In solid mode, the skipped blob's bytes no
				 * longer count towards the solid resource's
				 * uncompressed size. */
				if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)
					ctx->cur_write_res_size -= blob->size;
				if (!ret)
					ret = done_with_blob(blob, ctx);
				free_blob_descriptor(blob);
				if (ret)
					return ret;
				return BEGIN_BLOB_STATUS_SKIP_BLOB;
			} else {
				/* The duplicate blob can validly be written,
				 * but was not marked as such.  Discard the
				 * current blob descriptor and use the
				 * duplicate, but actually freeing the current
				 * blob descriptor must wait until
				 * read_blob_list() has finished reading its
				 * data. */
				list_replace(&blob->write_blobs_list,
					     &new_blob->write_blobs_list);
				list_replace(&blob->blob_table_list,
					     &new_blob->blob_table_list);
				blob->will_be_in_output_wim = 0;
				new_blob->out_refcnt = blob->out_refcnt;
				new_blob->will_be_in_output_wim = 1;
				new_blob->may_send_done_with_file = 0;
				blob = new_blob;
			}
		}
	}
	list_move_tail(&blob->write_blobs_list, &ctx->blobs_being_compressed);
	return 0;
}
+
+/* Rewrite a blob that was just written compressed (as a non-solid WIM resource)
+ * as uncompressed instead. */
/* Rewrite a blob that was just written compressed (as a non-solid WIM resource)
 * as uncompressed instead.  @blob->out_reshdr.offset_in_wim gives the start of
 * the already-written compressed resource; @out_fd->offset is currently its
 * end.  On success, @blob->out_reshdr is updated to describe the uncompressed
 * resource. */
static int
write_blob_uncompressed(struct blob_descriptor *blob, struct filedes *out_fd)
{
	int ret;
	u64 begin_offset = blob->out_reshdr.offset_in_wim;
	u64 end_offset = out_fd->offset;

	/* If we cannot seek back to the start of the compressed resource,
	 * silently keep it; the compressed data is still valid. */
	if (filedes_seek(out_fd, begin_offset) == -1)
		return 0;

	ret = extract_blob_to_fd(blob, out_fd);
	if (ret) {
		/* Error reading the uncompressed data. */
		if (out_fd->offset == begin_offset &&
		    filedes_seek(out_fd, end_offset) != -1)
		{
			/* Nothing was actually written yet, and we successfully
			 * seeked to the end of the compressed resource, so
			 * don't issue a hard error; just keep the compressed
			 * resource instead. */
			WARNING("Recovered compressed resource of "
				"size %"PRIu64", continuing on.", blob->size);
			return 0;
		}
		/* The compressed resource was partially overwritten or we
		 * could not seek back to its end --- nothing to salvage. */
		return ret;
	}

	wimlib_assert(out_fd->offset - begin_offset == blob->size);

	/* We could ftruncate() the file to 'out_fd->offset' here, but there
	 * isn't much point.  Usually we will only be truncating by a few bytes
	 * and will just overwrite the data immediately. */

	blob->out_reshdr.size_in_wim = blob->size;
	blob->out_reshdr.flags &= ~(WIM_RESHDR_FLAG_COMPRESSED |
				    WIM_RESHDR_FLAG_SOLID);
	return 0;
}
+
+/* Returns true if the specified blob, which was written as a non-solid
+ * resource, should be truncated from the WIM file and re-written uncompressed.
+ * blob->out_reshdr must be filled in from the initial write of the blob. */
+static bool
+should_rewrite_blob_uncompressed(const struct write_blobs_ctx *ctx,
+ const struct blob_descriptor *blob)
+{
+ /* If the compressed data is smaller than the uncompressed data, prefer
+ * the compressed data. */
+ if (blob->out_reshdr.size_in_wim < blob->out_reshdr.uncompressed_size)
+ return false;
+
+ /* If we're not actually writing compressed data, then there's no need
+ * for re-writing. */
+ if (!ctx->compressor)
+ return false;
+
+ /* If writing a pipable WIM, everything we write to the output is final
+ * (it might actually be a pipe!). */
+ if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE)
+ return false;
+
+ /* If the blob that would need to be re-read is located in a solid
+ * resource in another WIM file, then re-reading it would be costly. So
+ * don't do it.
+ *
+ * Exception: if the compressed size happens to be *exactly* the same as
+ * the uncompressed size, then the blob *must* be written uncompressed
+ * in order to remain compatible with the Windows Overlay Filesystem
+ * Filter Driver (WOF).
+ *
+ * TODO: we are currently assuming that the optimization for
+ * single-chunk resources in maybe_rewrite_blob_uncompressed() prevents
+ * this case from being triggered too often. To fully prevent excessive
+ * decompressions in degenerate cases, we really should obtain the
+ * uncompressed data by decompressing the compressed data we wrote to
+ * the output file.
+ */
+ if (blob->blob_location == BLOB_IN_WIM &&
+ blob->size != blob->rdesc->uncompressed_size &&
+ blob->size != blob->out_reshdr.size_in_wim)
+ return false;
+
+ return true;
+}
+
+static int
+maybe_rewrite_blob_uncompressed(struct write_blobs_ctx *ctx,
+ struct blob_descriptor *blob)
+{
+ if (!should_rewrite_blob_uncompressed(ctx, blob))
+ return 0;
+
+ /* Regular (non-solid) WIM resources with exactly one chunk and
+ * compressed size equal to uncompressed size are exactly the same as
+ * the corresponding compressed data --- since there must be 0 entries
+ * in the chunk table and the only chunk must be stored uncompressed.
+ * In this case, there's no need to rewrite anything. */
+ if (ctx->chunk_index == 1 &&
+ blob->out_reshdr.size_in_wim == blob->out_reshdr.uncompressed_size)
+ {
+ blob->out_reshdr.flags &= ~WIM_RESHDR_FLAG_COMPRESSED;
+ return 0;
+ }
+
+ return write_blob_uncompressed(blob, ctx->out_fd);
+}
+
+/* Write the next chunk of (typically compressed) data to the output WIM,
+ * handling the writing of the chunk table. */
+static int
+write_chunk(struct write_blobs_ctx *ctx, const void *cchunk,
+ size_t csize, size_t usize)
+{
+ int ret;
+ struct blob_descriptor *blob;
+ u32 completed_blob_count;
+ u32 completed_size;
+
+ blob = list_entry(ctx->blobs_being_compressed.next,
+ struct blob_descriptor, write_blobs_list);
+
+ if (ctx->cur_write_blob_offset == 0 &&
+ !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID))
+ {
+ /* Starting to write a new blob in non-solid mode. */
+
+ if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) {
+ ret = write_pwm_blob_header(blob, ctx->out_fd,
+ ctx->compressor != NULL);
+ if (ret)
+ return ret;
+ }
+
+ ret = begin_write_resource(ctx, blob->size);
+ if (ret)
+ return ret;
+ }
+
+ if (ctx->compressor != NULL) {
+ /* Record the compresed chunk size. */
+ wimlib_assert(ctx->chunk_index < ctx->num_alloc_chunks);
+ ctx->chunk_csizes[ctx->chunk_index++] = csize;
+
+ /* If writing a pipable WIM, before the chunk data write a chunk
+ * header that provides the compressed chunk size. */
+ if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) {
+ struct pwm_chunk_hdr chunk_hdr = {
+ .compressed_size = cpu_to_le32(csize),
+ };
+ ret = full_write(ctx->out_fd, &chunk_hdr,
+ sizeof(chunk_hdr));
+ if (ret)
+ goto write_error;
+ }
+ }
+
+ /* Write the chunk data. */
+ ret = full_write(ctx->out_fd, cchunk, csize);
+ if (ret)
+ goto write_error;
+
+ ctx->cur_write_blob_offset += usize;
+
+ completed_size = usize;
+ completed_blob_count = 0;
+ if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
+ /* Wrote chunk in solid mode. It may have finished multiple
+ * blobs. */
+ struct blob_descriptor *next_blob;
+
+ while (blob && ctx->cur_write_blob_offset >= blob->size) {
+
+ ctx->cur_write_blob_offset -= blob->size;
+
+ if (ctx->cur_write_blob_offset)
+ next_blob = list_entry(blob->write_blobs_list.next,
+ struct blob_descriptor,
+ write_blobs_list);
+ else
+ next_blob = NULL;
+
+ ret = done_with_blob(blob, ctx);
+ if (ret)
+ return ret;
+ list_move_tail(&blob->write_blobs_list, &ctx->blobs_in_solid_resource);
+ completed_blob_count++;
+
+ blob = next_blob;
+ }
+ } else {
+ /* Wrote chunk in non-solid mode. It may have finished a
+ * blob. */
+ if (ctx->cur_write_blob_offset == blob->size) {
+
+ wimlib_assert(ctx->cur_write_blob_offset ==
+ ctx->cur_write_res_size);
+
+ ret = end_write_resource(ctx, &blob->out_reshdr);
+ if (ret)
+ return ret;
+
+ blob->out_reshdr.flags = reshdr_flags_for_blob(blob);
+ if (ctx->compressor != NULL)
+ blob->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED;
+
+ ret = maybe_rewrite_blob_uncompressed(ctx, blob);
+ if (ret)
+ return ret;
+
+ wimlib_assert(blob->out_reshdr.uncompressed_size == blob->size);
+
+ ctx->cur_write_blob_offset = 0;
+
+ ret = done_with_blob(blob, ctx);
+ if (ret)
+ return ret;
+ list_del(&blob->write_blobs_list);
+ completed_blob_count++;
+ }
+ }
+
+ return do_write_blobs_progress(&ctx->progress_data, completed_size,
+ completed_blob_count, false);
+
+write_error:
+ ERROR_WITH_ERRNO("Write error");
+ return ret;
+}
+
+static int
+prepare_chunk_buffer(struct write_blobs_ctx *ctx)
+{
+ /* While we are unable to get a new chunk buffer due to too many chunks
+ * already outstanding, retrieve and write the next compressed chunk. */
+ while (!(ctx->cur_chunk_buf =
+ ctx->compressor->get_chunk_buffer(ctx->compressor)))
+ {
+ const void *cchunk;
+ u32 csize;
+ u32 usize;
+ bool bret;
+ int ret;
+
+ bret = ctx->compressor->get_compression_result(ctx->compressor,
+ &cchunk,
+ &csize,
+ &usize);
+ wimlib_assert(bret);
+
+ ret = write_chunk(ctx, cchunk, csize, usize);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+/* Process the next chunk of data to be written to a WIM resource. */
+static int
+write_blob_process_chunk(const void *chunk, size_t size, void *_ctx)
+{
+ struct write_blobs_ctx *ctx = _ctx;
+ int ret;
+ const u8 *chunkptr, *chunkend;
+
+ wimlib_assert(size != 0);
+
+ if (ctx->compressor == NULL) {
+ /* Write chunk uncompressed. */
+ ret = write_chunk(ctx, chunk, size, size);
+ if (ret)
+ return ret;
+ ctx->cur_read_blob_offset += size;
+ return 0;
+ }
+
+ /* Submit the chunk for compression, but take into account that the
+ * @size the chunk was provided in may not correspond to the
+ * @out_chunk_size being used for compression. */
+ chunkptr = chunk;
+ chunkend = chunkptr + size;
+ do {
+ size_t needed_chunk_size;
+ size_t bytes_consumed;
+
+ if (!ctx->cur_chunk_buf) {
+ ret = prepare_chunk_buffer(ctx);
+ if (ret)
+ return ret;
+ }
+
+ if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
+ needed_chunk_size = ctx->out_chunk_size;
+ } else {
+ needed_chunk_size = min(ctx->out_chunk_size,
+ ctx->cur_chunk_buf_filled +
+ (ctx->cur_read_blob_size -
+ ctx->cur_read_blob_offset));
+ }
+
+ bytes_consumed = min(chunkend - chunkptr,
+ needed_chunk_size - ctx->cur_chunk_buf_filled);
+
+ memcpy(&ctx->cur_chunk_buf[ctx->cur_chunk_buf_filled],
+ chunkptr, bytes_consumed);
+
+ chunkptr += bytes_consumed;
+ ctx->cur_read_blob_offset += bytes_consumed;
+ ctx->cur_chunk_buf_filled += bytes_consumed;
+
+ if (ctx->cur_chunk_buf_filled == needed_chunk_size) {
+ ctx->compressor->signal_chunk_filled(ctx->compressor,
+ ctx->cur_chunk_buf_filled);
+ ctx->cur_chunk_buf = NULL;
+ ctx->cur_chunk_buf_filled = 0;
+ }
+ } while (chunkptr != chunkend);
+ return 0;
+}
+
+/* Finish processing a blob for writing. It may not have been completely
+ * written yet, as the chunk_compressor implementation may still have chunks
+ * buffered or being compressed. */
+static int
+write_blob_end_read(struct blob_descriptor *blob, int status, void *_ctx)
+{
+ struct write_blobs_ctx *ctx = _ctx;
+
+ wimlib_assert(ctx->cur_read_blob_offset == ctx->cur_read_blob_size || status);
+
+ if (!blob->will_be_in_output_wim) {
+ /* The blob was a duplicate. Now that its data has finished
+ * being read, it is being discarded in favor of the duplicate
+ * entry. It therefore is no longer needed, and we can fire the
+ * DONE_WITH_FILE callback because the file will not be read
+ * again.
+ *
+ * Note: we can't yet fire DONE_WITH_FILE for non-duplicate
+ * blobs, since it needs to be possible to re-read the file if
+ * it does not compress to less than its original size. */
+ if (!status)
+ status = done_with_blob(blob, ctx);
+ free_blob_descriptor(blob);
+ } else if (!status && blob->unhashed && ctx->blob_table != NULL) {
+ /* The blob was not a duplicate and was previously unhashed.
+ * Since we passed COMPUTE_MISSING_BLOB_HASHES to
+ * read_blob_list(), blob->hash is now computed and valid. So
+ * turn this blob into a "hashed" blob. */
+ list_del(&blob->unhashed_list);
+ blob_table_insert(ctx->blob_table, blob);
+ blob->unhashed = 0;
+ }
+ return status;
+}
+
+/*
+ * Compute statistics about a list of blobs that will be written.
+ *
+ * Assumes the blobs are sorted such that all blobs located in each distinct WIM
+ * (specified by WIMStruct) are together.
+ *
+ * For compactions, also verify that there are no overlapping resources. This
+ * really should be checked earlier, but for now it's easiest to check here.
+ */
+static int
+compute_blob_list_stats(struct list_head *blob_list,
+ struct write_blobs_ctx *ctx)
+{
+ struct blob_descriptor *blob;
+ u64 total_bytes = 0;
+ u64 num_blobs = 0;
+ u64 total_parts = 0;
+ WIMStruct *prev_wim_part = NULL;
+ const struct wim_resource_descriptor *prev_rdesc = NULL;
+
+ list_for_each_entry(blob, blob_list, write_blobs_list) {
+ num_blobs++;
+ total_bytes += blob->size;
+ if (blob->blob_location == BLOB_IN_WIM) {
+ const struct wim_resource_descriptor *rdesc = blob->rdesc;
+ WIMStruct *wim = rdesc->wim;
+
+ if (prev_wim_part != wim) {
+ prev_wim_part = wim;
+ total_parts++;
+ }
+ if (unlikely(wim->being_compacted) && rdesc != prev_rdesc) {
+ if (prev_rdesc != NULL &&
+ rdesc->offset_in_wim <
+ prev_rdesc->offset_in_wim +
+ prev_rdesc->size_in_wim)
+ {
+ WARNING("WIM file contains overlapping "
+ "resources! Compaction is not "
+ "possible.");
+ return WIMLIB_ERR_RESOURCE_ORDER;
+ }
+ prev_rdesc = rdesc;
+ }
+ }
+ }
+ ctx->progress_data.progress.write_streams.total_bytes = total_bytes;
+ ctx->progress_data.progress.write_streams.total_streams = num_blobs;
+ ctx->progress_data.progress.write_streams.completed_bytes = 0;
+ ctx->progress_data.progress.write_streams.completed_streams = 0;
+ ctx->progress_data.progress.write_streams.compression_type = ctx->out_ctype;
+ ctx->progress_data.progress.write_streams.total_parts = total_parts;
+ ctx->progress_data.progress.write_streams.completed_parts = 0;
+ ctx->progress_data.next_progress = 0;
+ return 0;
+}
+
+/* Find blobs in @blob_list that can be copied to the output WIM in raw form
+ * rather than compressed. Delete these blobs from @blob_list and move them to
+ * @raw_copy_blobs. Return the total uncompressed size of the blobs that need
+ * to be compressed. */
+static u64
+find_raw_copy_blobs(struct list_head *blob_list, int write_resource_flags,
+ int out_ctype, u32 out_chunk_size,
+ struct list_head *raw_copy_blobs)
+{
+ struct blob_descriptor *blob, *tmp;
+ u64 num_nonraw_bytes = 0;
+
+ INIT_LIST_HEAD(raw_copy_blobs);
+
+ /* Initialize temporary raw_copy_ok flag. */
+ list_for_each_entry(blob, blob_list, write_blobs_list)
+ if (blob->blob_location == BLOB_IN_WIM)
+ blob->rdesc->raw_copy_ok = 0;
+
+ list_for_each_entry_safe(blob, tmp, blob_list, write_blobs_list) {
+ if (can_raw_copy(blob, write_resource_flags,
+ out_ctype, out_chunk_size))
+ {
+ blob->rdesc->raw_copy_ok = 1;
+ list_move_tail(&blob->write_blobs_list, raw_copy_blobs);
+ } else {
+ num_nonraw_bytes += blob->size;
+ }
+ }
+
+ return num_nonraw_bytes;
+}
+
+/* Copy a raw compressed resource located in another WIM file to the WIM file
+ * being written. */
+static int
+write_raw_copy_resource(struct wim_resource_descriptor *in_rdesc,
+ struct filedes *out_fd)
+{
+ u64 cur_read_offset;
+ u64 end_read_offset;
+ u8 buf[BUFFER_SIZE];
+ size_t bytes_to_read;
+ int ret;
+ struct filedes *in_fd;
+ struct blob_descriptor *blob;
+ u64 out_offset_in_wim;
+
+ /* Copy the raw data. */
+ cur_read_offset = in_rdesc->offset_in_wim;
+ end_read_offset = cur_read_offset + in_rdesc->size_in_wim;
+
+ out_offset_in_wim = out_fd->offset;
+
+ if (in_rdesc->is_pipable) {
+ if (cur_read_offset < sizeof(struct pwm_blob_hdr))
+ return WIMLIB_ERR_INVALID_PIPABLE_WIM;
+ cur_read_offset -= sizeof(struct pwm_blob_hdr);
+ out_offset_in_wim += sizeof(struct pwm_blob_hdr);
+ }
+ in_fd = &in_rdesc->wim->in_fd;
+ wimlib_assert(cur_read_offset != end_read_offset);
+
+ if (likely(!in_rdesc->wim->being_compacted) ||
+ in_rdesc->offset_in_wim > out_fd->offset) {
+ do {
+ bytes_to_read = min(sizeof(buf),
+ end_read_offset - cur_read_offset);
+
+ ret = full_pread(in_fd, buf, bytes_to_read,
+ cur_read_offset);
+ if (ret)
+ return ret;
+
+ ret = full_write(out_fd, buf, bytes_to_read);
+ if (ret)
+ return ret;
+
+ cur_read_offset += bytes_to_read;
+
+ } while (cur_read_offset != end_read_offset);
+ } else {
+ /* Optimization: the WIM file is being compacted and the
+ * resource being written is already in the desired location.
+ * Skip over the data instead of re-writing it. */
+
+ /* Due the earlier check for overlapping resources, it should
+ * never be the case that we already overwrote the resource. */
+ wimlib_assert(!(in_rdesc->offset_in_wim < out_fd->offset));
+
+ if (-1 == filedes_seek(out_fd, out_fd->offset + in_rdesc->size_in_wim))
+ return WIMLIB_ERR_WRITE;
+ }
+
+ list_for_each_entry(blob, &in_rdesc->blob_list, rdesc_node) {
+ if (blob->will_be_in_output_wim) {
+ blob_set_out_reshdr_for_reuse(blob);
+ if (in_rdesc->flags & WIM_RESHDR_FLAG_SOLID)
+ blob->out_res_offset_in_wim = out_offset_in_wim;
+ else
+ blob->out_reshdr.offset_in_wim = out_offset_in_wim;
+
+ }
+ }
+ return 0;
+}
+
+/* Copy a list of raw compressed resources located in other WIM file(s) to the
+ * WIM file being written. */
+static int
+write_raw_copy_resources(struct list_head *raw_copy_blobs,
+ struct filedes *out_fd,
+ struct write_blobs_progress_data *progress_data)
+{
+ struct blob_descriptor *blob;
+ int ret;
+
+ list_for_each_entry(blob, raw_copy_blobs, write_blobs_list)
+ blob->rdesc->raw_copy_ok = 1;
+
+ list_for_each_entry(blob, raw_copy_blobs, write_blobs_list) {
+ if (blob->rdesc->raw_copy_ok) {
+ /* Write each solid resource only one time. */
+ ret = write_raw_copy_resource(blob->rdesc, out_fd);
+ if (ret)
+ return ret;
+ blob->rdesc->raw_copy_ok = 0;
+ }
+ ret = do_write_blobs_progress(progress_data, blob->size,
+ 1, false);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+/* Wait for and write all chunks pending in the compressor. */
+static int
+finish_remaining_chunks(struct write_blobs_ctx *ctx)
+{
+ const void *cdata;
+ u32 csize;
+ u32 usize;
+ int ret;
+
+ if (ctx->compressor == NULL)
+ return 0;
+
+ if (ctx->cur_chunk_buf_filled != 0) {
+ ctx->compressor->signal_chunk_filled(ctx->compressor,
+ ctx->cur_chunk_buf_filled);
+ }
+
+ while (ctx->compressor->get_compression_result(ctx->compressor, &cdata,
+ &csize, &usize))
+ {
+ ret = write_chunk(ctx, cdata, csize, usize);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static void
+validate_blob_list(struct list_head *blob_list)
+{
+ struct blob_descriptor *blob;
+
+ list_for_each_entry(blob, blob_list, write_blobs_list) {
+ wimlib_assert(blob->will_be_in_output_wim);
+ wimlib_assert(blob->size != 0);
+ }
+}
+
+static inline bool
+blob_is_in_file(const struct blob_descriptor *blob)
+{
+ return blob->blob_location == BLOB_IN_FILE_ON_DISK
+#ifdef __WIN32__
+ || blob->blob_location == BLOB_IN_WINNT_FILE_ON_DISK
+ || blob->blob_location == BLOB_WIN32_ENCRYPTED
+#endif
+ ;
+}
+
+static void
+init_done_with_file_info(struct list_head *blob_list)
+{
+ struct blob_descriptor *blob;
+
+ list_for_each_entry(blob, blob_list, write_blobs_list) {
+ if (blob_is_in_file(blob)) {
+ blob->file_inode->i_num_remaining_streams = 0;
+ blob->may_send_done_with_file = 1;
+ } else {
+ blob->may_send_done_with_file = 0;
+ }
+ }
+
+ list_for_each_entry(blob, blob_list, write_blobs_list)
+ if (blob->may_send_done_with_file)
+ blob->file_inode->i_num_remaining_streams++;
+}
+
+/*
+ * Write a list of blobs to the output WIM file.
+ *
+ * @blob_list
+ * The list of blobs to write, specified by a list of 'struct blob_descriptor' linked
+ * by the 'write_blobs_list' member.
+ *
+ * @out_fd
+ * The file descriptor, opened for writing, to which to write the blobs.
+ *
+ * @write_resource_flags
+ * Flags to modify how the blobs are written:
+ *
+ * WRITE_RESOURCE_FLAG_RECOMPRESS:
+ * Force compression of all resources, even if they could otherwise
+ * be re-used by copying the raw data, due to being located in a WIM
+ * file with compatible compression parameters.
+ *
+ * WRITE_RESOURCE_FLAG_PIPABLE:
+ * Write the resources in the wimlib-specific pipable format, and
+ * furthermore do so in such a way that no seeking backwards in
+ * @out_fd will be performed (so it may be a pipe).
+ *
+ * WRITE_RESOURCE_FLAG_SOLID:
+ * Combine all the blobs into a single resource rather than writing
+ * them in separate resources. This flag is only valid if the WIM
+ * version number has been, or will be, set to WIM_VERSION_SOLID.
+ * This flag may not be combined with WRITE_RESOURCE_FLAG_PIPABLE.
+ *
+ * @out_ctype
+ * Compression format to use in the output resources, specified as one of
+ * the WIMLIB_COMPRESSION_TYPE_* constants. WIMLIB_COMPRESSION_TYPE_NONE
+ * is allowed.
+ *
+ * @out_chunk_size
+ * Compression chunk size to use in the output resources. It must be a
+ * valid chunk size for the specified compression format @out_ctype, unless
+ * @out_ctype is WIMLIB_COMPRESSION_TYPE_NONE, in which case this parameter
+ * is ignored.
+ *
+ * @num_threads
+ * Number of threads to use to compress data. If 0, a default number of
+ * threads will be chosen. The number of threads still may be decreased
+ * from the specified value if insufficient memory is detected.
+ *
+ * @blob_table
+ * If on-the-fly deduplication of unhashed blobs is desired, this parameter
+ * must be pointer to the blob table for the WIMStruct on whose behalf the
+ * blobs are being written. Otherwise, this parameter can be NULL.
+ *
+ * @filter_ctx
+ * If on-the-fly deduplication of unhashed blobs is desired, this parameter
+ * can be a pointer to a context for blob filtering used to detect whether
+ * the duplicate blob has been hard-filtered or not. If no blobs are
+ * hard-filtered or no blobs are unhashed, this parameter can be NULL.
+ *
+ * This function will write the blobs in @blob_list to resources in
+ * consecutive positions in the output WIM file, or to a single solid resource
+ * if WRITE_RESOURCE_FLAG_SOLID was specified in @write_resource_flags. In both
+ * cases, the @out_reshdr of the `struct blob_descriptor' for each blob written will be
+ * updated to specify its location, size, and flags in the output WIM. In the
+ * solid resource case, WIM_RESHDR_FLAG_SOLID will be set in the @flags field of
+ * each @out_reshdr, and furthermore @out_res_offset_in_wim and
+ * @out_res_size_in_wim of each @out_reshdr will be set to the offset and size,
+ * respectively, in the output WIM of the solid resource containing the
+ * corresponding blob.
+ *
+ * Each of the blobs to write may be in any location supported by the
+ * resource-handling code (specifically, read_blob_list()), such as the contents
+ * of external file that has been logically added to the output WIM, or a blob
+ * in another WIM file that has been imported, or even a blob in the "same" WIM
+ * file of which a modified copy is being written. In the case that a blob is
+ * already in a WIM file and uses compatible compression parameters, by default
+ * this function will re-use the raw data instead of decompressing it, then
+ * recompressing it; however, with WRITE_RESOURCE_FLAG_RECOMPRESS
+ * specified in @write_resource_flags, this is not done.
+ *
+ * As a further requirement, this function requires that the
+ * @will_be_in_output_wim member be set to 1 on all blobs in @blob_list as well
+ * as any other blobs not in @blob_list that will be in the output WIM file, but
+ * set to 0 on any other blobs in the output WIM's blob table or sharing a solid
+ * resource with a blob in @blob_list. Still furthermore, if on-the-fly
+ * deduplication of blobs is possible, then all blobs in @blob_list must also be
+ * linked by @blob_table_list along with any other blobs that have
+ * @will_be_in_output_wim set.
+ *
+ * This function handles on-the-fly deduplication of blobs for which SHA-1
+ * message digests have not yet been calculated. Such blobs may or may not need
+ * to be written. If @blob_table is non-NULL, then each blob in @blob_list that
+ * has @unhashed set but not @unique_size set is checksummed immediately before
+ * it would otherwise be read for writing in order to determine if it is
+ * identical to another blob already being written or one that would be filtered
+ * out of the output WIM using blob_filtered() with the context @filter_ctx.
+ * Each such duplicate blob will be removed from @blob_list, its reference count
+ * transferred to the pre-existing duplicate blob, its memory freed, and will
+ * not be written. Alternatively, if a blob in @blob_list is a duplicate with
+ * any blob in @blob_table that has not been marked for writing or would not be
+ * hard-filtered, it is freed and the pre-existing duplicate is written instead,
+ * taking ownership of the reference count and slot in the @blob_table_list.
+ *
+ * Returns 0 if every blob was either written successfully or did not need to be
+ * written; otherwise returns a non-zero error code.
+ */
+static int
+write_blob_list(struct list_head *blob_list,
+ struct filedes *out_fd,
+ int write_resource_flags,
+ int out_ctype,
+ u32 out_chunk_size,
+ unsigned num_threads,
+ struct blob_table *blob_table,
+ struct filter_context *filter_ctx,
+ wimlib_progress_func_t progfunc,
+ void *progctx)
+{
+ int ret;
+ struct write_blobs_ctx ctx;
+ struct list_head raw_copy_blobs;
+ u64 num_nonraw_bytes;
+
+ wimlib_assert((write_resource_flags &
+ (WRITE_RESOURCE_FLAG_SOLID |
+ WRITE_RESOURCE_FLAG_PIPABLE)) !=
+ (WRITE_RESOURCE_FLAG_SOLID |
+ WRITE_RESOURCE_FLAG_PIPABLE));
+
+ validate_blob_list(blob_list);
+
+ if (list_empty(blob_list))
+ return 0;
+
+ /* If needed, set auxiliary information so that we can detect when the
+ * library has finished using each external file. */
+ if (unlikely(write_resource_flags & WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE))
+ init_done_with_file_info(blob_list);
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ ctx.out_fd = out_fd;
+ ctx.blob_table = blob_table;
+ ctx.out_ctype = out_ctype;
+ ctx.out_chunk_size = out_chunk_size;
+ ctx.write_resource_flags = write_resource_flags;
+ ctx.filter_ctx = filter_ctx;
+
+ /*
+ * We normally sort the blobs to write by a "sequential" order that is
+ * optimized for reading. But when using solid compression, we instead
+ * sort the blobs by file extension and file name (when applicable; and
+ * we don't do this for blobs from solid resources) so that similar
+ * files are grouped together, which improves the compression ratio.
+ * This is somewhat of a hack since a blob does not necessarily
+ * correspond one-to-one with a filename, nor is there any guarantee
+ * that two files with similar names or extensions are actually similar
+ * in content. A potential TODO is to sort the blobs based on some
+ * measure of similarity of their actual contents.
+ */
+
+ ret = sort_blob_list_by_sequential_order(blob_list,
+ offsetof(struct blob_descriptor,
+ write_blobs_list));
+ if (ret)
+ return ret;
+
+ ret = compute_blob_list_stats(blob_list, &ctx);
+ if (ret)
+ return ret;
+
+ if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID_SORT) {
+ ret = sort_blob_list_for_solid_compression(blob_list);
+ if (unlikely(ret))
+ WARNING("Failed to sort blobs for solid compression. Continuing anyways.");
+ }
+
+ ctx.progress_data.progfunc = progfunc;
+ ctx.progress_data.progctx = progctx;
+
+ num_nonraw_bytes = find_raw_copy_blobs(blob_list, write_resource_flags,
+ out_ctype, out_chunk_size,
+ &raw_copy_blobs);
+
+ /* Copy any compressed resources for which the raw data can be reused
+ * without decompression. */
+ ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd,
+ &ctx.progress_data);
+
+ if (ret || num_nonraw_bytes == 0)
+ goto out_destroy_context;
+
+ /* Unless uncompressed output was required, allocate a chunk_compressor
+ * to do compression. There are serial and parallel implementations of
+ * the chunk_compressor interface. We default to parallel using the
+ * specified number of threads, unless the upper bound on the number
+ * bytes needing to be compressed is less than a heuristic value. */
+ if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
+
+ #ifdef ENABLE_MULTITHREADED_COMPRESSION
+ if (num_nonraw_bytes > max(2000000, out_chunk_size)) {
+ ret = new_parallel_chunk_compressor(out_ctype,
+ out_chunk_size,
+ num_threads, 0,
+ &ctx.compressor);
+ if (ret > 0) {
+ WARNING("Couldn't create parallel chunk compressor: %"TS".\n"
+ " Falling back to single-threaded compression.",
+ wimlib_get_error_string(ret));
+ }
+ }
+ #endif
+
+ if (ctx.compressor == NULL) {
+ ret = new_serial_chunk_compressor(out_ctype, out_chunk_size,
+ &ctx.compressor);
+ if (ret)
+ goto out_destroy_context;
+ }
+ }
+
+ if (ctx.compressor)
+ ctx.progress_data.progress.write_streams.num_threads = ctx.compressor->num_threads;
+ else
+ ctx.progress_data.progress.write_streams.num_threads = 1;
+
+ INIT_LIST_HEAD(&ctx.blobs_being_compressed);
+ INIT_LIST_HEAD(&ctx.blobs_in_solid_resource);
+
+ ret = call_progress(ctx.progress_data.progfunc,
+ WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
+ &ctx.progress_data.progress,
+ ctx.progress_data.progctx);
+ if (ret)
+ goto out_destroy_context;
+
+ if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
+ ret = begin_write_resource(&ctx, num_nonraw_bytes);
+ if (ret)
+ goto out_destroy_context;
+ }
+
+ /* Read the list of blobs needing to be compressed, using the specified
+ * callbacks to execute processing of the data. */
+
+ struct read_blob_callbacks cbs = {
+ .begin_blob = write_blob_begin_read,
+ .consume_chunk = write_blob_process_chunk,
+ .end_blob = write_blob_end_read,
+ .ctx = &ctx,
+ };
+
+ ret = read_blob_list(blob_list,
+ offsetof(struct blob_descriptor, write_blobs_list),
+ &cbs,
+ BLOB_LIST_ALREADY_SORTED |
+ VERIFY_BLOB_HASHES |
+ COMPUTE_MISSING_BLOB_HASHES);