+ return 0;
+}
+
+static int
+begin_write_resource(struct write_blobs_ctx *ctx, u64 res_expected_size)
+{
+ int ret;
+
+ wimlib_assert(res_expected_size != 0);
+
+ if (ctx->compressor != NULL) {
+ ret = begin_chunk_table(ctx, res_expected_size);
+ if (ret)
+ return ret;
+ }
+
+ /* Output file descriptor is now positioned at the offset at which to
+ * write the first chunk of the resource. */
+ ctx->chunks_start_offset = ctx->out_fd->offset;
+ ctx->cur_write_blob_offset = 0;
+ ctx->cur_write_res_size = res_expected_size;
+ return 0;
+}
+
/*
 * Finish the chunk table for a compressed resource whose chunk data has all
 * been written.  The per-chunk compressed sizes accumulated in
 * ctx->chunk_csizes are converted in place into on-disk chunk table entries,
 * then the table is written: appended after the chunk data for pipable WIMs,
 * or pwrite()d back in front of the chunk data otherwise (the space there was
 * presumably reserved earlier — see begin_chunk_table(); TODO confirm).
 *
 * @res_actual_size is the resource's total uncompressed size.  On success,
 * the resource's starting offset and total stored (on-disk) size are
 * returned in *res_start_offset_ret and *res_store_size_ret.
 */
static int
end_chunk_table(struct write_blobs_ctx *ctx, u64 res_actual_size,
		u64 *res_start_offset_ret, u64 *res_store_size_ret)
{
	size_t actual_num_chunks;
	size_t actual_num_chunk_entries;
	size_t chunk_entry_size;
	int ret;

	/* In the non-solid format the first chunk's offset (0) is implicit,
	 * so the table has one fewer entry than there are chunks. */
	actual_num_chunks = ctx->chunk_index;
	actual_num_chunk_entries = actual_num_chunks;
	if (!(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID))
		actual_num_chunk_entries--;

	/* Entry width (4 or 8 bytes) depends on the resource size and on
	 * whether the solid format is in use. */
	chunk_entry_size = get_chunk_entry_size(res_actual_size,
						0 != (ctx->write_resource_flags &
						      WRITE_RESOURCE_FLAG_SOLID));

	/* The u64 chunk_csizes[] array is overwritten in place with the
	 * narrower little-endian entries.  The may-alias typedefs keep this
	 * type-punning legal under strict aliasing. */
	typedef le64 _may_alias_attribute aliased_le64_t;
	typedef le32 _may_alias_attribute aliased_le32_t;

	if (chunk_entry_size == 4) {
		aliased_le32_t *entries = (aliased_le32_t*)ctx->chunk_csizes;

		if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
			/* Solid format: each entry is a chunk's compressed
			 * size. */
			for (size_t i = 0; i < actual_num_chunk_entries; i++)
				entries[i] = cpu_to_le32(ctx->chunk_csizes[i]);
		} else {
			/* Non-solid format: each entry is the cumulative
			 * offset of chunk i+1, i.e. the running sum of the
			 * preceding compressed chunk sizes.  Reading
			 * chunk_csizes[i + 1] before storing entries[i] is
			 * essential because the array is being overwritten in
			 * place. */
			u32 offset = ctx->chunk_csizes[0];
			for (size_t i = 0; i < actual_num_chunk_entries; i++) {
				u32 next_size = ctx->chunk_csizes[i + 1];
				entries[i] = cpu_to_le32(offset);
				offset += next_size;
			}
		}
	} else {
		/* Same as above, but with 8-byte entries. */
		aliased_le64_t *entries = (aliased_le64_t*)ctx->chunk_csizes;

		if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
			for (size_t i = 0; i < actual_num_chunk_entries; i++)
				entries[i] = cpu_to_le64(ctx->chunk_csizes[i]);
		} else {
			u64 offset = ctx->chunk_csizes[0];
			for (size_t i = 0; i < actual_num_chunk_entries; i++) {
				u64 next_size = ctx->chunk_csizes[i + 1];
				entries[i] = cpu_to_le64(offset);
				offset += next_size;
			}
		}
	}

	size_t chunk_table_size = actual_num_chunk_entries * chunk_entry_size;
	u64 res_start_offset;
	u64 res_end_offset;

	if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) {
		/* Pipable WIM: the chunk table is simply appended after the
		 * chunk data, since we cannot seek backwards in a pipe. */
		ret = full_write(ctx->out_fd, ctx->chunk_csizes, chunk_table_size);
		if (ret)
			goto write_error;
		res_end_offset = ctx->out_fd->offset;
		res_start_offset = ctx->chunks_start_offset;
	} else {
		/* Normal WIM: the chunk table precedes the chunk data, so
		 * write it back at chunks_start_offset - chunk_table_size. */
		res_end_offset = ctx->out_fd->offset;

		u64 chunk_table_offset;

		chunk_table_offset = ctx->chunks_start_offset - chunk_table_size;

		if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
			/* Solid resources additionally carry a header before
			 * the chunk table giving the uncompressed size, chunk
			 * size, and compression format. */
			struct alt_chunk_table_header_disk hdr;

			hdr.res_usize = cpu_to_le64(res_actual_size);
			hdr.chunk_size = cpu_to_le32(ctx->out_chunk_size);
			hdr.compression_format = cpu_to_le32(ctx->out_ctype);

			/* The on-disk format stores the wimlib compression
			 * type values directly, so they must not change. */
			BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_XPRESS != 1);
			BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZX != 2);
			BUILD_BUG_ON(WIMLIB_COMPRESSION_TYPE_LZMS != 3);

			ret = full_pwrite(ctx->out_fd, &hdr, sizeof(hdr),
					  chunk_table_offset - sizeof(hdr));
			if (ret)
				goto write_error;
			res_start_offset = chunk_table_offset - sizeof(hdr);
		} else {
			res_start_offset = chunk_table_offset;
		}

		ret = full_pwrite(ctx->out_fd, ctx->chunk_csizes,
				  chunk_table_size, chunk_table_offset);
		if (ret)
			goto write_error;
	}

	*res_start_offset_ret = res_start_offset;
	*res_store_size_ret = res_end_offset - res_start_offset;

	return 0;

write_error:
	ERROR_WITH_ERRNO("Write error");
	return ret;
}
+
+/* Finish writing a WIM resource by writing or updating the chunk table (if not
+ * writing the data uncompressed) and loading its metadata into @out_reshdr. */
+static int
+end_write_resource(struct write_blobs_ctx *ctx, struct wim_reshdr *out_reshdr)
+{
+ int ret;
+ u64 res_size_in_wim;
+ u64 res_uncompressed_size;
+ u64 res_offset_in_wim;
+
+ wimlib_assert(ctx->cur_write_blob_offset == ctx->cur_write_res_size ||
+ (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID));
+ res_uncompressed_size = ctx->cur_write_res_size;
+
+ if (ctx->compressor) {
+ ret = end_chunk_table(ctx, res_uncompressed_size,
+ &res_offset_in_wim, &res_size_in_wim);
+ if (ret)
+ return ret;
+ } else {
+ res_offset_in_wim = ctx->chunks_start_offset;
+ res_size_in_wim = ctx->out_fd->offset - res_offset_in_wim;
+ }
+ out_reshdr->uncompressed_size = res_uncompressed_size;
+ out_reshdr->size_in_wim = res_size_in_wim;
+ out_reshdr->offset_in_wim = res_offset_in_wim;
+ return 0;
+}
+
+/* Call when no more data from the file at @path is needed. */
+static int
+done_with_file(const tchar *path, wimlib_progress_func_t progfunc, void *progctx)
+{
+ union wimlib_progress_info info;
+
+ info.done_with_file.path_to_file = path;
+
+ return call_progress(progfunc, WIMLIB_PROGRESS_MSG_DONE_WITH_FILE,
+ &info, progctx);
+}
+
+static int
+do_done_with_blob(struct blob_descriptor *blob,
+ wimlib_progress_func_t progfunc, void *progctx)
+{
+ int ret;
+ struct wim_inode *inode;
+ tchar *cookie1;
+ tchar *cookie2;
+
+ if (!blob->may_send_done_with_file)
+ return 0;
+
+ inode = blob->file_inode;
+
+ wimlib_assert(inode != NULL);
+ wimlib_assert(inode->i_num_remaining_streams > 0);
+ if (--inode->i_num_remaining_streams > 0)
+ return 0;
+
+ cookie1 = progress_get_streamless_path(blob->file_on_disk);
+ cookie2 = progress_get_win32_path(blob->file_on_disk);
+
+ ret = done_with_file(blob->file_on_disk, progfunc, progctx);
+
+ progress_put_win32_path(cookie2);
+ progress_put_streamless_path(cookie1);
+
+ return ret;
+}
+
+/* Handle WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES mode. */
+static inline int
+done_with_blob(struct blob_descriptor *blob, struct write_blobs_ctx *ctx)
+{
+ if (likely(!(ctx->write_resource_flags &
+ WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE)))
+ return 0;
+ return do_done_with_blob(blob, ctx->progress_data.progfunc,
+ ctx->progress_data.progctx);
+}
+
/* Begin processing a blob for writing.  Called by read_blob_list() before the
 * blob's data is provided.  May detect that the blob is a duplicate of one
 * already destined for the output WIM, in which case it returns
 * BEGIN_BLOB_STATUS_SKIP_BLOB so the data need not be read at all. */
static int
write_blob_begin_read(struct blob_descriptor *blob, void *_ctx)
{
	struct write_blobs_ctx *ctx = _ctx;
	int ret;

	wimlib_assert(blob->size > 0);

	ctx->cur_read_blob_offset = 0;
	ctx->cur_read_blob_size = blob->size;

	/* As an optimization, we allow some blobs to be "unhashed", meaning
	 * their SHA-1 message digests are unknown.  This is the case with
	 * blobs that are added by scanning a directory tree with
	 * wimlib_add_image(), for example.  Since WIM uses single-instance
	 * blobs, we don't know whether each such blob really needs to be
	 * written until it is actually checksummed, unless it has a unique
	 * size.  In such cases we read and checksum the blob in this
	 * function, thereby advancing ahead of read_blob_list(), which will
	 * still provide the data again to write_blob_process_chunk().  This
	 * is okay because an unhashed blob cannot be in a WIM resource, which
	 * might be costly to decompress. */
	if (ctx->blob_table != NULL && blob->unhashed && !blob->unique_size) {

		struct blob_descriptor *new_blob;

		/* Hash the blob now; this may find an existing descriptor
		 * with the same SHA-1 in the blob table. */
		ret = hash_unhashed_blob(blob, ctx->blob_table, &new_blob);
		if (ret)
			return ret;
		if (new_blob != blob) {
			/* Duplicate blob detected. */

			if (new_blob->will_be_in_output_wim ||
			    blob_filtered(new_blob, ctx->filter_ctx))
			{
				/* The duplicate blob is already being included
				 * in the output WIM, or it would be filtered
				 * out if it had been.  Skip writing this blob
				 * (and reading it again) entirely, passing its
				 * output reference count to the duplicate blob
				 * in the former case. */
				ret = do_write_blobs_progress(&ctx->progress_data,
							      blob->size, 1, true);
				list_del(&blob->write_blobs_list);
				list_del(&blob->blob_table_list);
				if (new_blob->will_be_in_output_wim)
					new_blob->out_refcnt += blob->out_refcnt;
				/* In solid mode the skipped blob's bytes must
				 * be subtracted from the pending resource
				 * size. */
				if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID)
					ctx->cur_write_res_size -= blob->size;
				if (!ret)
					ret = done_with_blob(blob, ctx);
				free_blob_descriptor(blob);
				if (ret)
					return ret;
				return BEGIN_BLOB_STATUS_SKIP_BLOB;
			} else {
				/* The duplicate blob can validly be written,
				 * but was not marked as such.  Discard the
				 * current blob descriptor and use the
				 * duplicate, but actually freeing the current
				 * blob descriptor must wait until
				 * read_blob_list() has finished reading its
				 * data. */
				list_replace(&blob->write_blobs_list,
					     &new_blob->write_blobs_list);
				list_replace(&blob->blob_table_list,
					     &new_blob->blob_table_list);
				blob->will_be_in_output_wim = 0;
				new_blob->out_refcnt = blob->out_refcnt;
				new_blob->will_be_in_output_wim = 1;
				new_blob->may_send_done_with_file = 0;
				blob = new_blob;
			}
		}
	}
	list_move_tail(&blob->write_blobs_list, &ctx->blobs_being_compressed);
	return 0;
}
+
/* Rewrite a blob that was just written compressed (as a non-solid WIM resource)
 * as uncompressed instead.  On success the blob's out_reshdr is updated to
 * describe the uncompressed resource.  This is best-effort: several failure
 * modes deliberately return 0 and simply keep the compressed version. */
static int
write_blob_uncompressed(struct blob_descriptor *blob, struct filedes *out_fd)
{
	int ret;
	u64 begin_offset = blob->out_reshdr.offset_in_wim;
	u64 end_offset = out_fd->offset;

	/* Can't seek back to the resource's start?  Keep the compressed
	 * version; this is not an error. */
	if (filedes_seek(out_fd, begin_offset) == -1)
		return 0;

	/* Re-extract the blob's raw data over the compressed resource. */
	ret = extract_blob_to_fd(blob, out_fd);
	if (ret) {
		/* Error reading the uncompressed data. */
		if (out_fd->offset == begin_offset &&
		    filedes_seek(out_fd, end_offset) != -1)
		{
			/* Nothing was actually written yet, and we successfully
			 * seeked to the end of the compressed resource, so
			 * don't issue a hard error; just keep the compressed
			 * resource instead. */
			WARNING("Recovered compressed resource of "
				"size %"PRIu64", continuing on.", blob->size);
			return 0;
		}
		return ret;
	}

	wimlib_assert(out_fd->offset - begin_offset == blob->size);

	/* The uncompressed data is shorter than the compressed resource was;
	 * trim the leftover bytes off the end of the file. */
	if (out_fd->offset < end_offset &&
	    0 != ftruncate(out_fd->fd, out_fd->offset))
	{
		ERROR_WITH_ERRNO("Can't truncate output file to "
				 "offset %"PRIu64, out_fd->offset);
		return WIMLIB_ERR_WRITE;
	}

	/* The resource now stores the raw data, so its stored size equals its
	 * uncompressed size and the compression flags no longer apply. */
	blob->out_reshdr.size_in_wim = blob->size;
	blob->out_reshdr.flags &= ~(WIM_RESHDR_FLAG_COMPRESSED |
				    WIM_RESHDR_FLAG_SOLID);
	return 0;
}
+
+/* Returns true if the specified blob, which was written as a non-solid
+ * resource, should be truncated from the WIM file and re-written uncompressed.
+ * blob->out_reshdr must be filled in from the initial write of the blob. */
+static bool
+should_rewrite_blob_uncompressed(const struct write_blobs_ctx *ctx,
+ const struct blob_descriptor *blob)
+{
+ /* If the compressed data is smaller than the uncompressed data, prefer
+ * the compressed data. */
+ if (blob->out_reshdr.size_in_wim < blob->out_reshdr.uncompressed_size)
+ return false;
+
+ /* If we're not actually writing compressed data, then there's no need
+ * for re-writing. */
+ if (!ctx->compressor)
+ return false;
+
+ /* If writing a pipable WIM, everything we write to the output is final
+ * (it might actually be a pipe!). */
+ if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE)
+ return false;
+
+ /* If the blob that would need to be re-read is located in a solid
+ * resource in another WIM file, then re-reading it would be costly. So
+ * don't do it.
+ *
+ * Exception: if the compressed size happens to be *exactly* the same as
+ * the uncompressed size, then the blob *must* be written uncompressed
+ * in order to remain compatible with the Windows Overlay Filesystem
+ * Filter Driver (WOF).
+ *
+ * TODO: we are currently assuming that the optimization for
+ * single-chunk resources in maybe_rewrite_blob_uncompressed() prevents
+ * this case from being triggered too often. To fully prevent excessive
+ * decompressions in degenerate cases, we really should obtain the
+ * uncompressed data by decompressing the compressed data we wrote to
+ * the output file.
+ */
+ if (blob->blob_location == BLOB_IN_WIM &&
+ blob->size != blob->rdesc->uncompressed_size &&
+ blob->size != blob->out_reshdr.size_in_wim)
+ return false;
+
+ return true;
+}
+
+static int
+maybe_rewrite_blob_uncompressed(struct write_blobs_ctx *ctx,
+ struct blob_descriptor *blob)
+{
+ if (!should_rewrite_blob_uncompressed(ctx, blob))
+ return 0;
+
+ /* Regular (non-solid) WIM resources with exactly one chunk and
+ * compressed size equal to uncompressed size are exactly the same as
+ * the corresponding compressed data --- since there must be 0 entries
+ * in the chunk table and the only chunk must be stored uncompressed.
+ * In this case, there's no need to rewrite anything. */
+ if (ctx->chunk_index == 1 &&
+ blob->out_reshdr.size_in_wim == blob->out_reshdr.uncompressed_size)
+ {
+ blob->out_reshdr.flags &= ~WIM_RESHDR_FLAG_COMPRESSED;
+ return 0;
+ }
+
+ return write_blob_uncompressed(blob, ctx->out_fd);
+}
+
+/* Write the next chunk of (typically compressed) data to the output WIM,
+ * handling the writing of the chunk table. */
+static int
+write_chunk(struct write_blobs_ctx *ctx, const void *cchunk,
+ size_t csize, size_t usize)
+{
+ int ret;
+ struct blob_descriptor *blob;
+ u32 completed_blob_count;
+ u32 completed_size;
+
+ blob = list_entry(ctx->blobs_being_compressed.next,
+ struct blob_descriptor, write_blobs_list);
+
+ if (ctx->cur_write_blob_offset == 0 &&
+ !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID))
+ {
+ /* Starting to write a new blob in non-solid mode. */
+
+ if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) {
+ ret = write_pwm_blob_header(blob, ctx->out_fd,
+ ctx->compressor != NULL);
+ if (ret)
+ return ret;
+ }
+
+ ret = begin_write_resource(ctx, blob->size);
+ if (ret)
+ return ret;
+ }
+
+ if (ctx->compressor != NULL) {
		/* Record the compressed chunk size. */
+ wimlib_assert(ctx->chunk_index < ctx->num_alloc_chunks);
+ ctx->chunk_csizes[ctx->chunk_index++] = csize;
+
+ /* If writing a pipable WIM, before the chunk data write a chunk
+ * header that provides the compressed chunk size. */
+ if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) {
+ struct pwm_chunk_hdr chunk_hdr = {
+ .compressed_size = cpu_to_le32(csize),
+ };
+ ret = full_write(ctx->out_fd, &chunk_hdr,
+ sizeof(chunk_hdr));
+ if (ret)
+ goto write_error;
+ }
+ }
+
+ /* Write the chunk data. */
+ ret = full_write(ctx->out_fd, cchunk, csize);
+ if (ret)
+ goto write_error;
+
+ ctx->cur_write_blob_offset += usize;
+
+ completed_size = usize;
+ completed_blob_count = 0;
+ if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
+ /* Wrote chunk in solid mode. It may have finished multiple
+ * blobs. */
+ struct blob_descriptor *next_blob;
+
+ while (blob && ctx->cur_write_blob_offset >= blob->size) {
+
+ ctx->cur_write_blob_offset -= blob->size;
+
+ if (ctx->cur_write_blob_offset)
+ next_blob = list_entry(blob->write_blobs_list.next,
+ struct blob_descriptor,
+ write_blobs_list);
+ else
+ next_blob = NULL;
+
+ ret = done_with_blob(blob, ctx);
+ if (ret)
+ return ret;
+ list_move_tail(&blob->write_blobs_list, &ctx->blobs_in_solid_resource);
+ completed_blob_count++;
+
+ blob = next_blob;
+ }
+ } else {
+ /* Wrote chunk in non-solid mode. It may have finished a
+ * blob. */
+ if (ctx->cur_write_blob_offset == blob->size) {
+
+ wimlib_assert(ctx->cur_write_blob_offset ==
+ ctx->cur_write_res_size);
+
+ ret = end_write_resource(ctx, &blob->out_reshdr);
+ if (ret)
+ return ret;
+
+ blob->out_reshdr.flags = reshdr_flags_for_blob(blob);
+ if (ctx->compressor != NULL)
+ blob->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED;
+
+ ret = maybe_rewrite_blob_uncompressed(ctx, blob);
+ if (ret)
+ return ret;
+
+ wimlib_assert(blob->out_reshdr.uncompressed_size == blob->size);
+
+ ctx->cur_write_blob_offset = 0;
+
+ ret = done_with_blob(blob, ctx);
+ if (ret)
+ return ret;
+ list_del(&blob->write_blobs_list);
+ completed_blob_count++;
+ }