+ ;
+}
+
+/*
+ * Prepare the per-blob and per-inode bookkeeping used to tell when an
+ * external file is no longer needed.
+ *
+ * First pass: every blob for which blob_is_in_file() holds gets
+ * may_send_done_with_file = 1 and its inode's i_num_remaining_streams reset
+ * to 0; every other blob gets may_send_done_with_file = 0.
+ *
+ * Second pass: each flagged blob bumps its inode's counter once, so the
+ * counter ends up equal to the number of flagged blobs in @blob_list that
+ * share that inode.
+ */
+static void
+init_done_with_file_info(struct list_head *blob_list)
+{
+	struct blob_descriptor *cur;
+
+	list_for_each_entry(cur, blob_list, write_blobs_list) {
+		if (!blob_is_in_file(cur)) {
+			cur->may_send_done_with_file = 0;
+			continue;
+		}
+		cur->file_inode->i_num_remaining_streams = 0;
+		cur->may_send_done_with_file = 1;
+	}
+
+	list_for_each_entry(cur, blob_list, write_blobs_list) {
+		if (!cur->may_send_done_with_file)
+			continue;
+		cur->file_inode->i_num_remaining_streams += 1;
+	}
+}
+
+/*
+ * Write a list of blobs to the output WIM file.
+ *
+ * @blob_list
+ * The list of blobs to write, specified by a list of 'struct blob_descriptor' linked
+ * by the 'write_blobs_list' member.
+ *
+ * @out_fd
+ * The file descriptor, opened for writing, to which to write the blobs.
+ *
+ * @write_resource_flags
+ * Flags to modify how the blobs are written:
+ *
+ * WRITE_RESOURCE_FLAG_RECOMPRESS:
+ * Force compression of all resources, even if they could otherwise
+ * be re-used by copying the raw data, due to being located in a WIM
+ * file with compatible compression parameters.
+ *
+ * WRITE_RESOURCE_FLAG_PIPABLE:
+ * Write the resources in the wimlib-specific pipable format, and
+ * furthermore do so in such a way that no seeking backwards in
+ * @out_fd will be performed (so it may be a pipe).
+ *
+ * WRITE_RESOURCE_FLAG_SOLID:
+ * Combine all the blobs into a single resource rather than writing
+ * them in separate resources. This flag is only valid if the WIM
+ * version number has been, or will be, set to WIM_VERSION_SOLID.
+ * This flag may not be combined with WRITE_RESOURCE_FLAG_PIPABLE.
+ *
+ * @out_ctype
+ * Compression format to use in the output resources, specified as one of
+ * the WIMLIB_COMPRESSION_TYPE_* constants. WIMLIB_COMPRESSION_TYPE_NONE
+ * is allowed.
+ *
+ * @out_chunk_size
+ * Compression chunk size to use in the output resources. It must be a
+ * valid chunk size for the specified compression format @out_ctype, unless
+ * @out_ctype is WIMLIB_COMPRESSION_TYPE_NONE, in which case this parameter
+ * is ignored.
+ *
+ * @num_threads
+ * Number of threads to use to compress data. If 0, a default number of
+ * threads will be chosen. The number of threads still may be decreased
+ * from the specified value if insufficient memory is detected.
+ *
+ * @blob_table
+ * If on-the-fly deduplication of unhashed blobs is desired, this parameter
+ * must be a pointer to the blob table for the WIMStruct on whose behalf the
+ * blobs are being written. Otherwise, this parameter can be NULL.
+ *
+ * @filter_ctx
+ * If on-the-fly deduplication of unhashed blobs is desired, this parameter
+ * can be a pointer to a context for blob filtering used to detect whether
+ * the duplicate blob has been hard-filtered or not. If no blobs are
+ * hard-filtered or no blobs are unhashed, this parameter can be NULL.
+ *
+ * This function will write the blobs in @blob_list to resources in
+ * consecutive positions in the output WIM file, or to a single solid resource
+ * if WRITE_RESOURCE_FLAG_SOLID was specified in @write_resource_flags. In both
+ * cases, the @out_reshdr of the `struct blob_descriptor' for each blob written will be
+ * updated to specify its location, size, and flags in the output WIM. In the
+ * solid resource case, WIM_RESHDR_FLAG_SOLID will be set in the @flags field of
+ * each @out_reshdr, and furthermore @out_res_offset_in_wim and
+ * @out_res_size_in_wim of each @out_reshdr will be set to the offset and size,
+ * respectively, in the output WIM of the solid resource containing the
+ * corresponding blob.
+ *
+ * Each of the blobs to write may be in any location supported by the
+ * resource-handling code (specifically, read_blob_list()), such as the contents
+ * of an external file that has been logically added to the output WIM, or a blob
+ * in another WIM file that has been imported, or even a blob in the "same" WIM
+ * file of which a modified copy is being written. In the case that a blob is
+ * already in a WIM file and uses compatible compression parameters, by default
+ * this function will re-use the raw data instead of decompressing it, then
+ * recompressing it; however, with WRITE_RESOURCE_FLAG_RECOMPRESS
+ * specified in @write_resource_flags, this is not done.
+ *
+ * As a further requirement, this function requires that the
+ * @will_be_in_output_wim member be set to 1 on all blobs in @blob_list as well
+ * as any other blobs not in @blob_list that will be in the output WIM file, but
+ * set to 0 on any other blobs in the output WIM's blob table or sharing a solid
+ * resource with a blob in @blob_list. Still furthermore, if on-the-fly
+ * deduplication of blobs is possible, then all blobs in @blob_list must also be
+ * linked by @blob_table_list along with any other blobs that have
+ * @will_be_in_output_wim set.
+ *
+ * This function handles on-the-fly deduplication of blobs for which SHA-1
+ * message digests have not yet been calculated. Such blobs may or may not need
+ * to be written. If @blob_table is non-NULL, then each blob in @blob_list that
+ * has @unhashed set but not @unique_size set is checksummed immediately before
+ * it would otherwise be read for writing in order to determine if it is
+ * identical to another blob already being written or one that would be filtered
+ * out of the output WIM using blob_filtered() with the context @filter_ctx.
+ * Each such duplicate blob will be removed from @blob_list, its reference count
+ * transferred to the pre-existing duplicate blob, its memory freed, and will not
+ * be written. Alternatively, if a blob in @blob_list is a duplicate with any
+ * blob in @blob_table that has not been marked for writing or would not be
+ * hard-filtered, it is freed and the pre-existing duplicate is written instead,
+ * taking ownership of the reference count and slot in the @blob_table_list.
+ *
+ * Returns 0 if every blob was either written successfully or did not need to be
+ * written; otherwise returns a non-zero error code.
+ */
+static int
+write_blob_list(struct list_head *blob_list,
+ struct filedes *out_fd,
+ int write_resource_flags,
+ int out_ctype,
+ u32 out_chunk_size,
+ unsigned num_threads,
+ struct blob_table *blob_table,
+ struct filter_context *filter_ctx,
+ wimlib_progress_func_t progfunc, /* progress callback; stored in ctx.progress_data */
+ void *progctx) /* stored next to progfunc and handed to call_progress() */
+{
+ int ret;
+ struct write_blobs_ctx ctx;
+ struct list_head raw_copy_blobs; /* list filled in by find_raw_copy_blobs() */
+ u64 num_nonraw_bytes; /* return value of find_raw_copy_blobs() */
+
+ wimlib_assert((write_resource_flags &
+ (WRITE_RESOURCE_FLAG_SOLID |
+ WRITE_RESOURCE_FLAG_PIPABLE)) !=
+ (WRITE_RESOURCE_FLAG_SOLID |
+ WRITE_RESOURCE_FLAG_PIPABLE)); /* SOLID and PIPABLE must not both be set */
+
+ validate_blob_list(blob_list);
+
+ if (list_empty(blob_list)) /* nothing to write */
+ return 0;
+
+ /* If needed, set auxiliary information so that we can detect when the
+ * library has finished using each external file. */
+ if (unlikely(write_resource_flags & WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE))
+ init_done_with_file_info(blob_list);
+
+ memset(&ctx, 0, sizeof(ctx)); /* zeroed ctx: compressor and chunk_csizes start out null */
+
+ ctx.out_fd = out_fd;
+ ctx.blob_table = blob_table;
+ ctx.out_ctype = out_ctype;
+ ctx.out_chunk_size = out_chunk_size;
+ ctx.write_resource_flags = write_resource_flags;
+ ctx.filter_ctx = filter_ctx;
+
+ /*
+ * We normally sort the blobs to write by a "sequential" order that is
+ * optimized for reading. But when using solid compression, we instead
+ * sort the blobs by file extension and file name (when applicable; and
+ * we don't do this for blobs from solid resources) so that similar
+ * files are grouped together, which improves the compression ratio.
+ * This is somewhat of a hack since a blob does not necessarily
+ * correspond one-to-one with a filename, nor is there any guarantee
+ * that two files with similar names or extensions are actually similar
+ * in content. A potential TODO is to sort the blobs based on some
+ * measure of similarity of their actual contents.
+ *
+ * In the code below the sequential sort always runs first; the solid
+ * sort is applied afterwards only when WRITE_RESOURCE_FLAG_SOLID_SORT
+ * is set, and its failure is only reported as a warning.
+ */
+
+ ret = sort_blob_list_by_sequential_order(blob_list,
+ offsetof(struct blob_descriptor,
+ write_blobs_list));
+ if (ret)
+ return ret; /* ctx holds no allocations yet, so the cleanup label is not needed */
+
+ compute_blob_list_stats(blob_list, &ctx);
+
+ if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID_SORT) {
+ ret = sort_blob_list_for_solid_compression(blob_list);
+ if (unlikely(ret)) /* non-fatal: ret is reassigned further down */
+ WARNING("Failed to sort blobs for solid compression. Continuing anyways.");
+ }
+
+ ctx.progress_data.progfunc = progfunc;
+ ctx.progress_data.progctx = progctx;
+
+ num_nonraw_bytes = find_raw_copy_blobs(blob_list, write_resource_flags,
+ out_ctype, out_chunk_size,
+ &raw_copy_blobs);
+
+ /* Copy any compressed resources for which the raw data can be reused
+ * without decompression. */
+ ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd,
+ &ctx.progress_data);
+
+ if (ret || num_nonraw_bytes == 0) /* error, or nothing is left for the read/compress phase */
+ goto out_destroy_context;
+
+ /* Unless uncompressed output was required, allocate a chunk_compressor
+ * to do compression. There are serial and parallel implementations of
+ * the chunk_compressor interface. We default to parallel using the
+ * specified number of threads, unless the upper bound on the number of
+ * bytes needing to be compressed is less than a heuristic value. */
+ if (out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
+
+ #ifdef ENABLE_MULTITHREADED_COMPRESSION
+ if (num_nonraw_bytes > max(2000000, out_chunk_size)) {
+ ret = new_parallel_chunk_compressor(out_ctype,
+ out_chunk_size,
+ num_threads, 0,
+ &ctx.compressor);
+ if (ret > 0) { /* NOTE(review): only positive codes warn; confirm what other returns mean */
+ WARNING("Couldn't create parallel chunk compressor: %"TS".\n"
+ " Falling back to single-threaded compression.",
+ wimlib_get_error_string(ret));
+ }
+ }
+ #endif
+
+ if (ctx.compressor == NULL) { /* no parallel compressor was set up above */
+ ret = new_serial_chunk_compressor(out_ctype, out_chunk_size,
+ &ctx.compressor);
+ if (ret)
+ goto out_destroy_context;
+ }
+ }
+
+ if (ctx.compressor) /* report the compressor's thread count, or 1 when uncompressed */
+ ctx.progress_data.progress.write_streams.num_threads = ctx.compressor->num_threads;
+ else
+ ctx.progress_data.progress.write_streams.num_threads = 1;
+
+ INIT_LIST_HEAD(&ctx.blobs_being_compressed);
+ INIT_LIST_HEAD(&ctx.blobs_in_solid_resource);
+
+ ret = call_progress(ctx.progress_data.progfunc,
+ WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
+ &ctx.progress_data.progress,
+ ctx.progress_data.progctx);
+ if (ret) /* a nonzero result from the progress call aborts the write */
+ goto out_destroy_context;
+
+ if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
+ ret = begin_write_resource(&ctx, num_nonraw_bytes); /* solid mode: one resource for all blobs */
+ if (ret)
+ goto out_destroy_context;
+ }
+
+ /* Read the list of blobs needing to be compressed, using the specified
+ * callbacks to execute processing of the data. */
+
+ struct read_blob_callbacks cbs = {
+ .begin_blob = write_blob_begin_read,
+ .consume_chunk = write_blob_process_chunk,
+ .end_blob = write_blob_end_read,
+ .ctx = &ctx,
+ };
+
+ ret = read_blob_list(blob_list,
+ offsetof(struct blob_descriptor, write_blobs_list),
+ &cbs,
+ BLOB_LIST_ALREADY_SORTED | /* the list was sorted earlier in this function */
+ VERIFY_BLOB_HASHES |
+ COMPUTE_MISSING_BLOB_HASHES);
+
+ if (ret)
+ goto out_destroy_context;
+
+ ret = finish_remaining_chunks(&ctx);
+ if (ret)
+ goto out_destroy_context;
+
+ if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
+ struct wim_reshdr reshdr; /* header of the solid resource, filled by end_write_resource() */
+ struct blob_descriptor *blob;
+ u64 offset_in_res; /* running offset of each blob inside the solid resource */
+
+ ret = end_write_resource(&ctx, &reshdr);
+ if (ret)
+ goto out_destroy_context;
+
+ offset_in_res = 0;
+ list_for_each_entry(blob, &ctx.blobs_in_solid_resource, write_blobs_list) {
+ blob->out_reshdr.size_in_wim = blob->size; /* per-blob header carries the blob's own size here */
+ blob->out_reshdr.flags = reshdr_flags_for_blob(blob) |
+ WIM_RESHDR_FLAG_SOLID;
+ blob->out_reshdr.uncompressed_size = 0;
+ blob->out_reshdr.offset_in_wim = offset_in_res; /* offset within the resource, not the file */
+ blob->out_res_offset_in_wim = reshdr.offset_in_wim;
+ blob->out_res_size_in_wim = reshdr.size_in_wim;
+ blob->out_res_uncompressed_size = reshdr.uncompressed_size;
+ offset_in_res += blob->size;
+ }
+ wimlib_assert(offset_in_res == reshdr.uncompressed_size); /* blob sizes must add up to the resource */
+ }
+
+out_destroy_context: /* shared exit for both success and failure; ret holds the result */
+ FREE(ctx.chunk_csizes);
+ if (ctx.compressor)
+ ctx.compressor->destroy(ctx.compressor);
+ return ret;
+}
+
+
+/*
+ * Write the file-data blobs in @blob_list to the output file of @wim.
+ *
+ * @write_flags is translated with write_flags_to_resource_flags().  When the
+ * result has WRITE_RESOURCE_FLAG_SOLID set, the WIM's solid compression type
+ * and solid chunk size are used; otherwise its regular output compression
+ * type and chunk size are used.  Everything else is forwarded unchanged to
+ * write_blob_list(), whose return value is returned.
+ */
+static int
+write_file_data_blobs(WIMStruct *wim,
+		      struct list_head *blob_list,
+		      int write_flags,
+		      unsigned num_threads,
+		      struct filter_context *filter_ctx)
+{
+	int res_flags = write_flags_to_resource_flags(write_flags);
+	int ctype = wim->out_compression_type;
+	u32 chunk_size = wim->out_chunk_size;
+
+	/* Solid output has its own compression parameters. */
+	if (res_flags & WRITE_RESOURCE_FLAG_SOLID) {
+		ctype = wim->out_solid_compression_type;
+		chunk_size = wim->out_solid_chunk_size;
+	}
+
+	return write_blob_list(blob_list, &wim->out_fd, res_flags,
+			       ctype, chunk_size, num_threads,
+			       wim->blob_table, filter_ctx,
+			       wim->progfunc, wim->progctx);
+}
+
+/*
+ * Write one blob as its own WIM resource.
+ *
+ * The blob is marked as being in the output WIM, placed on a temporary
+ * one-element list, and passed to write_blob_list() with
+ * WRITE_RESOURCE_FLAG_SOLID cleared, one thread, and no blob table, filter
+ * context or progress callback.  Returns the result of write_blob_list().
+ */
+static int
+write_wim_resource(struct blob_descriptor *blob,
+		   struct filedes *out_fd,
+		   int out_ctype,
+		   u32 out_chunk_size,
+		   int write_resource_flags)
+{
+	struct list_head single_blob;
+	const int nonsolid_flags = write_resource_flags & ~WRITE_RESOURCE_FLAG_SOLID;
+
+	INIT_LIST_HEAD(&single_blob);
+	blob->will_be_in_output_wim = 1;
+	list_add(&blob->write_blobs_list, &single_blob);
+
+	return write_blob_list(&single_blob, out_fd, nonsolid_flags,
+			       out_ctype, out_chunk_size,
+			       1,	/* num_threads */
+			       NULL,	/* blob_table */
+			       NULL,	/* filter_ctx */
+			       NULL,	/* progfunc */
+			       NULL);	/* progctx */
+}
+
+/*
+ * Write the contents of an in-memory buffer as a WIM resource.
+ *
+ * For an empty buffer nothing is written: @out_reshdr is zeroed and, if
+ * @hash_ret is non-NULL, it receives zero_hash.
+ *
+ * Otherwise a temporary blob descriptor on the stack is pointed at the
+ * buffer, its SHA-1 is computed up front, and it is written with
+ * write_wim_resource().  On success @out_reshdr receives the blob's output
+ * resource header and @hash_ret (if non-NULL) its hash.
+ *
+ * Returns 0 on success, or the nonzero code from write_wim_resource().
+ */
+int
+write_wim_resource_from_buffer(const void *buf,
+			       size_t buf_size,
+			       bool is_metadata,
+			       struct filedes *out_fd,
+			       int out_ctype,
+			       u32 out_chunk_size,
+			       struct wim_reshdr *out_reshdr,
+			       u8 *hash_ret,
+			       int write_resource_flags)
+{
+	struct blob_descriptor tmp_blob;
+	int err;
+
+	if (unlikely(buf_size == 0)) {
+		zero_reshdr(out_reshdr);
+		if (hash_ret != NULL)
+			copy_hash(hash_ret, zero_hash);
+		return 0;
+	}
+
+	blob_set_is_located_in_attached_buffer(&tmp_blob, (void *)buf, buf_size);
+	sha1_buffer(buf, buf_size, tmp_blob.hash);
+	tmp_blob.unhashed = 0;
+	tmp_blob.is_metadata = is_metadata;
+
+	err = write_wim_resource(&tmp_blob, out_fd, out_ctype, out_chunk_size,
+				 write_resource_flags);
+	if (err == 0) {
+		copy_reshdr(out_reshdr, &tmp_blob.out_reshdr);
+		if (hash_ret != NULL)
+			copy_hash(hash_ret, tmp_blob.hash);
+	}
+	return err;
+}
+
+struct blob_size_table { /* hash table of blobs, bucketed by hash_u64(blob->size) */
+ struct hlist_head *array; /* bucket heads; allocated in init_blob_size_table() */
+ size_t num_entries; /* incremented by every blob_size_table_insert() */
+ size_t capacity; /* number of buckets in 'array' */
+};
+
+/*
+ * Initialize @tab as an empty table with @capacity zero-initialized buckets.
+ *
+ * Returns 0 on success, or WIMLIB_ERR_NOMEM if the bucket array could not be
+ * allocated (in which case tab->array is NULL and the other fields are left
+ * untouched).
+ */
+static int
+init_blob_size_table(struct blob_size_table *tab, size_t capacity)
+{
+	tab->array = CALLOC(capacity, sizeof(*tab->array));
+	if (!tab->array)
+		return WIMLIB_ERR_NOMEM;
+
+	tab->capacity = capacity;
+	tab->num_entries = 0;
+	return 0;
+}
+
+/*
+ * Release the bucket array of @tab.  The blobs linked into the table are not
+ * touched, and @tab itself is not freed.
+ */
+static void
+destroy_blob_size_table(struct blob_size_table *tab)
+{
+	struct hlist_head *buckets = tab->array;
+
+	FREE(buckets);
+}
+
+/*
+ * Insert @blob into the size table @_tab (a 'struct blob_size_table *') and
+ * maintain the unique_size flags.
+ *
+ * @blob starts out with unique_size = 1.  If its bucket already holds a blob
+ * of the same size, unique_size is cleared on both @blob and the first such
+ * blob found.  @blob is then linked at the head of the bucket through its
+ * hash_list_2 member and the entry count is incremented.
+ *
+ * Always returns 0.
+ */
+static int
+blob_size_table_insert(struct blob_descriptor *blob, void *_tab)
+{
+	struct blob_size_table *table = _tab;
+	struct hlist_head *bucket;
+	struct blob_descriptor *other;
+
+	bucket = &table->array[hash_u64(blob->size) % table->capacity];
+
+	blob->unique_size = 1;
+	hlist_for_each_entry(other, bucket, hash_list_2) {
+		if (other->size != blob->size)
+			continue;
+		/* Same size seen before: neither blob is unique by size. */
+		other->unique_size = 0;
+		blob->unique_size = 0;
+		break;
+	}
+
+	hlist_add_head(&blob->hash_list_2, bucket);
+	table->num_entries += 1;
+	return 0;
+}
+
+struct find_blobs_ctx { /* NOTE(review): the users of this struct are outside this chunk */
+ WIMStruct *wim;
+ int write_flags; /* presumably the caller's write flags - confirm against users */
+ struct list_head blob_list;
+ struct blob_size_table blob_size_tab;
+};
+
+/*
+ * Record @nref output references to @blob.
+ *
+ * The first time a blob is seen (will_be_in_output_wim is clear) it is
+ * appended to @blob_list, marked as being in the output WIM, and its
+ * out_refcnt starts at @nref.  On later calls @nref is simply added to
+ * out_refcnt.
+ */
+static void
+reference_blob_for_write(struct blob_descriptor *blob,
+			 struct list_head *blob_list, u32 nref)
+{
+	if (blob->will_be_in_output_wim) {
+		blob->out_refcnt += nref;
+		return;
+	}
+
+	list_add_tail(&blob->write_blobs_list, blob_list);
+	blob->will_be_in_output_wim = 1;
+	blob->out_refcnt = nref;
+}
+
+/*
+ * Callback form of reference_blob_for_write(): unconditionally put @blob on
+ * the list @_blob_list (a 'struct list_head *') with an output reference
+ * count equal to its current refcnt.
+ *
+ * will_be_in_output_wim is cleared first so that reference_blob_for_write()
+ * always treats the blob as not yet seen.  Always returns 0.
+ */
+static int
+fully_reference_blob_for_write(struct blob_descriptor *blob, void *_blob_list)
+{
+	struct list_head *dest = _blob_list;
+
+	blob->will_be_in_output_wim = 0;
+	reference_blob_for_write(blob, dest, blob->refcnt);
+
+	return 0;
+}
+
+/*
+ * Reference, for writing, every blob used by the streams of @inode.
+ *
+ * Each stream whose blob is found via stream_blob() contributes i_nlink
+ * references to that blob (see reference_blob_for_write()).  A stream with no
+ * blob is acceptable only if its hash is the zero hash; otherwise the result
+ * of blob_not_found_error() is returned.
+ *
+ * Returns 0 on success.
+ */
+static int
+inode_find_blobs_to_reference(const struct wim_inode *inode,
+			      const struct blob_table *table,
+			      struct list_head *blob_list)
+{
+	unsigned idx;
+
+	wimlib_assert(inode->i_nlink > 0);
+
+	for (idx = 0; idx < inode->i_num_streams; idx++) {
+		struct blob_descriptor *found;
+		const u8 *digest;
+
+		found = stream_blob(&inode->i_streams[idx], table);
+		if (found != NULL) {
+			reference_blob_for_write(found, blob_list,
+						 inode->i_nlink);
+			continue;
+		}
+
+		/* No blob: only fine when the stream's hash is all zeroes. */
+		digest = stream_hash(&inode->i_streams[idx]);
+		if (!is_zero_hash(digest))
+			return blob_not_found_error(inode, digest);
+	}
+
+	return 0;
+}
+
+/*
+ * Blob-table callback: clear will_be_in_output_wim on @blob.  The second
+ * argument is unused.  Always returns 0.
+ */
+static int
+do_blob_set_not_in_output_wim(struct blob_descriptor *blob, void *_ignore)
+{
+	(void)_ignore;
+
+	blob->will_be_in_output_wim = 0;
+
+	return 0;
+}
+
+/*
+ * Per-image callback: reference, for writing, all blobs used by the inodes of
+ * the currently selected image of @wim.
+ *
+ * The destination list is taken from wim->private, which the caller must have
+ * pointed at a 'struct list_head'.  The image's unhashed blobs first have
+ * will_be_in_output_wim cleared, then each inode is processed with
+ * inode_find_blobs_to_reference().
+ *
+ * Returns 0 on success, or the first nonzero code from
+ * inode_find_blobs_to_reference().
+ */
+static int
+image_find_blobs_to_reference(WIMStruct *wim)
+{
+	struct wim_image_metadata *imd = wim_get_current_image_metadata(wim);
+	struct list_head *dest = wim->private;
+	struct blob_descriptor *unhashed;
+	struct wim_inode *inode;
+
+	image_for_each_unhashed_blob(unhashed, imd)
+		unhashed->will_be_in_output_wim = 0;
+
+	image_for_each_inode(inode, imd) {
+		int err = inode_find_blobs_to_reference(inode,
+							wim->blob_table,
+							dest);
+		if (err != 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Build in @blob_list_ret the list of blobs that the output WIM will contain,
+ * before any filtering is applied.
+ *
+ * Fast path (taken when @blobs_ok is nonzero and either all images are being
+ * written or @image is 1 and the WIM has exactly one image): every blob in the
+ * blob table, plus every unhashed blob of every image, is put on the list
+ * with its output reference count set to its existing refcnt.
+ *
+ * Slow path: will_be_in_output_wim is cleared on every blob in the blob
+ * table, then the selected image(s) are walked with
+ * image_find_blobs_to_reference(), which receives the list through
+ * wim->private.
+ *
+ * Returns 0 on success, or the nonzero code from for_image() in the slow path.
+ */
+static int
+prepare_unfiltered_list_of_blobs_in_output_wim(WIMStruct *wim,
+					       int image,
+					       int blobs_ok,
+					       struct list_head *blob_list_ret)
+{
+	struct blob_descriptor *unhashed;
+	unsigned idx;
+
+	INIT_LIST_HEAD(blob_list_ret);
+
+	if (!blobs_ok ||
+	    (image != WIMLIB_ALL_IMAGES &&
+	     !(image == 1 && wim->hdr.image_count == 1)))
+	{
+		/* Slow case: Walk through the images being written and
+		 * determine the blobs referenced. */
+		for_blob_in_table(wim->blob_table,
+				  do_blob_set_not_in_output_wim, NULL);
+		wim->private = blob_list_ret;
+		return for_image(wim, image, image_find_blobs_to_reference);
+	}
+
+	/* Fast case: Assume that all blobs are being written and that
+	 * the reference counts are correct. */
+	for_blob_in_table(wim->blob_table,
+			  fully_reference_blob_for_write,
+			  blob_list_ret);
+
+	for (idx = 0; idx < wim->hdr.image_count; idx++) {
+		struct wim_image_metadata *imd = wim->image_metadata[idx];
+
+		image_for_each_unhashed_blob(unhashed, imd)
+			fully_reference_blob_for_write(unhashed, blob_list_ret);
+	}
+
+	return 0;
+}
+
+struct insert_other_if_hard_filtered_ctx { /* callback context for insert_other_if_hard_filtered() */
+ struct blob_size_table *tab; /* table that qualifying blobs are inserted into */
+ struct filter_context *filter_ctx; /* passed to blob_hard_filtered() */
+};
+
+/*
+ * Blob-table callback: add @blob to the size table in @_ctx (a
+ * 'struct insert_other_if_hard_filtered_ctx *') if the blob is not marked as
+ * being in the output WIM and blob_hard_filtered() reports it as
+ * hard-filtered.  Other blobs are skipped.  Always returns 0.
+ */
+static int
+insert_other_if_hard_filtered(struct blob_descriptor *blob, void *_ctx)
+{
+	struct insert_other_if_hard_filtered_ctx *args = _ctx;
+
+	if (blob->will_be_in_output_wim)
+		return 0;
+
+	if (!blob_hard_filtered(blob, args->filter_ctx))
+		return 0;
+
+	blob_size_table_insert(blob, args->tab);
+	return 0;
+}
+
+static int
+determine_blob_size_uniquity(struct list_head *blob_list,
+ struct blob_table *lt,
+ struct filter_context *filter_ctx)
+{
+ int ret;
+ struct blob_size_table tab;
+ struct blob_descriptor *blob;
+
+ ret = init_blob_size_table(&tab, 9001);
+ if (ret)
+ return ret;
+
+ if (may_hard_filter_blobs(filter_ctx)) {
+ struct insert_other_if_hard_filtered_ctx ctx = {
+ .tab = &tab,
+ .filter_ctx = filter_ctx,
+ };
+ for_blob_in_table(lt, insert_other_if_hard_filtered, &ctx);