+/*
+ * Write a list of blobs to the output WIM file.
+ *
+ * @blob_list
+ * The list of blobs to write, specified by a list of 'struct blob_descriptor' linked
+ * by the 'write_blobs_list' member.
+ *
+ * @out_fd
+ * The file descriptor, opened for writing, to which to write the blobs.
+ *
+ * @write_resource_flags
+ * Flags to modify how the blobs are written:
+ *
+ * WRITE_RESOURCE_FLAG_RECOMPRESS:
+ * Force compression of all resources, even if they could otherwise
+ * be re-used by copying the raw data, due to being located in a WIM
+ * file with compatible compression parameters.
+ *
+ * WRITE_RESOURCE_FLAG_PIPABLE:
+ * Write the resources in the wimlib-specific pipable format, and
+ * furthermore do so in such a way that no seeking backwards in
+ * @out_fd will be performed (so it may be a pipe).
+ *
+ * WRITE_RESOURCE_FLAG_SOLID:
+ * Combine all the blobs into a single resource rather than writing
+ * them in separate resources. This flag is only valid if the WIM
+ * version number has been, or will be, set to WIM_VERSION_SOLID.
+ * This flag may not be combined with WRITE_RESOURCE_FLAG_PIPABLE.
+ *
+ * @out_ctype
+ * Compression format to use in the output resources, specified as one of
+ * the WIMLIB_COMPRESSION_TYPE_* constants. WIMLIB_COMPRESSION_TYPE_NONE
+ * is allowed.
+ *
+ * @out_chunk_size
+ * Compression chunk size to use in the output resources. It must be a
+ * valid chunk size for the specified compression format @out_ctype, unless
+ * @out_ctype is WIMLIB_COMPRESSION_TYPE_NONE, in which case this parameter
+ * is ignored.
+ *
+ * @num_threads
+ * Number of threads to use to compress data. If 0, a default number of
+ * threads will be chosen. The number of threads still may be decreased
+ * from the specified value if insufficient memory is detected.
+ *
+ * @blob_table
+ * If on-the-fly deduplication of unhashed blobs is desired, this parameter
+ * must be a pointer to the blob table for the WIMStruct on whose behalf the
+ * blobs are being written. Otherwise, this parameter can be NULL.
+ *
+ * @filter_ctx
+ * If on-the-fly deduplication of unhashed blobs is desired, this parameter
+ * can be a pointer to a context for blob filtering used to detect whether
+ * the duplicate blob has been hard-filtered or not. If no blobs are
+ * hard-filtered or no blobs are unhashed, this parameter can be NULL.
+ *
+ * This function will write the blobs in @blob_list to resources in
+ * consecutive positions in the output WIM file, or to a single solid resource
+ * if WRITE_RESOURCE_FLAG_SOLID was specified in @write_resource_flags. In both
+ * cases, the @out_reshdr of the `struct blob_descriptor' for each blob written will be
+ * updated to specify its location, size, and flags in the output WIM. In the
+ * solid resource case, WIM_RESHDR_FLAG_SOLID will be set in the @flags field of
+ * each @out_reshdr, and furthermore @out_res_offset_in_wim and
+ * @out_res_size_in_wim of each @out_reshdr will be set to the offset and size,
+ * respectively, in the output WIM of the solid resource containing the
+ * corresponding blob.
+ *
+ * Each of the blobs to write may be in any location supported by the
+ * resource-handling code (specifically, read_blob_list()), such as the contents
+ * of external file that has been logically added to the output WIM, or a blob
+ * in another WIM file that has been imported, or even a blob in the "same" WIM
+ * file of which a modified copy is being written. In the case that a blob is
+ * already in a WIM file and uses compatible compression parameters, by default
+ * this function will re-use the raw data instead of decompressing it, then
+ * recompressing it; however, with WRITE_RESOURCE_FLAG_RECOMPRESS
+ * specified in @write_resource_flags, this is not done.
+ *
+ * As a further requirement, this function requires that the
+ * @will_be_in_output_wim member be set to 1 on all blobs in @blob_list as well
+ * as any other blobs not in @blob_list that will be in the output WIM file, but
+ * set to 0 on any other blobs in the output WIM's blob table or sharing a solid
+ * resource with a blob in @blob_list. Still furthermore, if on-the-fly
+ * deduplication of blobs is possible, then all blobs in @blob_list must also be
+ * linked by @blob_table_list along with any other blobs that have
+ * @will_be_in_output_wim set.
+ *
+ * This function handles on-the-fly deduplication of blobs for which SHA-1
+ * message digests have not yet been calculated. Such blobs may or may not need
+ * to be written. If @blob_table is non-NULL, then each blob in @blob_list that
+ * has @unhashed set but not @unique_size set is checksummed immediately before
+ * it would otherwise be read for writing in order to determine if it is
+ * identical to another blob already being written or one that would be filtered
+ * out of the output WIM using blob_filtered() with the context @filter_ctx.
+ * Each such duplicate blob will be removed from @blob_list, its reference count
+ * transferred to the pre-existing duplicate blob, its memory freed, and it will
+ * not be written. Alternatively, if a blob in @blob_list is a duplicate with
+ * any blob in @blob_table that has not been marked for writing or would not be
+ * hard-filtered, it is freed and the pre-existing duplicate is written instead,
+ * taking ownership of the reference count and slot in the @blob_table_list.
+ *
+ * Returns 0 if every blob was either written successfully or did not need to be
+ * written; otherwise returns a non-zero error code.
+ */
+static int
+write_blob_list(struct list_head *blob_list,
+ struct filedes *out_fd,
+ int write_resource_flags,
+ int out_ctype,
+ u32 out_chunk_size,
+ unsigned num_threads,
+ struct blob_table *blob_table,
+ struct filter_context *filter_ctx,
+ wimlib_progress_func_t progfunc,
+ void *progctx)
+{
+ int ret;
+ struct write_blobs_ctx ctx;
+ struct list_head raw_copy_blobs;
+ u64 num_nonraw_bytes;
+
+ /* Solid and pipable writes are mutually exclusive. */
+ wimlib_assert((write_resource_flags &
+ (WRITE_RESOURCE_FLAG_SOLID |
+ WRITE_RESOURCE_FLAG_PIPABLE)) !=
+ (WRITE_RESOURCE_FLAG_SOLID |
+ WRITE_RESOURCE_FLAG_PIPABLE));
+
+ /* Sanity-check the caller-provided list. */
+ validate_blob_list(blob_list);
+
+ if (list_empty(blob_list))
+ return 0;
+
+ /* If needed, set auxiliary information so that we can detect when the
+ * library has finished using each external file. */
+ if (unlikely(write_resource_flags & WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE))
+ init_done_with_file_info(blob_list);
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ ctx.out_fd = out_fd;
+ ctx.blob_table = blob_table;
+ ctx.out_ctype = out_ctype;
+ ctx.out_chunk_size = out_chunk_size;
+ ctx.write_resource_flags = write_resource_flags;
+ ctx.filter_ctx = filter_ctx;
+
+ /*
+ * We normally sort the blobs to write by a "sequential" order that is
+ * optimized for reading. But when using solid compression, we instead
+ * sort the blobs by file extension and file name (when applicable; and
+ * we don't do this for blobs from solid resources) so that similar
+ * files are grouped together, which improves the compression ratio.
+ * This is somewhat of a hack since a blob does not necessarily
+ * correspond one-to-one with a filename, nor is there any guarantee
+ * that two files with similar names or extensions are actually similar
+ * in content. A potential TODO is to sort the blobs based on some
+ * measure of similarity of their actual contents.
+ */
+
+ ret = sort_blob_list_by_sequential_order(blob_list,
+ offsetof(struct blob_descriptor,
+ write_blobs_list));
+ if (ret)
+ return ret;
+
+ ret = compute_blob_list_stats(blob_list, &ctx);
+ if (ret)
+ return ret;
+
+ if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID_SORT) {
+ ret = sort_blob_list_for_solid_compression(blob_list);
+ if (unlikely(ret))
+ WARNING("Failed to sort blobs for solid compression. Continuing anyways.");
+ }
+
+ ctx.progress_data.progfunc = progfunc;
+ ctx.progress_data.progctx = progctx;
+
+ /* Split off the blobs whose raw compressed data can be copied
+ * directly; the return value is the number of bytes that must still go
+ * through the normal read (and possibly compress) path. */
+ num_nonraw_bytes = find_raw_copy_blobs(blob_list, write_resource_flags,
+ out_ctype, out_chunk_size,
+ &raw_copy_blobs);
+
+ /* Unless no data needs to be compressed, allocate a chunk_compressor to
+ * do compression. There are serial and parallel implementations of the
+ * chunk_compressor interface. We default to parallel using the
+ * specified number of threads, unless the upper bound on the number
+ * bytes needing to be compressed is less than a heuristic value. */
+ if (num_nonraw_bytes != 0 && out_ctype != WIMLIB_COMPRESSION_TYPE_NONE) {
+ #ifdef ENABLE_MULTITHREADED_COMPRESSION
+ if (num_nonraw_bytes > max(2000000, out_chunk_size)) {
+ ret = new_parallel_chunk_compressor(out_ctype,
+ out_chunk_size,
+ num_threads, 0,
+ &ctx.compressor);
+ if (ret > 0) {
+ WARNING("Couldn't create parallel chunk compressor: %"TS".\n"
+ " Falling back to single-threaded compression.",
+ wimlib_get_error_string(ret));
+ }
+ }
+ #endif
+
+ if (ctx.compressor == NULL) {
+ ret = new_serial_chunk_compressor(out_ctype, out_chunk_size,
+ &ctx.compressor);
+ if (ret)
+ goto out_destroy_context;
+ }
+ }
+
+ /* Report the actual number of compressor threads to the progress
+ * function (1 when nothing needs compression). */
+ if (ctx.compressor)
+ ctx.progress_data.progress.write_streams.num_threads = ctx.compressor->num_threads;
+ else
+ ctx.progress_data.progress.write_streams.num_threads = 1;
+
+ ret = call_progress(ctx.progress_data.progfunc,
+ WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
+ &ctx.progress_data.progress,
+ ctx.progress_data.progctx);
+ if (ret)
+ goto out_destroy_context;
+
+ /* Copy any compressed resources for which the raw data can be reused
+ * without decompression. */
+ ret = write_raw_copy_resources(&raw_copy_blobs, ctx.out_fd,
+ &ctx.progress_data);
+
+ if (ret || num_nonraw_bytes == 0)
+ goto out_destroy_context;
+
+ INIT_LIST_HEAD(&ctx.blobs_being_compressed);
+
+ if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
+
+ INIT_LIST_HEAD(&ctx.blobs_in_solid_resource);
+
+ ret = begin_write_resource(&ctx, num_nonraw_bytes);
+ if (ret)
+ goto out_destroy_context;
+ }
+
+ /* Read the list of blobs needing to be compressed, using the specified
+ * callbacks to execute processing of the data. */
+
+ struct read_blob_callbacks cbs = {
+ .begin_blob = write_blob_begin_read,
+ .consume_chunk = write_blob_process_chunk,
+ .end_blob = write_blob_end_read,
+ .ctx = &ctx,
+ };
+
+ ret = read_blob_list(blob_list,
+ offsetof(struct blob_descriptor, write_blobs_list),
+ &cbs,
+ BLOB_LIST_ALREADY_SORTED |
+ VERIFY_BLOB_HASHES |
+ COMPUTE_MISSING_BLOB_HASHES);
+
+ if (ret)
+ goto out_destroy_context;
+
+ ret = finish_remaining_chunks(&ctx);
+ if (ret)
+ goto out_destroy_context;
+
+ if (write_resource_flags & WRITE_RESOURCE_FLAG_SOLID) {
+ struct wim_reshdr reshdr;
+ struct blob_descriptor *blob;
+ u64 offset_in_res;
+
+ ret = end_write_resource(&ctx, &reshdr);
+ if (ret)
+ goto out_destroy_context;
+
+ /* Now that the solid resource is complete, fill in each blob's
+ * output resource header with its location inside it. */
+ offset_in_res = 0;
+ list_for_each_entry(blob, &ctx.blobs_in_solid_resource, write_blobs_list) {
+ blob->out_reshdr.size_in_wim = blob->size;
+ blob->out_reshdr.flags = reshdr_flags_for_blob(blob) |
+ WIM_RESHDR_FLAG_SOLID;
+ /* For solid blobs the per-blob uncompressed_size stays 0;
+ * the resource's full uncompressed size is recorded in
+ * out_res_uncompressed_size below. */
+ blob->out_reshdr.uncompressed_size = 0;
+ blob->out_reshdr.offset_in_wim = offset_in_res;
+ blob->out_res_offset_in_wim = reshdr.offset_in_wim;
+ blob->out_res_size_in_wim = reshdr.size_in_wim;
+ blob->out_res_uncompressed_size = reshdr.uncompressed_size;
+ offset_in_res += blob->size;
+ }
+ wimlib_assert(offset_in_res == reshdr.uncompressed_size);
+ }
+
+out_destroy_context:
+ FREE(ctx.chunk_csizes);
+ if (ctx.compressor)
+ ctx.compressor->destroy(ctx.compressor);
+ return ret;
+}
+
+
+/* Write the file data blobs in @blob_list to the output WIM of @wim, using
+ * the solid or non-solid compression settings as selected by @write_flags. */
+static int
+write_file_data_blobs(WIMStruct *wim,
+		      struct list_head *blob_list,
+		      int write_flags,
+		      unsigned num_threads,
+		      struct filter_context *filter_ctx)
+{
+	const int resource_flags = write_flags_to_resource_flags(write_flags);
+	const bool solid = (resource_flags & WRITE_RESOURCE_FLAG_SOLID) != 0;
+	const int ctype = solid ? wim->out_solid_compression_type
+				: wim->out_compression_type;
+	const u32 chunk_size = solid ? wim->out_solid_chunk_size
+				     : wim->out_chunk_size;
+
+	return write_blob_list(blob_list,
+			       &wim->out_fd,
+			       resource_flags,
+			       ctype,
+			       chunk_size,
+			       num_threads,
+			       wim->blob_table,
+			       filter_ctx,
+			       wim->progfunc,
+			       wim->progctx);
+}
+
+/* Write the contents of the specified blob as a WIM resource.  The blob is
+ * written on its own, never as part of a solid resource. */
+static int
+write_wim_resource(struct blob_descriptor *blob,
+		   struct filedes *out_fd,
+		   int out_ctype,
+		   u32 out_chunk_size,
+		   int write_resource_flags)
+{
+	LIST_HEAD(single_blob_list);
+
+	/* A lone blob cannot be written solid; strip the flag. */
+	write_resource_flags &= ~WRITE_RESOURCE_FLAG_SOLID;
+
+	blob->will_be_in_output_wim = 1;
+	list_add(&blob->write_blobs_list, &single_blob_list);
+
+	return write_blob_list(&single_blob_list, out_fd,
+			       write_resource_flags, out_ctype,
+			       out_chunk_size, 1, NULL, NULL, NULL, NULL);
+}
+
+/* Write the contents of the specified buffer as a WIM resource.  The
+ * resulting resource header is returned in @out_reshdr, and, if @hash_ret is
+ * non-NULL, the buffer's SHA-1 message digest is returned there. */
+int
+write_wim_resource_from_buffer(const void *buf,
+			       size_t buf_size,
+			       bool is_metadata,
+			       struct filedes *out_fd,
+			       int out_ctype,
+			       u32 out_chunk_size,
+			       struct wim_reshdr *out_reshdr,
+			       u8 *hash_ret,
+			       int write_resource_flags)
+{
+	struct blob_descriptor blob;
+	int ret;
+
+	/* An empty buffer maps to a zeroed resource header and hash. */
+	if (unlikely(buf_size == 0)) {
+		zero_reshdr(out_reshdr);
+		if (hash_ret)
+			copy_hash(hash_ret, zero_hash);
+		return 0;
+	}
+
+	/* Describe the buffer as an in-memory blob, hashing it up front so
+	 * it is not treated as unhashed. */
+	blob_set_is_located_in_attached_buffer(&blob, (void *)buf, buf_size);
+	sha1_buffer(buf, buf_size, blob.hash);
+	blob.unhashed = 0;
+	blob.is_metadata = is_metadata;
+
+	ret = write_wim_resource(&blob, out_fd, out_ctype, out_chunk_size,
+				 write_resource_flags);
+	if (ret)
+		return ret;
+
+	copy_reshdr(out_reshdr, &blob.out_reshdr);
+	if (hash_ret)
+		copy_hash(hash_ret, blob.hash);
+	return 0;
+}
+
+/* A temporary hash table keyed on blob size, used to determine whether each
+ * blob's uncompressed size is unique among the blobs considered.  Blobs are
+ * chained through their 'hash_list_2' member. */
+struct blob_size_table {
+ struct hlist_head *array;
+ size_t num_entries;
+ size_t capacity;
+};
+
+/* Initialize @tab with @capacity empty buckets.
+ * Returns 0, or WIMLIB_ERR_NOMEM on allocation failure. */
+static int
+init_blob_size_table(struct blob_size_table *tab, size_t capacity)
+{
+	struct hlist_head *buckets;
+
+	buckets = CALLOC(capacity, sizeof(buckets[0]));
+	if (!buckets)
+		return WIMLIB_ERR_NOMEM;
+
+	tab->array = buckets;
+	tab->capacity = capacity;
+	tab->num_entries = 0;
+	return 0;
+}
+
+/* Free the bucket array of @tab.  The blobs themselves are not owned by the
+ * table and are left untouched. */
+static void
+destroy_blob_size_table(struct blob_size_table *tab)
+{
+ FREE(tab->array);
+}
+
+/* Insert @blob into the blob size table @_tab, updating the 'unique_size'
+ * flag of @blob and of any previously inserted blob that shares its size.
+ * Always returns 0 (callback signature for for_blob_in_table()). */
+static int
+blob_size_table_insert(struct blob_descriptor *blob, void *_tab)
+{
+	struct blob_size_table *tab = _tab;
+	struct hlist_head *bucket;
+	struct blob_descriptor *other;
+
+	bucket = &tab->array[hash_u64(blob->size) % tab->capacity];
+
+	blob->unique_size = 1;
+	hlist_for_each_entry(other, bucket, hash_list_2) {
+		if (other->size == blob->size) {
+			/* Size collision: neither blob's size is unique. */
+			blob->unique_size = 0;
+			other->unique_size = 0;
+			break;
+		}
+	}
+
+	hlist_add_head(&blob->hash_list_2, bucket);
+	tab->num_entries++;
+	return 0;
+}
+
+/* Context for finding the blobs to include in the output WIM.
+ * NOTE(review): no use of this struct is visible in this portion of the
+ * file; presumably it is used elsewhere — confirm before removing. */
+struct find_blobs_ctx {
+ WIMStruct *wim;
+ int write_flags;
+ struct list_head blob_list;
+ struct blob_size_table blob_size_tab;
+};
+
+/* Add @nref output references to @blob, queueing it on @blob_list the first
+ * time it is seen (i.e. when 'will_be_in_output_wim' is not yet set). */
+static void
+reference_blob_for_write(struct blob_descriptor *blob,
+			 struct list_head *blob_list, u32 nref)
+{
+	if (blob->will_be_in_output_wim) {
+		blob->out_refcnt += nref;
+		return;
+	}
+
+	/* First reference: mark the blob and queue it for writing. */
+	blob->will_be_in_output_wim = 1;
+	blob->out_refcnt = nref;
+	list_add_tail(&blob->write_blobs_list, blob_list);
+}
+
+/* for_blob_in_table() callback: unconditionally (re-)queue @blob with a
+ * number of output references equal to its existing reference count. */
+static int
+fully_reference_blob_for_write(struct blob_descriptor *blob, void *_blob_list)
+{
+	blob->will_be_in_output_wim = 0;
+	reference_blob_for_write(blob, (struct list_head *)_blob_list,
+				 blob->refcnt);
+	return 0;
+}
+
+/* Reference, for writing, the blob of each stream of @inode, looking up blob
+ * descriptors in @table.  Returns 0, or an error if a stream has a nonzero
+ * hash but no blob descriptor can be found for it. */
+static int
+inode_find_blobs_to_reference(const struct wim_inode *inode,
+			      const struct blob_table *table,
+			      struct list_head *blob_list)
+{
+	unsigned i;
+
+	wimlib_assert(inode->i_nlink > 0);
+
+	for (i = 0; i < inode->i_num_streams; i++) {
+		struct blob_descriptor *blob;
+		const u8 *hash;
+
+		blob = stream_blob(&inode->i_streams[i], table);
+		if (blob) {
+			/* Each link to the inode counts as one reference. */
+			reference_blob_for_write(blob, blob_list,
+						 inode->i_nlink);
+			continue;
+		}
+
+		/* No descriptor: acceptable only for an empty stream
+		 * (all-zeroes hash). */
+		hash = stream_hash(&inode->i_streams[i]);
+		if (!is_zero_hash(hash))
+			return blob_not_found_error(inode, hash);
+	}
+	return 0;
+}
+
+/* for_blob_in_table() callback: clear the 'will_be_in_output_wim' flag of
+ * @blob.  The second argument is unused. */
+static int
+do_blob_set_not_in_output_wim(struct blob_descriptor *blob, void *_ignore)
+{
+ blob->will_be_in_output_wim = 0;
+ return 0;
+}
+
+/* for_image() callback: reference all blobs needed by the inodes of the
+ * currently selected image of @wim.  The destination blob list is passed in
+ * via wim->private. */
+static int
+image_find_blobs_to_reference(WIMStruct *wim)
+{
+	struct wim_image_metadata *imd = wim_get_current_image_metadata(wim);
+	struct list_head *blob_list = wim->private;
+	struct blob_descriptor *blob;
+	struct wim_inode *inode;
+	int ret;
+
+	/* Unhashed blobs start out unmarked; each is queued only if an inode
+	 * below actually references it. */
+	image_for_each_unhashed_blob(blob, imd)
+		blob->will_be_in_output_wim = 0;
+
+	image_for_each_inode(inode, imd) {
+		ret = inode_find_blobs_to_reference(inode, wim->blob_table,
+						    blob_list);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Build in *blob_list_ret the unfiltered list of blobs that would need to be
+ * written for @image (possibly WIMLIB_ALL_IMAGES) of @wim.
+ *
+ * @blobs_ok selects a fast path that trusts the blob table and the existing
+ * reference counts rather than walking the image metadata; it applies only
+ * when every image is being written.
+ */
+static int
+prepare_unfiltered_list_of_blobs_in_output_wim(WIMStruct *wim,
+ int image,
+ int blobs_ok,
+ struct list_head *blob_list_ret)
+{
+ int ret;
+
+ INIT_LIST_HEAD(blob_list_ret);
+
+ if (blobs_ok && (image == WIMLIB_ALL_IMAGES ||
+ (image == 1 && wim->hdr.image_count == 1)))
+ {
+ /* Fast case: Assume that all blobs are being written and that
+ * the reference counts are correct. */
+ struct blob_descriptor *blob;
+ struct wim_image_metadata *imd;
+ unsigned i;
+
+ for_blob_in_table(wim->blob_table,
+ fully_reference_blob_for_write,
+ blob_list_ret);
+
+ /* Unhashed blobs are not in the blob table; pick them up from
+ * each image's metadata. */
+ for (i = 0; i < wim->hdr.image_count; i++) {
+ imd = wim->image_metadata[i];
+ image_for_each_unhashed_blob(blob, imd)
+ fully_reference_blob_for_write(blob, blob_list_ret);
+ }
+ } else {
+ /* Slow case: Walk through the images being written and
+ * determine the blobs referenced. */
+ for_blob_in_table(wim->blob_table,
+ do_blob_set_not_in_output_wim, NULL);
+ /* Pass the destination list to the per-image callback. */
+ wim->private = blob_list_ret;
+ ret = for_image(wim, image, image_find_blobs_to_reference);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Context for insert_other_if_hard_filtered(). */
+struct insert_other_if_hard_filtered_ctx {
+ struct blob_size_table *tab;
+ struct filter_context *filter_ctx;
+};
+
+/* for_blob_in_table() callback: enter @blob into the blob size table if it
+ * is not slated for the output WIM but would be hard-filtered.  Such blobs
+ * still matter for detecting size collisions.  Always returns 0. */
+static int
+insert_other_if_hard_filtered(struct blob_descriptor *blob, void *_ctx)
+{
+	struct insert_other_if_hard_filtered_ctx *ctx = _ctx;
+
+	if (blob->will_be_in_output_wim)
+		return 0;
+
+	if (blob_hard_filtered(blob, ctx->filter_ctx))
+		blob_size_table_insert(blob, ctx->tab);
+	return 0;
+}
+
+/* Compute the 'unique_size' flag of every blob in @blob_list, also taking
+ * into account blobs in the blob table @lt that may be hard-filtered out of
+ * the output WIM.  Returns 0, or WIMLIB_ERR_NOMEM. */
+static int
+determine_blob_size_uniquity(struct list_head *blob_list,
+			     struct blob_table *lt,
+			     struct filter_context *filter_ctx)
+{
+	struct blob_size_table st;
+	struct blob_descriptor *blob;
+	int ret;
+
+	ret = init_blob_size_table(&st, 9001);
+	if (ret)
+		return ret;
+
+	/* Hard-filtered blobs can still collide in size with blobs being
+	 * written, so account for them first. */
+	if (may_hard_filter_blobs(filter_ctx)) {
+		struct insert_other_if_hard_filtered_ctx ctx = {
+			.tab = &st,
+			.filter_ctx = filter_ctx,
+		};
+		for_blob_in_table(lt, insert_other_if_hard_filtered, &ctx);
+	}
+
+	list_for_each_entry(blob, blob_list, write_blobs_list)
+		blob_size_table_insert(blob, &st);
+
+	destroy_blob_size_table(&st);
+	return 0;
+}
+
+/* Remove filtered blobs from @blob_list.  Soft-filtered blobs are only
+ * removed from the write list; hard-filtered blobs are also removed from the
+ * blob table list and unmarked for the output WIM. */
+static void
+filter_blob_list_for_write(struct list_head *blob_list,
+			   struct filter_context *filter_ctx)
+{
+	struct blob_descriptor *blob, *tmp;
+
+	list_for_each_entry_safe(blob, tmp, blob_list, write_blobs_list) {
+		int status = blob_filtered(blob, filter_ctx);
+
+		if (status == 0)
+			continue;	/* Not filtered; keep it. */
+
+		if (status < 0) {
+			/* Hard filtered: exclude from blob table too. */
+			blob->will_be_in_output_wim = 0;
+			list_del(&blob->blob_table_list);
+		}
+		/* Filtered (soft or hard): don't write the data. */
+		list_del(&blob->write_blobs_list);
+	}
+}
+
+/*
+ * prepare_blob_list_for_write() -
+ *
+ * Prepare the list of blobs to write for writing a WIM containing the specified
+ * image(s) with the specified write flags.
+ *
+ * @wim
+ * The WIMStruct on whose behalf the write is occurring.
+ *
+ * @image
+ * Image(s) from the WIM to write; may be WIMLIB_ALL_IMAGES.
+ *
+ * @write_flags
+ * WIMLIB_WRITE_FLAG_* flags for the write operation:
+ *
+ * STREAMS_OK: For writes of all images, assume that all blobs in the blob
+ * table of @wim and the per-image lists of unhashed blobs should be taken
+ * as-is, and image metadata should not be searched for references. This
+ * does not exclude filtering with APPEND and SKIP_EXTERNAL_WIMS, below.
+ *
+ * APPEND: Blobs already present in @wim shall not be returned in
+ * @blob_list_ret.
+ *
+ * SKIP_EXTERNAL_WIMS: Blobs already present in a WIM file, but not @wim,
+ * shall be returned in neither @blob_list_ret nor @blob_table_list_ret.
+ *
+ * @blob_list_ret
+ * List of blobs, linked by write_blobs_list, that need to be written will
+ * be returned here.
+ *
+ * Note that this function assumes that unhashed blobs will be written; it
+ * does not take into account that they may become duplicates when actually
+ * hashed.
+ *
+ * @blob_table_list_ret
+ * List of blobs, linked by blob_table_list, that need to be included in
+ * the WIM's blob table will be returned here. This will be a superset of
+ * the blobs in @blob_list_ret.
+ *
+ * This list will be a proper superset of @blob_list_ret if and only if
+ * WIMLIB_WRITE_FLAG_APPEND was specified in @write_flags and some of the
+ * blobs that would otherwise need to be written were already located in
+ * the WIM file.
+ *
+ * All blobs in this list will have @out_refcnt set to the number of
+ * references to the blob in the output WIM. If
+ * WIMLIB_WRITE_FLAG_STREAMS_OK was specified in @write_flags, @out_refcnt
+ * may be as low as 0.
+ *
+ * @filter_ctx_ret
+ * A context for queries of blob filter status with blob_filtered() is
+ * returned in this location.
+ *
+ * In addition, @will_be_in_output_wim will be set to 1 in all blobs inserted
+ * into @blob_table_list_ret and to 0 in all blobs in the blob table of @wim not
+ * inserted into @blob_table_list_ret.
+ *
+ * Still furthermore, @unique_size will be set to 1 on all blobs in
+ * @blob_list_ret that have unique size among all blobs in @blob_list_ret and
+ * among all blobs in the blob table of @wim that are ineligible for being
+ * written due to filtering.
+ *
+ * Returns 0 on success; nonzero on read error, memory allocation error, or
+ * otherwise.
+ */
+static int
+prepare_blob_list_for_write(WIMStruct *wim, int image,
+			    int write_flags,
+			    struct list_head *blob_list_ret,
+			    struct list_head *blob_table_list_ret,
+			    struct filter_context *filter_ctx_ret)
+{
+	struct blob_descriptor *blob;
+	int ret;
+
+	/* Set up the filter context first; it is consulted below. */
+	filter_ctx_ret->write_flags = write_flags;
+	filter_ctx_ret->wim = wim;
+
+	/* Gather every blob that would be written, before any filtering. */
+	ret = prepare_unfiltered_list_of_blobs_in_output_wim(
+			wim,
+			image,
+			write_flags & WIMLIB_WRITE_FLAG_STREAMS_OK,
+			blob_list_ret);
+	if (ret)
+		return ret;
+
+	/* Initially, every blob to be written also gets a slot in the blob
+	 * table list. */
+	INIT_LIST_HEAD(blob_table_list_ret);
+	list_for_each_entry(blob, blob_list_ret, write_blobs_list)
+		list_add_tail(&blob->blob_table_list, blob_table_list_ret);
+
+	ret = determine_blob_size_uniquity(blob_list_ret, wim->blob_table,
+					   filter_ctx_ret);
+	if (ret)
+		return ret;
+
+	/* Drop any blobs excluded by the write flags. */
+	if (may_filter_blobs(filter_ctx_ret))
+		filter_blob_list_for_write(blob_list_ret, filter_ctx_ret);
+
+	return 0;
+}
+
+/* Write the file data blobs for @image of @wim.  The list of blob table
+ * entries for the output WIM is returned in *blob_table_list_ret.  If
+ * @blob_list_override is non-NULL, it is used verbatim instead of deriving
+ * the blob list from the image metadata. */
+static int
+write_file_data(WIMStruct *wim, int image, int write_flags,
+		unsigned num_threads,
+		struct list_head *blob_list_override,
+		struct list_head *blob_table_list_ret)
+{
+	struct list_head _blob_list;
+	struct list_head *blob_list;
+	struct filter_context _filter_ctx;
+	struct filter_context *filter_ctx;
+	struct blob_descriptor *blob;
+	int ret;
+
+	if (blob_list_override != NULL) {
+		/* Currently only as a result of wimlib_split() being called:
+		 * the caller supplied the blob list explicitly.  Use existing
+		 * reference counts and do no filtering. */
+		blob_list = blob_list_override;
+		filter_ctx = NULL;
+		INIT_LIST_HEAD(blob_table_list_ret);
+		list_for_each_entry(blob, blob_list, write_blobs_list) {
+			blob->out_refcnt = blob->refcnt;
+			blob->will_be_in_output_wim = 1;
+			blob->unique_size = 0;
+			list_add_tail(&blob->blob_table_list,
+				      blob_table_list_ret);
+		}
+	} else {
+		/* Normal case: derive the blob list from the image(s) being
+		 * written. */
+		blob_list = &_blob_list;
+		filter_ctx = &_filter_ctx;
+		ret = prepare_blob_list_for_write(wim, image, write_flags,
+						  blob_list,
+						  blob_table_list_ret,
+						  filter_ctx);
+		if (ret)
+			return ret;
+	}
+
+	return write_file_data_blobs(wim,
+				     blob_list,
+				     write_flags,
+				     num_threads,
+				     filter_ctx);
+}
+
+/* Write (or arrange to re-use) the metadata resources for the image(s) being
+ * written.  No-op if WIMLIB_WRITE_FLAG_NO_METADATA is set.  Sends the
+ * WRITE_METADATA_BEGIN and WRITE_METADATA_END progress messages. */
+static int
+write_metadata_resources(WIMStruct *wim, int image, int write_flags)
+{
+ int ret;
+ int start_image;
+ int end_image;
+ int write_resource_flags;
+
+ if (write_flags & WIMLIB_WRITE_FLAG_NO_METADATA)
+ return 0;
+
+ write_resource_flags = write_flags_to_resource_flags(write_flags);
+
+ /* Metadata resources are always written as normal (non-solid)
+ * resources. */
+ write_resource_flags &= ~WRITE_RESOURCE_FLAG_SOLID;
+
+ ret = call_progress(wim->progfunc,
+ WIMLIB_PROGRESS_MSG_WRITE_METADATA_BEGIN,
+ NULL, wim->progctx);
+ if (ret)
+ return ret;
+
+ if (image == WIMLIB_ALL_IMAGES) {
+ start_image = 1;
+ end_image = wim->hdr.image_count;
+ } else {
+ start_image = image;
+ end_image = image;
+ }
+
+ for (int i = start_image; i <= end_image; i++) {
+ struct wim_image_metadata *imd;
+
+ imd = wim->image_metadata[i - 1];
+ if (is_image_dirty(imd)) {
+ /* The image was modified from the original, or was
+ * newly added, so we have to build and write a new
+ * metadata resource. */
+ ret = write_metadata_resource(wim, i,
+ write_resource_flags);
+ } else if (is_image_unchanged_from_wim(imd, wim) &&
+ (write_flags & (WIMLIB_WRITE_FLAG_UNSAFE_COMPACT |
+ WIMLIB_WRITE_FLAG_APPEND)))
+ {
+ /* The metadata resource is already in the WIM file.
+ * For appends, we don't need to write it at all. For
+ * compactions, we re-write existing metadata resources
+ * along with the existing file resources, not here. */
+ if (write_flags & WIMLIB_WRITE_FLAG_APPEND)
+ blob_set_out_reshdr_for_reuse(imd->metadata_blob);
+ ret = 0;
+ } else {
+ /* The metadata resource is in a WIM file other than the
+ * one being written to. We need to rewrite it,
+ * possibly compressed differently; but rebuilding the
+ * metadata itself isn't necessary. */
+ ret = write_wim_resource(imd->metadata_blob,
+ &wim->out_fd,
+ wim->out_compression_type,
+ wim->out_chunk_size,
+ write_resource_flags);
+ }
+ if (ret)
+ return ret;
+ }
+
+ return call_progress(wim->progfunc,
+ WIMLIB_PROGRESS_MSG_WRITE_METADATA_END,
+ NULL, wim->progctx);
+}
+
+/* Open @path for writing the output WIM and install the resulting file
+ * descriptor in wim->out_fd.  Returns 0, or WIMLIB_ERR_OPEN on failure. */
+static int
+open_wim_writable(WIMStruct *wim, const tchar *path, int open_flags)
+{
+	int fd;
+
+	fd = topen(path, open_flags | O_BINARY, 0644);
+	if (fd < 0) {
+		ERROR_WITH_ERRNO("Failed to open \"%"TS"\" for writing", path);
+		return WIMLIB_ERR_OPEN;
+	}
+	filedes_init(&wim->out_fd, fd);
+	return 0;
+}
+
+/* Close the output WIM's file descriptor, unless it was supplied by the user
+ * (WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR).  The descriptor is invalidated either
+ * way.  Returns 0, or WIMLIB_ERR_WRITE if close() fails. */
+static int
+close_wim_writable(WIMStruct *wim, int write_flags)
+{
+	int ret = 0;
+
+	if (!(write_flags & WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR) &&
+	    filedes_valid(&wim->out_fd) &&
+	    filedes_close(&wim->out_fd))
+		ret = WIMLIB_ERR_WRITE;
+
+	filedes_invalidate(&wim->out_fd);
+	return ret;
+}
+
+/* Comparator ordering blobs by the position of their data in the output WIM:
+ * non-solid blobs first (by offset), then solid blobs grouped by their solid
+ * resource's offset and ordered by offset within the resource. */
+static int
+cmp_blobs_by_out_rdesc(const void *p1, const void *p2)
+{
+	const struct blob_descriptor *blob1 = *(const struct blob_descriptor **)p1;
+	const struct blob_descriptor *blob2 = *(const struct blob_descriptor **)p2;
+	const bool solid1 = (blob1->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) != 0;
+	const bool solid2 = (blob2->out_reshdr.flags & WIM_RESHDR_FLAG_SOLID) != 0;
+
+	if (solid1 != solid2)
+		return solid1 ? 1 : -1;
+
+	if (solid1 &&
+	    blob1->out_res_offset_in_wim != blob2->out_res_offset_in_wim)
+		return cmp_u64(blob1->out_res_offset_in_wim,
+			       blob2->out_res_offset_in_wim);
+
+	return cmp_u64(blob1->out_reshdr.offset_in_wim,
+		       blob2->out_reshdr.offset_in_wim);
+}
+
+/* Write the blob table to the output WIM: re-use resource headers of blobs
+ * already in the WIM when appending, sort the blob table list by each blob's
+ * position in the output file, prepend entries for the metadata resources of
+ * the images being written, then serialize the table. */
+static int
+write_blob_table(WIMStruct *wim, int image, int write_flags,
+ struct list_head *blob_table_list)
+{
+ int ret;
+
+ /* Set output resource metadata for blobs already present in WIM. */
+ if (write_flags & WIMLIB_WRITE_FLAG_APPEND) {
+ struct blob_descriptor *blob;
+ list_for_each_entry(blob, blob_table_list, blob_table_list) {
+ if (blob->blob_location == BLOB_IN_WIM &&
+ blob->rdesc->wim == wim)
+ {
+ blob_set_out_reshdr_for_reuse(blob);
+ }
+ }
+ }
+
+ ret = sort_blob_list(blob_table_list,
+ offsetof(struct blob_descriptor, blob_table_list),
+ cmp_blobs_by_out_rdesc);
+ if (ret)
+ return ret;
+
+ /* Add entries for metadata resources. */
+ if (!(write_flags & WIMLIB_WRITE_FLAG_NO_METADATA)) {
+ int start_image;
+ int end_image;
+
+ if (image == WIMLIB_ALL_IMAGES) {
+ start_image = 1;
+ end_image = wim->hdr.image_count;
+ } else {
+ start_image = image;
+ end_image = image;
+ }
+
+ /* Push metadata blob table entries onto the front of the list
+ * in reverse order, so that they're written in order.
+ */
+ for (int i = end_image; i >= start_image; i--) {
+ struct blob_descriptor *metadata_blob;
+
+ metadata_blob = wim->image_metadata[i - 1]->metadata_blob;
+ wimlib_assert(metadata_blob->out_reshdr.flags & WIM_RESHDR_FLAG_METADATA);
+ metadata_blob->out_refcnt = 1;
+ list_add(&metadata_blob->blob_table_list, blob_table_list);
+ }
+ }
+
+ return write_blob_table_from_blob_list(blob_table_list,
+ &wim->out_fd,
+ wim->out_hdr.part_number,
+ &wim->out_hdr.blob_table_reshdr,
+ write_flags_to_resource_flags(write_flags));
+}
+
+/*
+ * Finish writing a WIM file: write the blob table, xml data, and integrity
+ * table, then overwrite the WIM header.
+ *
+ * The output file descriptor is closed on success, except when writing to a
+ * user-specified file descriptor (WIMLIB_WRITE_FLAG_FILE_DESCRIPTOR set).
+ *
+ * Returns 0 on success or a WIMLIB_ERR_* code on failure.
+ */
+static int
+finish_write(WIMStruct *wim, int image, int write_flags,
+ struct list_head *blob_table_list)
+{
+ int write_resource_flags;
+ off_t old_blob_table_end = 0;
+ struct integrity_table *old_integrity_table = NULL;
+ off_t new_blob_table_end;
+ u64 xml_totalbytes;
+ int ret;
+
+ write_resource_flags = write_flags_to_resource_flags(write_flags);
+
+ /* In the WIM header, there is room for the resource entry for a
+ * metadata resource labeled as the "boot metadata". This entry should
+ * be zeroed out if there is no bootable image (boot_idx 0). Otherwise,
+ * it should be a copy of the resource entry for the image that is
+ * marked as bootable. */
+ if (wim->out_hdr.boot_idx == 0) {
+ zero_reshdr(&wim->out_hdr.boot_metadata_reshdr);
+ } else {
+ copy_reshdr(&wim->out_hdr.boot_metadata_reshdr,
+ &wim->image_metadata[
+ wim->out_hdr.boot_idx - 1]->metadata_blob->out_reshdr);
+ }
+
+ /* If appending to a WIM file containing an integrity table, we'd like
+ * to re-use the information in the old integrity table instead of
+ * recalculating it. But we might overwrite the old integrity table
+ * when we expand the XML data. Read it into memory just in case. */
+ if ((write_flags & (WIMLIB_WRITE_FLAG_APPEND |
+ WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)) ==
+ (WIMLIB_WRITE_FLAG_APPEND |
+ WIMLIB_WRITE_FLAG_CHECK_INTEGRITY)
+ && wim_has_integrity_table(wim))
+ {
+ old_blob_table_end = wim->hdr.blob_table_reshdr.offset_in_wim +
+ wim->hdr.blob_table_reshdr.size_in_wim;
+ (void)read_integrity_table(wim,
+ old_blob_table_end - WIM_HEADER_DISK_SIZE,
+ &old_integrity_table);
+ /* If we couldn't read the old integrity table, we can still
+ * re-calculate the full integrity table ourselves. Hence the
+ * ignoring of the return value. */
+ }
+
+ /* Write blob table if needed. */
+ if (!(write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS)) {
+ ret = write_blob_table(wim, image, write_flags,
+ blob_table_list);
+ if (ret) {
+ free_integrity_table(old_integrity_table);
+ return ret;
+ }
+ }
+
+ /* Write XML data. */
+ xml_totalbytes = wim->out_fd.offset;
+ if (write_flags & WIMLIB_WRITE_FLAG_USE_EXISTING_TOTALBYTES)
+ xml_totalbytes = WIM_TOTALBYTES_USE_EXISTING;
+ ret = write_wim_xml_data(wim, image, xml_totalbytes,
+ &wim->out_hdr.xml_data_reshdr,
+ write_resource_flags);
+ if (ret) {
+ free_integrity_table(old_integrity_table);
+ return ret;
+ }
+
+ /* Write integrity table if needed. */
+ if ((write_flags & WIMLIB_WRITE_FLAG_CHECK_INTEGRITY) &&
+ wim->out_hdr.blob_table_reshdr.offset_in_wim != 0)
+ {
+ if (write_flags & WIMLIB_WRITE_FLAG_NO_NEW_BLOBS) {
+ /* The XML data we wrote may have overwritten part of
+ * the old integrity table, so while calculating the new
+ * integrity table we should temporarily update the WIM
+ * header to remove the integrity table reference. */
+ struct wim_header checkpoint_hdr;
+ memcpy(&checkpoint_hdr, &wim->out_hdr, sizeof(struct wim_header));
+ zero_reshdr(&checkpoint_hdr.integrity_table_reshdr);
+ checkpoint_hdr.flags |= WIM_HDR_FLAG_WRITE_IN_PROGRESS;
+ ret = write_wim_header(&checkpoint_hdr, &wim->out_fd, 0);
+ if (ret) {
+ free_integrity_table(old_integrity_table);
+ return ret;
+ }
+ }
+
+ new_blob_table_end = wim->out_hdr.blob_table_reshdr.offset_in_wim +
+ wim->out_hdr.blob_table_reshdr.size_in_wim;
+
+ ret = write_integrity_table(wim,
+ new_blob_table_end,
+ old_blob_table_end,
+ old_integrity_table);
+ free_integrity_table(old_integrity_table);
+ if (ret)
+ return ret;
+ } else {
+ /* No integrity table. */
+ zero_reshdr(&wim->out_hdr.integrity_table_reshdr);
+ }
+
+ /* Now that all information in the WIM header has been determined, the
+ * preliminary header written earlier can be overwritten, the header of
+ * the existing WIM file can be overwritten, or the final header can be
+ * written to the end of the pipable WIM. */
+ wim->out_hdr.flags &= ~WIM_HDR_FLAG_WRITE_IN_PROGRESS;
+ if (write_flags & WIMLIB_WRITE_FLAG_PIPABLE)
+ ret = write_wim_header(&wim->out_hdr, &wim->out_fd, wim->out_fd.offset);
+ else
+ ret = write_wim_header(&wim->out_hdr, &wim->out_fd, 0);
+ if (ret)
+ return ret;
+
+ if (unlikely(write_flags & WIMLIB_WRITE_FLAG_UNSAFE_COMPACT)) {
+ /* Truncate any data the compaction freed up. */
+ if (ftruncate(wim->out_fd.fd, wim->out_fd.offset)) {
+ ERROR_WITH_ERRNO("Failed to truncate the output WIM file");
+ return WIMLIB_ERR_WRITE;
+ }
+ }
+
+ /* Possibly sync file data to disk before closing. On POSIX systems, it
+ * is necessary to do this before using rename() to overwrite an
+ * existing file with a new file. Otherwise, data loss would occur if
+ * the system is abruptly terminated when the metadata for the rename
+ * operation has been written to disk, but the new file data has not.
+ */
+ if (write_flags & WIMLIB_WRITE_FLAG_FSYNC) {
+ if (fsync(wim->out_fd.fd)) {
+ ERROR_WITH_ERRNO("Error syncing data to WIM file");
+ return WIMLIB_ERR_WRITE;
+ }
+ }
+
+ if (close_wim_writable(wim, write_flags)) {
+ ERROR_WITH_ERRNO("Failed to close the output WIM file");
+ return WIMLIB_ERR_WRITE;
+ }
+
+ return 0;
+}
+
+#if defined(HAVE_SYS_FILE_H) && defined(HAVE_FLOCK)
+
+/* Set advisory lock on WIM file (if not already done so) */
+int
+lock_wim_for_append(WIMStruct *wim)
+{
+ if (wim->locked_for_append)
+ return 0;
+ if (!flock(wim->in_fd.fd, LOCK_EX | LOCK_NB)) {
+ wim->locked_for_append = 1;
+ return 0;
+ }
+ if (errno != EWOULDBLOCK)
+ return 0;
+ return WIMLIB_ERR_ALREADY_LOCKED;
+}
+
+/* Remove advisory lock on WIM file (if present) */
+void
+unlock_wim_for_append(WIMStruct *wim)
+{
+ if (wim->locked_for_append) {
+ flock(wim->in_fd.fd, LOCK_UN);
+ wim->locked_for_append = 0;
+ }
+}
+#endif
+
+/*
+ * write_pipable_wim():
+ *
+ * Perform the intermediate stages of creating a "pipable" WIM (i.e. a WIM
+ * capable of being applied from a pipe).
+ *
+ * Pipable WIMs are a wimlib-specific modification of the WIM format such that
+ * images can be applied from them sequentially when the file data is sent over
+ * a pipe. In addition, a pipable WIM can be written sequentially to a pipe.
+ * The modifications made to the WIM format for pipable WIMs are:
+ *
+ * - Magic characters in header are "WLPWM\0\0\0" (wimlib pipable WIM) instead
+ * of "MSWIM\0\0\0". This lets wimlib know that the WIM is pipable and also
+ * stops other software from trying to read the file as a normal WIM.
+ *
+ * - The header at the beginning of the file does not contain all the normal
+ * information; in particular it will have all 0's for the blob table and XML
+ * data resource entries. This is because this information cannot be
+ * determined until the blob table and XML data have been written.
+ * Consequently, wimlib will write the full header at the very end of the
+ * file. The header at the end, however, is only used when reading the WIM
+ * from a seekable file (not a pipe).
+ *
+ * - An extra copy of the XML data is placed directly after the header. This
+ * allows image names and sizes to be determined at an appropriate time when
+ * reading the WIM from a pipe. This copy of the XML data is ignored if the
+ * WIM is read from a seekable file (not a pipe).
+ *
+ * - Solid resources are not allowed. Each blob is always stored in its own
+ * resource.
+ *
+ * - The format of resources, or blobs, has been modified to allow them to be
+ * used before the "blob table" has been read. Each blob is prefixed with a
+ * `struct pwm_blob_hdr' that is basically an abbreviated form of `struct
+ * blob_descriptor_disk' that only contains the SHA-1 message digest,
+ * uncompressed blob size, and flags that indicate whether the blob is
+ * compressed. The data of uncompressed blobs then follows literally, while
+ * the data of compressed blobs follows in a modified format. Compressed
+ * blobs do not begin with a chunk table, since the chunk table cannot be
+ * written until all chunks have been compressed. Instead, each compressed
+ * chunk is prefixed by a `struct pwm_chunk_hdr' that gives its size.
+ * Furthermore, the chunk table is written at the end of the resource instead
+ * of the start. Note: chunk offsets are given in the chunk table as if the
+ * `struct pwm_chunk_hdr's were not present; also, the chunk table is only
+ * used if the WIM is being read from a seekable file (not a pipe).
+ *
+ * - Metadata blobs always come before non-metadata blobs. (This does not by
+ * itself constitute an incompatibility with normal WIMs, since this is valid
+ * in normal WIMs.)
+ *
+ * - At least up to the end of the blobs, all components must be packed as
+ * tightly as possible; there cannot be any "holes" in the WIM. (This does
+ * not by itself constitute an incompatibility with normal WIMs, since this
+ * is valid in normal WIMs.)
+ *
+ * Note: the blob table, XML data, and header at the end are not used when
+ * applying from a pipe. They exist to support functionality such as image
+ * application and export when the WIM is *not* read from a pipe.
+ *
+ * Layout of pipable WIM: