From 0ecf748e0db6bb4d9a02388b4ea925d8742848b1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 30 Dec 2013 16:58:32 -0600 Subject: [PATCH] Fix completed_streams of write streams progress and update docs --- include/wimlib.h | 87 +++++++++++++++++------- programs/imagex.c | 18 +++-- src/write.c | 165 +++++++++++++++++++++++++++++----------------- 3 files changed, 180 insertions(+), 90 deletions(-) diff --git a/include/wimlib.h b/include/wimlib.h index 119c5870..0fc5fb9f 100644 --- a/include/wimlib.h +++ b/include/wimlib.h @@ -491,9 +491,8 @@ enum wimlib_progress_msg { * ::wimlib_progress_info.scan. */ WIMLIB_PROGRESS_MSG_SCAN_END, - /** - * File resources are currently being written to the WIM. - * @p info will point to ::wimlib_progress_info.write_streams. */ + /** File resources ("streams") are currently being written to the WIM. + * @p info will point to ::wimlib_progress_info.write_streams. */ WIMLIB_PROGRESS_MSG_WRITE_STREAMS, /** @@ -561,43 +560,83 @@ union wimlib_progress_info { /* N.B. I wanted these to be anonymous structs, but Doxygen won't * document them if they aren't given a name... */ - /** Valid on messages ::WIMLIB_PROGRESS_MSG_WRITE_STREAMS. */ + /** Valid on the message ::WIMLIB_PROGRESS_MSG_WRITE_STREAMS. This is + * the primary message for tracking the progress of writing a WIM file. + */ struct wimlib_progress_info_write_streams { - /** Number of bytes that are going to be written for all the - * streams combined. This is the amount in uncompressed data. - * (The actual number of bytes will be less if the data is being - * written compressed.) */ + /** Total number of uncompressed bytes of stream data being + * written. This can be thought of as the total uncompressed + * size of the files being archived, with some caveats. WIM + * files use single-instance streams, so the size provided here + * only counts distinct streams, except for the following + * exception: the size provided here may include the sizes of + * all newly added (e.g. 
with wimlib_add_image()) streams, + pending automatic de-duplication during the write operation + itself. When each such stream de-duplication occurs, this + number will be decreased by the size of the duplicate stream + that need not be written. + * + * In the case of a wimlib_overwrite() that the library opted to + * perform in-place, both @p total_streams and @p total_bytes + * will only count the streams actually being written and not + * pre-existing streams in the WIM file. */ uint64_t total_bytes; - /** Number of streams that are going to be written. */ + /** Total number of streams being written. This can be thought + * of as the total number of files being archived, with some + * caveats. In general, a single file or directory may contain + * multiple data streams, each of which will be represented + * separately in this number. Furthermore, WIM files use + * single-instance streams, so the stream count provided here + * only counts distinct streams, except for the following + * exception: the stream count provided here may include newly + * added (e.g. with wimlib_add_image()) streams, pending + * automatic de-duplication during the write operation itself. + * When each such stream de-duplication occurs, this number will + * be decreased by 1 to account for the duplicate stream that + * need not be written. */ uint64_t total_streams; - /** Number of uncompressed bytes that have been written so far. - * Will be 0 initially, and equal to @p total_bytes at the end. - * */ + /** Number of uncompressed bytes of stream data that have been + * written so far. This number will be 0 initially, and will be + * equal to @p total_bytes at the end of the write operation. + * Note that @p total_bytes (but not @p completed_bytes) may + * decrease throughout the write operation due to the discovery + * of stream duplications. */ uint64_t completed_bytes; - /** Number of streams that have been written. Will be 0 - * initially, and equal to @p total_streams at the end. 
*/ + /** Number of streams that have been written so far. This + * number will be 0 initially, and will be equal to @p + * total_streams at the end of the write operation. Note that + * @p total_streams (but not @p completed_streams) may decrease + * throughout the write operation due to the discovery of stream + * duplications. + * + * For applications that wish to calculate a simple "percent + * complete" for the write operation, it will likely be more + * accurate to calculate the percentage from @p completed_bytes + * and @p total_bytes rather than @p completed_streams and + * @p total_streams because the time for the operation to + * complete is mainly determined by the number of bytes that + * need to be read, compressed, and written, not just the number + * of files being archived. */ uint64_t completed_streams; - /** Number of threads that are being used to compress resources - * (if applicable). */ - unsigned num_threads; + /** Number of threads that are being used to compress streams, + * or 1 if streams are being written uncompressed. */ + uint32_t num_threads; - /** The compression type being used to write the streams; either - * ::WIMLIB_COMPRESSION_TYPE_NONE, - * ::WIMLIB_COMPRESSION_TYPE_XPRESS, or - * ::WIMLIB_COMPRESSION_TYPE_LZX. */ - int compression_type; + /** The compression type being used to write the streams, as one + * of the ::wimlib_compression_type constants. */ + int32_t compression_type; /** Number of split WIM parts from which streams are being * written (may be 0 if irrelevant). */ - unsigned total_parts; + uint32_t total_parts; /** Number of split WIM parts from which streams have been * written (may be 0 if irrelevant). 
*/ - unsigned completed_parts; + uint32_t completed_parts; } write_streams; /** Valid on messages ::WIMLIB_PROGRESS_MSG_SCAN_BEGIN, diff --git a/programs/imagex.c b/programs/imagex.c index 39ed2742..9569a255 100644 --- a/programs/imagex.c +++ b/programs/imagex.c @@ -1101,16 +1101,22 @@ imagex_progress_func(enum wimlib_progress_msg msg, return 0; switch (msg) { case WIMLIB_PROGRESS_MSG_WRITE_STREAMS: + { + static bool first = false; + if (!first) { + imagex_printf(T("Writing %"TS"-compressed data " + "using %u thread%"TS"\n"), + wimlib_get_compression_type_string( + info->write_streams.compression_type), + info->write_streams.num_threads, + (info->write_streams.num_threads == 1) ? T("") : T("s")); + first = true; + } + } unit_shift = get_unit(info->write_streams.total_bytes, &unit_name); percent_done = TO_PERCENT(info->write_streams.completed_bytes, info->write_streams.total_bytes); - if (info->write_streams.completed_streams == 0) { - imagex_printf(T("Writing %"TS"-compressed data using %u thread%"TS"\n"), - wimlib_get_compression_type_string(info->write_streams.compression_type), - info->write_streams.num_threads, - (info->write_streams.num_threads == 1) ? 
T("") : T("s")); - } if (info->write_streams.total_parts <= 1) { imagex_printf(T("\r%"PRIu64" %"TS" of %"PRIu64" %"TS" (uncompressed) " "written (%u%% done)"), diff --git a/src/write.c b/src/write.c index bcf282ba..7e597e18 100644 --- a/src/write.c +++ b/src/write.c @@ -282,23 +282,27 @@ struct write_streams_progress_data { static void do_write_streams_progress(struct write_streams_progress_data *progress_data, - u64 size, - bool discarded, - struct wim_lookup_table_entry *cur_stream) + struct wim_lookup_table_entry *cur_stream, + u64 complete_size, + u32 complete_count, + bool discarded) { union wimlib_progress_info *progress = &progress_data->progress; bool new_wim_part; if (discarded) { - progress->write_streams.total_bytes -= size; + progress->write_streams.total_bytes -= complete_size; + progress->write_streams.total_streams -= complete_count; if (progress_data->next_progress != ~(uint64_t)0 && progress_data->next_progress > progress->write_streams.total_bytes) { progress_data->next_progress = progress->write_streams.total_bytes; } } else { - progress->write_streams.completed_bytes += size; + progress->write_streams.completed_bytes += complete_size; + progress->write_streams.completed_streams += complete_count; } + new_wim_part = false; if (cur_stream->resource_location == RESOURCE_IN_WIM && cur_stream->rspec->wim != progress_data->prev_wim_part) @@ -309,7 +313,7 @@ do_write_streams_progress(struct write_streams_progress_data *progress_data, } progress_data->prev_wim_part = cur_stream->rspec->wim; } - progress->write_streams.completed_streams++; + if (progress_data->progress_func && (progress->write_streams.completed_bytes >= progress_data->next_progress || new_wim_part)) @@ -368,20 +372,24 @@ struct write_streams_ctx { /* List of streams that currently have chunks being compressed. */ struct list_head pending_streams; + /* List of streams in the resource pack. Streams are moved here after + * @pending_streams only when writing a packed resource. 
*/ + struct list_head pack_streams; + /* Set to true if the stream currently being read was a duplicate, and * therefore the corresponding stream entry needs to be freed once the * read finishes. (In this case we add the duplicate entry to * pending_streams rather than the entry being read.) */ bool stream_was_duplicate; - /* Current uncompressed offset in the resource being read. */ - u64 cur_read_res_offset; + /* Current uncompressed offset in the stream being read. */ + u64 cur_read_stream_offset; - /* Uncompressed size of the resource currently being read. */ - u64 cur_read_res_size; + /* Uncompressed size of the stream currently being read. */ + u64 cur_read_stream_size; - /* Current uncompressed offset in the resource being written. */ - u64 cur_write_res_offset; + /* Current uncompressed offset in the stream being written. */ + u64 cur_write_stream_offset; /* Uncompressed size of resource currently being written. */ u64 cur_write_res_size; @@ -489,7 +497,7 @@ begin_write_resource(struct write_streams_ctx *ctx, u64 res_expected_size) /* Output file descriptor is now positioned at the offset at which to * write the first chunk of the resource. 
*/ ctx->chunks_start_offset = ctx->out_fd->offset; - ctx->cur_write_res_offset = 0; + ctx->cur_write_stream_offset = 0; ctx->cur_write_res_size = res_expected_size; return 0; } @@ -607,7 +615,8 @@ end_write_resource(struct write_streams_ctx *ctx, struct wim_reshdr *out_reshdr) u64 res_uncompressed_size; u64 res_offset_in_wim; - wimlib_assert(ctx->cur_write_res_size == ctx->cur_write_res_offset); + wimlib_assert(ctx->cur_write_stream_offset == ctx->cur_write_res_size || + (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)); res_uncompressed_size = ctx->cur_write_res_size; if (ctx->compressor) { @@ -637,8 +646,8 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, wimlib_assert(lte->size > 0); - ctx->cur_read_res_offset = 0; - ctx->cur_read_res_size = lte->size; + ctx->cur_read_stream_offset = 0; + ctx->cur_read_stream_size = lte->size; /* As an optimization, we allow some streams to be "unhashed", meaning * their SHA1 message digests are unknown. This is the case with @@ -676,7 +685,8 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, DEBUG("Discarding duplicate stream of " "length %"PRIu64, lte->size); do_write_streams_progress(&ctx->progress_data, - lte->size, true, lte); + lte, lte->size, + 1, true); list_del(<e->write_streams_list); list_del(<e->lookup_table_list); if (lte_new->will_be_in_output_wim) @@ -766,11 +776,13 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, int ret; struct wim_lookup_table_entry *lte; + u32 completed_stream_count; + u32 completed_size; lte = list_entry(ctx->pending_streams.next, struct wim_lookup_table_entry, write_streams_list); - if (ctx->cur_write_res_offset == 0 && + if (ctx->cur_write_stream_offset == 0 && !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) { /* Starting to write a new stream in non-packed mode. 
*/ @@ -817,52 +829,83 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, if (ret) goto error; - ctx->cur_write_res_offset += usize; + ctx->cur_write_stream_offset += usize; - do_write_streams_progress(&ctx->progress_data, - usize, false, lte); + completed_size = usize; + completed_stream_count = 0; + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { + /* Wrote chunk in packed mode. It may have finished multiple + * streams. */ + while (ctx->cur_write_stream_offset > lte->size) { + struct wim_lookup_table_entry *next; - if (ctx->cur_write_res_offset == ctx->cur_write_res_size && - !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)) - { - wimlib_assert(ctx->cur_write_res_offset == lte->size); + ctx->cur_write_stream_offset -= lte->size; - /* Finished writing a stream in non-packed mode. */ + wimlib_assert(!list_is_singular(&ctx->pending_streams) && + !list_empty(&ctx->pending_streams)); + next = list_entry(lte->write_streams_list.next, + struct wim_lookup_table_entry, + write_streams_list); + list_move_tail(<e->write_streams_list, + &ctx->pack_streams); + lte = next; + completed_stream_count++; + } + if (ctx->cur_write_stream_offset == lte->size) { + ctx->cur_write_stream_offset = 0; + list_move_tail(<e->write_streams_list, + &ctx->pack_streams); + completed_stream_count++; + } + } else { + /* Wrote chunk in non-packed mode. It may have finished a + * stream. 
*/ + if (ctx->cur_write_stream_offset == lte->size) { - ret = end_write_resource(ctx, <e->out_reshdr); - if (ret) - return ret; + completed_stream_count++; - lte->out_reshdr.flags = filter_resource_flags(lte->flags); - if (ctx->compressor != NULL) - lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED; + list_del(<e->write_streams_list); - if (ctx->compressor != NULL && - lte->out_reshdr.size_in_wim >= lte->out_reshdr.uncompressed_size && - !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) && - !(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS)) - { - /* Stream did not compress to less than its original - * size. If we're not writing a pipable WIM (which - * could mean the output file descriptor is - * non-seekable), and the stream isn't located in a - * resource pack (which would make reading it again - * costly), truncate the file to the start of the stream - * and write it uncompressed instead. */ - DEBUG("Stream of size %"PRIu64" did not compress to " - "less than original size; writing uncompressed.", - lte->size); - ret = write_stream_uncompressed(lte, ctx->out_fd); + wimlib_assert(ctx->cur_write_stream_offset == + ctx->cur_write_res_size); + + ret = end_write_resource(ctx, <e->out_reshdr); if (ret) return ret; - } - wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size); + lte->out_reshdr.flags = filter_resource_flags(lte->flags); + if (ctx->compressor != NULL) + lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED; - list_del(<e->write_streams_list); - ctx->cur_write_res_offset = 0; + if (ctx->compressor != NULL && + lte->out_reshdr.size_in_wim >= lte->out_reshdr.uncompressed_size && + !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) && + !(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS)) + { + /* Stream did not compress to less than its original + * size. 
If we're not writing a pipable WIM (which + * could mean the output file descriptor is + * non-seekable), and the stream isn't located in a + * resource pack (which would make reading it again + * costly), truncate the file to the start of the stream + * and write it uncompressed instead. */ + DEBUG("Stream of size %"PRIu64" did not compress to " + "less than original size; writing uncompressed.", + lte->size); + ret = write_stream_uncompressed(lte, ctx->out_fd); + if (ret) + return ret; + } + wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size); + + ctx->cur_write_stream_offset = 0; + } } + do_write_streams_progress(&ctx->progress_data, lte, + completed_size, completed_stream_count, + false); + return 0; error: @@ -911,7 +954,7 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) ret = write_chunk(ctx, chunk, size, size); if (ret) return ret; - ctx->cur_read_res_offset += size; + ctx->cur_read_stream_offset += size; return 0; } @@ -929,8 +972,8 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) } else { u64 res_bytes_remaining; - res_bytes_remaining = ctx->cur_read_res_size - - ctx->cur_read_res_offset; + res_bytes_remaining = ctx->cur_read_stream_size - + ctx->cur_read_stream_offset; needed_chunk_size = min(ctx->out_chunk_size, ctx->chunk_buf_filled + res_bytes_remaining); @@ -942,7 +985,7 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) /* No intermediate buffering needed. */ resized_chunk = chunkptr; chunkptr += needed_chunk_size; - ctx->cur_read_res_offset += needed_chunk_size; + ctx->cur_read_stream_offset += needed_chunk_size; } else { /* Intermediate buffering needed. 
*/ size_t bytes_consumed; @@ -954,7 +997,7 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx) chunkptr, bytes_consumed); chunkptr += bytes_consumed; - ctx->cur_read_res_offset += bytes_consumed; + ctx->cur_read_stream_offset += bytes_consumed; ctx->chunk_buf_filled += bytes_consumed; if (ctx->chunk_buf_filled == needed_chunk_size) { resized_chunk = ctx->chunk_buf; @@ -982,7 +1025,7 @@ write_stream_end_read(struct wim_lookup_table_entry *lte, int status, void *_ctx { struct write_streams_ctx *ctx = _ctx; if (status == 0) - wimlib_assert(ctx->cur_read_res_offset == ctx->cur_read_res_size); + wimlib_assert(ctx->cur_read_stream_offset == ctx->cur_read_stream_size); if (ctx->stream_was_duplicate) { free_lookup_table_entry(lte); } else if (lte->unhashed && ctx->lookup_table != NULL) { @@ -1144,7 +1187,8 @@ write_raw_copy_resources(struct list_head *raw_copy_resources, ret = write_raw_copy_resource(lte->rspec, out_fd); if (ret) return ret; - do_write_streams_progress(progress_data, lte->size, false, lte); + do_write_streams_progress(progress_data, lte, lte->size, + 1, false); } return 0; } @@ -1427,6 +1471,7 @@ write_stream_list(struct list_head *stream_list, ctx.progress_data.progress.write_streams.num_threads); INIT_LIST_HEAD(&ctx.pending_streams); + INIT_LIST_HEAD(&ctx.pack_streams); if (ctx.progress_data.progress_func) { (*ctx.progress_data.progress_func)(WIMLIB_PROGRESS_MSG_WRITE_STREAMS, @@ -1480,7 +1525,7 @@ write_stream_list(struct list_head *stream_list, reshdr.uncompressed_size); offset_in_res = 0; - list_for_each_entry(lte, &ctx.pending_streams, write_streams_list) { + list_for_each_entry(lte, &ctx.pack_streams, write_streams_list) { lte->out_reshdr.size_in_wim = lte->size; lte->out_reshdr.flags = filter_resource_flags(lte->flags); lte->out_reshdr.flags |= WIM_RESHDR_FLAG_PACKED_STREAMS; -- 2.43.0