Fix completed_streams of write streams progress and update docs
authorEric Biggers <ebiggers3@gmail.com>
Mon, 30 Dec 2013 22:58:32 +0000 (16:58 -0600)
committerEric Biggers <ebiggers3@gmail.com>
Mon, 30 Dec 2013 23:56:03 +0000 (17:56 -0600)
include/wimlib.h
programs/imagex.c
src/write.c

index 119c5870ca8fbf72ff69e110abf9b98ac9493001..0fc5fb9fd4e77ce5e6d7443038dc59b40b7f9e7b 100644 (file)
@@ -491,9 +491,8 @@ enum wimlib_progress_msg {
         * ::wimlib_progress_info.scan. */
        WIMLIB_PROGRESS_MSG_SCAN_END,
 
-       /**
-        * File resources are currently being written to the WIM.
-        * @p info will point to ::wimlib_progress_info.write_streams. */
+       /** File resources ("streams") are currently being written to the WIM.
+        * @p info will point to ::wimlib_progress_info.write_streams.  */
        WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
 
        /**
@@ -561,43 +560,83 @@ union wimlib_progress_info {
        /* N.B. I wanted these to be anonymous structs, but Doxygen won't
         * document them if they aren't given a name... */
 
-       /** Valid on messages ::WIMLIB_PROGRESS_MSG_WRITE_STREAMS. */
+       /** Valid on the message ::WIMLIB_PROGRESS_MSG_WRITE_STREAMS.  This is
+        * the primary message for tracking the progress of writing a WIM file.
+        */
        struct wimlib_progress_info_write_streams {
-               /** Number of bytes that are going to be written for all the
-                * streams combined.  This is the amount in uncompressed data.
-                * (The actual number of bytes will be less if the data is being
-                * written compressed.) */
+               /** Total number of uncompressed bytes of stream data being
+                * written.  This can be thought of as the total uncompressed
+                * size of the files being archived, with some caveats.  WIM
+                * files use single-instance streams, so the size provided here
+                * only counts distinct streams, except for the following
+                * exception: the size provided here may include the sizes of
+                * all newly added (e.g. with wimlib_add_image()) streams,
+                * pending automatic de-duplication during the write operation
+                * itself.  When each such stream de-duplication occurs, this
+                * number will be decreased by the size of the duplicate stream
+                * that need not be written.
+                *
+                * In the case of a wimlib_overwrite() that the library opted to
+                * perform in-place, both @p total_streams and @p total_bytes
+                * will only count the streams actually being written and not
+                * pre-existing streams in the WIM file.  */
                uint64_t total_bytes;
 
-               /** Number of streams that are going to be written. */
+               /** Total number of streams being written.  This can be thought
+                * of as the total number of files being archived, with some
+                * caveats.  In general, a single file or directory may contain
+                * multiple data streams, each of which will be represented
+                * separately in this number.  Furthermore, WIM files use
+                * single-instance streams, so the stream count provided here
+                * only counts distinct streams, except for the following
+                * exception: the stream count provided here may include newly
+                * added (e.g. with wimlib_add_image()) streams, pending
+                * automatic de-duplication during the write operation itself.
+                * When each such stream de-duplication occurs, this number will
+                * be decreased by 1 to account for the duplicate stream that
+                * need not be written.  */
                uint64_t total_streams;
 
-               /** Number of uncompressed bytes that have been written so far.
-                * Will be 0 initially, and equal to @p total_bytes at the end.
-                * */
+               /** Number of uncompressed bytes of stream data that have been
+                * written so far.  This number will be 0 initially, and will be
+                * equal to @p total_bytes at the end of the write operation.
+                * Note that @p total_bytes (but not @p completed_bytes) may
+                * decrease throughout the write operation due to the discovery
+                * of stream duplications.  */
                uint64_t completed_bytes;
 
-               /** Number of streams that have been written.  Will be 0
-                * initially, and equal to @p total_streams at the end. */
+               /** Number of streams that have been written so far.  This
+                * number will be 0 initially, and will be equal to @p
+                * total_streams at the end of the write operation.  Note that
+                * @p total_streams (but not @p completed_streams) may decrease
+                * throughout the write operation due to the discovery of stream
+                * duplications.
+                *
+                * For applications that wish to calculate a simple "percent
+                * complete" for the write operation, it will likely be more
+                * accurate to calculate the percentage from @p completed_bytes
+                * and @p total_bytes rather than @p completed_streams and
+                * @p total_streams because the time for the operation to
+                * complete is mainly determined by the number of bytes that
+                * need to be read, compressed, and written, not just the number
+                * of files being archived.  */
                uint64_t completed_streams;
 
-               /** Number of threads that are being used to compress resources
-                * (if applicable).  */
-               unsigned num_threads;
+               /** Number of threads that are being used to compress streams,
+                * or 1 if streams are being written uncompressed.  */
+               uint32_t num_threads;
 
-               /** The compression type being used to write the streams; either
-                * ::WIMLIB_COMPRESSION_TYPE_NONE,
-                * ::WIMLIB_COMPRESSION_TYPE_XPRESS, or
-                * ::WIMLIB_COMPRESSION_TYPE_LZX. */
-               int      compression_type;
+               /** The compression type being used to write the streams, as one
+                * of the ::wimlib_compression_type constants.  */
+               int32_t  compression_type;
 
                /** Number of split WIM parts from which streams are being
                 * written (may be 0 if irrelevant).  */
-               unsigned total_parts;
+               uint32_t total_parts;
 
                /** Number of split WIM parts from which streams have been
                 * written (may be 0 if irrelevant).  */
-               unsigned completed_parts;
+               uint32_t completed_parts;
        } write_streams;
 
        /** Valid on messages ::WIMLIB_PROGRESS_MSG_SCAN_BEGIN,
index 39ed2742fcaca2fd40e9af9b3deecde241bad041..9569a255cd2d1f90cfcab3dab469702f79488441 100644 (file)
@@ -1101,16 +1101,22 @@ imagex_progress_func(enum wimlib_progress_msg msg,
                return 0;
        switch (msg) {
        case WIMLIB_PROGRESS_MSG_WRITE_STREAMS:
+               {
+                       static bool first = false;
+                       if (!first) {
+                               imagex_printf(T("Writing %"TS"-compressed data "
+                                               "using %u thread%"TS"\n"),
+                                             wimlib_get_compression_type_string(
+                                                       info->write_streams.compression_type),
+                                       info->write_streams.num_threads,
+                                       (info->write_streams.num_threads == 1) ? T("") : T("s"));
+                               first = true;
+                       }
+               }
                unit_shift = get_unit(info->write_streams.total_bytes, &unit_name);
                percent_done = TO_PERCENT(info->write_streams.completed_bytes,
                                          info->write_streams.total_bytes);
 
-               if (info->write_streams.completed_streams == 0) {
-                       imagex_printf(T("Writing %"TS"-compressed data using %u thread%"TS"\n"),
-                               wimlib_get_compression_type_string(info->write_streams.compression_type),
-                               info->write_streams.num_threads,
-                               (info->write_streams.num_threads == 1) ? T("") : T("s"));
-               }
                if (info->write_streams.total_parts <= 1) {
                        imagex_printf(T("\r%"PRIu64" %"TS" of %"PRIu64" %"TS" (uncompressed) "
                                "written (%u%% done)"),
index bcf282ba3fe882126e929b208baa593c426d402a..7e597e18bf2410526a3225bb47946834fecd1f35 100644 (file)
@@ -282,23 +282,27 @@ struct write_streams_progress_data {
 
 static void
 do_write_streams_progress(struct write_streams_progress_data *progress_data,
-                         u64 size,
-                         bool discarded,
-                         struct wim_lookup_table_entry *cur_stream)
+                         struct wim_lookup_table_entry *cur_stream,
+                         u64 complete_size,
+                         u32 complete_count,
+                         bool discarded)
 {
        union wimlib_progress_info *progress = &progress_data->progress;
        bool new_wim_part;
 
        if (discarded) {
-               progress->write_streams.total_bytes -= size;
+               progress->write_streams.total_bytes -= complete_size;
+               progress->write_streams.total_streams -= complete_count;
                if (progress_data->next_progress != ~(uint64_t)0 &&
                    progress_data->next_progress > progress->write_streams.total_bytes)
                {
                        progress_data->next_progress = progress->write_streams.total_bytes;
                }
        } else {
-               progress->write_streams.completed_bytes += size;
+               progress->write_streams.completed_bytes += complete_size;
+               progress->write_streams.completed_streams += complete_count;
        }
+
        new_wim_part = false;
        if (cur_stream->resource_location == RESOURCE_IN_WIM &&
            cur_stream->rspec->wim != progress_data->prev_wim_part)
@@ -309,7 +313,7 @@ do_write_streams_progress(struct write_streams_progress_data *progress_data,
                }
                progress_data->prev_wim_part = cur_stream->rspec->wim;
        }
-       progress->write_streams.completed_streams++;
+
        if (progress_data->progress_func
            && (progress->write_streams.completed_bytes >= progress_data->next_progress
                || new_wim_part))
@@ -368,20 +372,24 @@ struct write_streams_ctx {
        /* List of streams that currently have chunks being compressed.  */
        struct list_head pending_streams;
 
+       /* List of streams in the resource pack.  Streams are moved here after
+        * @pending_streams only when writing a packed resource.  */
+       struct list_head pack_streams;
+
        /* Set to true if the stream currently being read was a duplicate, and
         * therefore the corresponding stream entry needs to be freed once the
         * read finishes.  (In this case we add the duplicate entry to
         * pending_streams rather than the entry being read.)  */
        bool stream_was_duplicate;
 
-       /* Current uncompressed offset in the resource being read.  */
-       u64 cur_read_res_offset;
+       /* Current uncompressed offset in the stream being read.  */
+       u64 cur_read_stream_offset;
 
-       /* Uncompressed size of the resource currently being read.  */
-       u64 cur_read_res_size;
+       /* Uncompressed size of the stream currently being read.  */
+       u64 cur_read_stream_size;
 
-       /* Current uncompressed offset in the resource being written.  */
-       u64 cur_write_res_offset;
+       /* Current uncompressed offset in the stream being written.  */
+       u64 cur_write_stream_offset;
 
        /* Uncompressed size of resource currently being written.  */
        u64 cur_write_res_size;
@@ -489,7 +497,7 @@ begin_write_resource(struct write_streams_ctx *ctx, u64 res_expected_size)
        /* Output file descriptor is now positioned at the offset at which to
         * write the first chunk of the resource.  */
        ctx->chunks_start_offset = ctx->out_fd->offset;
-       ctx->cur_write_res_offset = 0;
+       ctx->cur_write_stream_offset = 0;
        ctx->cur_write_res_size = res_expected_size;
        return 0;
 }
@@ -607,7 +615,8 @@ end_write_resource(struct write_streams_ctx *ctx, struct wim_reshdr *out_reshdr)
        u64 res_uncompressed_size;
        u64 res_offset_in_wim;
 
-       wimlib_assert(ctx->cur_write_res_size == ctx->cur_write_res_offset);
+       wimlib_assert(ctx->cur_write_stream_offset == ctx->cur_write_res_size ||
+                     (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS));
        res_uncompressed_size = ctx->cur_write_res_size;
 
        if (ctx->compressor) {
@@ -637,8 +646,8 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte,
 
        wimlib_assert(lte->size > 0);
 
-       ctx->cur_read_res_offset = 0;
-       ctx->cur_read_res_size = lte->size;
+       ctx->cur_read_stream_offset = 0;
+       ctx->cur_read_stream_size = lte->size;
 
        /* As an optimization, we allow some streams to be "unhashed", meaning
         * their SHA1 message digests are unknown.  This is the case with
@@ -676,7 +685,8 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte,
                                DEBUG("Discarding duplicate stream of "
                                      "length %"PRIu64, lte->size);
                                do_write_streams_progress(&ctx->progress_data,
-                                                         lte->size, true, lte);
+                                                         lte, lte->size,
+                                                         1, true);
                                list_del(&lte->write_streams_list);
                                list_del(&lte->lookup_table_list);
                                if (lte_new->will_be_in_output_wim)
@@ -766,11 +776,13 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk,
        int ret;
 
        struct wim_lookup_table_entry *lte;
+       u32 completed_stream_count;
+       u32 completed_size;
 
        lte = list_entry(ctx->pending_streams.next,
                         struct wim_lookup_table_entry, write_streams_list);
 
-       if (ctx->cur_write_res_offset == 0 &&
+       if (ctx->cur_write_stream_offset == 0 &&
            !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS))
        {
                /* Starting to write a new stream in non-packed mode.  */
@@ -817,52 +829,83 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk,
        if (ret)
                goto error;
 
-       ctx->cur_write_res_offset += usize;
+       ctx->cur_write_stream_offset += usize;
 
-       do_write_streams_progress(&ctx->progress_data,
-                                 usize, false, lte);
+       completed_size = usize;
+       completed_stream_count = 0;
+       if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) {
+               /* Wrote chunk in packed mode.  It may have finished multiple
+                * streams.  */
+               while (ctx->cur_write_stream_offset > lte->size) {
+                       struct wim_lookup_table_entry *next;
 
-       if (ctx->cur_write_res_offset == ctx->cur_write_res_size &&
-           !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS))
-       {
-               wimlib_assert(ctx->cur_write_res_offset == lte->size);
+                       ctx->cur_write_stream_offset -= lte->size;
 
-               /* Finished writing a stream in non-packed mode.  */
+                       wimlib_assert(!list_is_singular(&ctx->pending_streams) &&
+                                     !list_empty(&ctx->pending_streams));
+                       next = list_entry(lte->write_streams_list.next,
+                                         struct wim_lookup_table_entry,
+                                         write_streams_list);
+                       list_move_tail(&lte->write_streams_list,
+                                      &ctx->pack_streams);
+                       lte = next;
+                       completed_stream_count++;
+               }
+               if (ctx->cur_write_stream_offset == lte->size) {
+                       ctx->cur_write_stream_offset = 0;
+                       list_move_tail(&lte->write_streams_list,
+                                      &ctx->pack_streams);
+                       completed_stream_count++;
+               }
+       } else {
+               /* Wrote chunk in non-packed mode.  It may have finished a
+                * stream.  */
+               if (ctx->cur_write_stream_offset == lte->size) {
 
-               ret = end_write_resource(ctx, &lte->out_reshdr);
-               if (ret)
-                       return ret;
+                       completed_stream_count++;
 
-               lte->out_reshdr.flags = filter_resource_flags(lte->flags);
-               if (ctx->compressor != NULL)
-                       lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED;
+                       list_del(&lte->write_streams_list);
 
-               if (ctx->compressor != NULL &&
-                   lte->out_reshdr.size_in_wim >= lte->out_reshdr.uncompressed_size &&
-                   !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) &&
-                   !(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS))
-               {
-                       /* Stream did not compress to less than its original
-                        * size.  If we're not writing a pipable WIM (which
-                        * could mean the output file descriptor is
-                        * non-seekable), and the stream isn't located in a
-                        * resource pack (which would make reading it again
-                        * costly), truncate the file to the start of the stream
-                        * and write it uncompressed instead.  */
-                       DEBUG("Stream of size %"PRIu64" did not compress to "
-                             "less than original size; writing uncompressed.",
-                             lte->size);
-                       ret = write_stream_uncompressed(lte, ctx->out_fd);
+                       wimlib_assert(ctx->cur_write_stream_offset ==
+                                     ctx->cur_write_res_size);
+
+                       ret = end_write_resource(ctx, &lte->out_reshdr);
                        if (ret)
                                return ret;
-               }
 
-               wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size);
+                       lte->out_reshdr.flags = filter_resource_flags(lte->flags);
+                       if (ctx->compressor != NULL)
+                               lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED;
 
-               list_del(&lte->write_streams_list);
-               ctx->cur_write_res_offset = 0;
+                       if (ctx->compressor != NULL &&
+                           lte->out_reshdr.size_in_wim >= lte->out_reshdr.uncompressed_size &&
+                           !(ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) &&
+                           !(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS))
+                       {
+                               /* Stream did not compress to less than its original
+                                * size.  If we're not writing a pipable WIM (which
+                                * could mean the output file descriptor is
+                                * non-seekable), and the stream isn't located in a
+                                * resource pack (which would make reading it again
+                                * costly), truncate the file to the start of the stream
+                                * and write it uncompressed instead.  */
+                               DEBUG("Stream of size %"PRIu64" did not compress to "
+                                     "less than original size; writing uncompressed.",
+                                     lte->size);
+                               ret = write_stream_uncompressed(lte, ctx->out_fd);
+                               if (ret)
+                                       return ret;
+                       }
+                       wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size);
+
+                       ctx->cur_write_stream_offset = 0;
+               }
        }
 
+       do_write_streams_progress(&ctx->progress_data, lte,
+                                 completed_size, completed_stream_count,
+                                 false);
+
        return 0;
 
 error:
@@ -911,7 +954,7 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx)
                 ret = write_chunk(ctx, chunk, size, size);
                 if (ret)
                         return ret;
-                ctx->cur_read_res_offset += size;
+                ctx->cur_read_stream_offset += size;
                 return 0;
        }
 
@@ -929,8 +972,8 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx)
                } else {
                        u64 res_bytes_remaining;
 
-                       res_bytes_remaining = ctx->cur_read_res_size -
-                                             ctx->cur_read_res_offset;
+                       res_bytes_remaining = ctx->cur_read_stream_size -
+                                             ctx->cur_read_stream_offset;
                        needed_chunk_size = min(ctx->out_chunk_size,
                                                ctx->chunk_buf_filled +
                                                        res_bytes_remaining);
@@ -942,7 +985,7 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx)
                        /* No intermediate buffering needed.  */
                        resized_chunk = chunkptr;
                        chunkptr += needed_chunk_size;
-                       ctx->cur_read_res_offset += needed_chunk_size;
+                       ctx->cur_read_stream_offset += needed_chunk_size;
                } else {
                        /* Intermediate buffering needed.  */
                        size_t bytes_consumed;
@@ -954,7 +997,7 @@ write_stream_process_chunk(const void *chunk, size_t size, void *_ctx)
                               chunkptr, bytes_consumed);
 
                        chunkptr += bytes_consumed;
-                       ctx->cur_read_res_offset += bytes_consumed;
+                       ctx->cur_read_stream_offset += bytes_consumed;
                        ctx->chunk_buf_filled += bytes_consumed;
                        if (ctx->chunk_buf_filled == needed_chunk_size) {
                                resized_chunk = ctx->chunk_buf;
@@ -982,7 +1025,7 @@ write_stream_end_read(struct wim_lookup_table_entry *lte, int status, void *_ctx
 {
        struct write_streams_ctx *ctx = _ctx;
        if (status == 0)
-               wimlib_assert(ctx->cur_read_res_offset == ctx->cur_read_res_size);
+               wimlib_assert(ctx->cur_read_stream_offset == ctx->cur_read_stream_size);
        if (ctx->stream_was_duplicate) {
                free_lookup_table_entry(lte);
        } else if (lte->unhashed && ctx->lookup_table != NULL) {
@@ -1144,7 +1187,8 @@ write_raw_copy_resources(struct list_head *raw_copy_resources,
                ret = write_raw_copy_resource(lte->rspec, out_fd);
                if (ret)
                        return ret;
-               do_write_streams_progress(progress_data, lte->size, false, lte);
+               do_write_streams_progress(progress_data, lte, lte->size,
+                                         1, false);
        }
        return 0;
 }
@@ -1427,6 +1471,7 @@ write_stream_list(struct list_head *stream_list,
              ctx.progress_data.progress.write_streams.num_threads);
 
        INIT_LIST_HEAD(&ctx.pending_streams);
+       INIT_LIST_HEAD(&ctx.pack_streams);
 
        if (ctx.progress_data.progress_func) {
                (*ctx.progress_data.progress_func)(WIMLIB_PROGRESS_MSG_WRITE_STREAMS,
@@ -1480,7 +1525,7 @@ write_stream_list(struct list_head *stream_list,
                      reshdr.uncompressed_size);
 
                offset_in_res = 0;
-               list_for_each_entry(lte, &ctx.pending_streams, write_streams_list) {
+               list_for_each_entry(lte, &ctx.pack_streams, write_streams_list) {
                        lte->out_reshdr.size_in_wim = lte->size;
                        lte->out_reshdr.flags = filter_resource_flags(lte->flags);
                        lte->out_reshdr.flags |= WIM_RESHDR_FLAG_PACKED_STREAMS;