Workaround for WOF incompatibility
authorEric Biggers <ebiggers3@gmail.com>
Sun, 28 Sep 2014 03:55:57 +0000 (22:55 -0500)
committerEric Biggers <ebiggers3@gmail.com>
Sun, 28 Sep 2014 05:12:20 +0000 (00:12 -0500)
If a stream has compressed_size == uncompressed_size, WOF assumes it's
uncompressed.  As a workaround, we need to re-write the stream as uncompressed
if this ever happens.

This requires adjusting the DONE_WITH_FILE messages to be fired after writing
each stream, not after reading each stream.

We also need to loosen the check for whether the input stream is in a solid
block or not.

Finally, this commit also makes the DONE_WITH_FILE messages work in more cases
--- when appending to a WIM, and when the file hashes are already known.

NEWS
include/wimlib/inode.h
include/wimlib/lookup_table.h
src/unix_capture.c
src/win32_capture.c
src/write.c

diff --git a/NEWS b/NEWS
index 7ce8537..2b9bb34 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -13,6 +13,10 @@ Version 1.7.2-BETA:
 
                Mandatory integrity labels are now backed up and restored.
 
+       Added a workaround for an issue where in rare cases, wimlib could create
+       a compressed data stream that could not be read correctly by Windows
+       after an extraction in "WIMBoot" mode.
+
        Library changes:
                Added file count progress data for
                WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE and
index 03c9acd..879375c 100644 (file)
@@ -181,7 +181,7 @@ struct wim_inode {
                 * WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES:  the number
                 * of data streams this inode has that have not yet been fully
                 * read.  */
-               u32 num_unread_streams;
+               u32 num_remaining_streams;
 
 #ifdef WITH_FUSE
                struct {
index 4252f3a..8857fd8 100644 (file)
@@ -106,6 +106,8 @@ struct wim_lookup_table_entry {
         * need this.)  */
        u32 dont_check_metadata_hash : 1;
 
+       u32 may_send_done_with_file : 1;
+
        union {
                /* (On-disk field) SHA1 message digest of the stream referenced
                 * by this lookup table entry.  */
@@ -166,7 +168,10 @@ struct wim_lookup_table_entry {
                        struct wim_resource_spec *rspec;
                        u64 offset_in_res;
                };
-               tchar *file_on_disk;
+               struct {
+                       tchar *file_on_disk;
+                       struct wim_inode *file_inode;
+               };
                void *attached_buffer;
        #ifdef WITH_FUSE
                struct {
index 753da47..884c9fd 100644 (file)
@@ -121,6 +121,7 @@ unix_scan_regular_file(const char *path, u64 size, struct wim_inode *inode,
                return WIMLIB_ERR_NOMEM;
        }
        lte->file_on_disk = file_on_disk;
+       lte->file_inode = inode;
        lte->resource_location = RESOURCE_IN_FILE_ON_DISK;
        lte->size = size;
        add_unhashed_stream(lte, inode, 0, unhashed_streams);
index 26f54c2..7d6d458 100644 (file)
@@ -960,6 +960,7 @@ winnt_scan_stream(const wchar_t *path, size_t path_nchars,
                stream_id = 0;
                inode->i_lte = lte;
        }
+       lte->file_inode = inode;
        add_unhashed_stream(lte, inode, stream_id, unhashed_streams);
        return 0;
 }
index 32bad1f..0e15da7 100644 (file)
@@ -284,7 +284,6 @@ struct write_streams_progress_data {
 
 static int
 do_write_streams_progress(struct write_streams_progress_data *progress_data,
-                         struct wim_lookup_table_entry *cur_stream,
                          u64 complete_size,
                          u32 complete_count,
                          bool discarded)
@@ -389,14 +388,6 @@ struct write_streams_ctx {
         * @pending_streams only when writing a packed resource.  */
        struct list_head pack_streams;
 
-       /* Set to true if the stream currently being read was a duplicate, and
-        * therefore the corresponding stream entry needs to be freed once the
-        * read finishes.  (In this case we add the duplicate entry to
-        * pending_streams rather than the entry being read.)  */
-       bool stream_was_duplicate;
-
-       struct wim_inode *stream_inode;
-
        /* Current uncompressed offset in the stream being read.  */
        u64 cur_read_stream_offset;
 
@@ -566,7 +557,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size,
        if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) {
                ret = full_write(ctx->out_fd, ctx->chunk_csizes, chunk_table_size);
                if (ret)
-                       goto error;
+                       goto write_error;
                res_end_offset = ctx->out_fd->offset;
                res_start_offset = ctx->chunks_start_offset;
        } else {
@@ -590,7 +581,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size,
                        ret = full_pwrite(ctx->out_fd, &hdr, sizeof(hdr),
                                          chunk_table_offset - sizeof(hdr));
                        if (ret)
-                               goto error;
+                               goto write_error;
                        res_start_offset = chunk_table_offset - sizeof(hdr);
                } else {
                        res_start_offset = chunk_table_offset;
@@ -599,7 +590,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size,
                ret = full_pwrite(ctx->out_fd, ctx->chunk_csizes,
                                  chunk_table_size, chunk_table_offset);
                if (ret)
-                       goto error;
+                       goto write_error;
        }
 
        *res_start_offset_ret = res_start_offset;
@@ -607,7 +598,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size,
 
        return 0;
 
-error:
+write_error:
        ERROR_WITH_ERRNO("Write error");
        return ret;
 }
@@ -655,60 +646,79 @@ done_with_file(const tchar *path, wimlib_progress_func_t progfunc, void *progctx
                             &info, progctx);
 }
 
-/* Handle WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES mode.  */
-static int
-done_with_stream(struct wim_lookup_table_entry *stream,
-                wimlib_progress_func_t progfunc, void *progctx,
-                struct wim_inode *inode)
+static inline bool
+is_file_stream(const struct wim_lookup_table_entry *lte)
 {
-       if (stream->resource_location == RESOURCE_IN_FILE_ON_DISK
+       return lte->resource_location == RESOURCE_IN_FILE_ON_DISK
 #ifdef __WIN32__
-           || stream->resource_location == RESOURCE_IN_WINNT_FILE_ON_DISK
-           || stream->resource_location == RESOURCE_WIN32_ENCRYPTED
+           || lte->resource_location == RESOURCE_IN_WINNT_FILE_ON_DISK
+           || lte->resource_location == RESOURCE_WIN32_ENCRYPTED
 #endif
-          )
-       {
-               int ret;
+          ;
+}
 
-               wimlib_assert(inode->num_unread_streams > 0);
-               if (--inode->num_unread_streams > 0)
-                       return 0;
+static int
+do_done_with_stream(struct wim_lookup_table_entry *lte,
+                   wimlib_progress_func_t progfunc, void *progctx)
+{
+       int ret;
+       struct wim_inode *inode;
 
-       #ifdef __WIN32__
-               /* XXX: This logic really should be somewhere else.  */
-
-               /* We want the path to the file, but stream->file_on_disk might
-                * actually refer to a named data stream.  Temporarily strip the
-                * named data stream from the path.  */
-               wchar_t *p_colon = NULL;
-               wchar_t *p_question_mark = NULL;
-               const wchar_t *p_stream_name;
-
-               p_stream_name = path_stream_name(stream->file_on_disk);
-               if (unlikely(p_stream_name)) {
-                       p_colon = (wchar_t *)(p_stream_name - 1);
-                       wimlib_assert(*p_colon == L':');
-                       *p_colon = L'\0';
-               }
+       if (!lte->may_send_done_with_file)
+               return 0;
 
-               /* We also should use a fake Win32 path instead of a NT path  */
-               if (!wcsncmp(stream->file_on_disk, L"\\??\\", 4)) {
-                       p_question_mark = &stream->file_on_disk[1];
-                       *p_question_mark = L'\\';
-               }
-       #endif
+       inode = lte->file_inode;
 
-               ret = done_with_file(stream->file_on_disk, progfunc, progctx);
+       wimlib_assert(inode != NULL);
+       wimlib_assert(inode->num_remaining_streams > 0);
+       if (--inode->num_remaining_streams > 0)
+               return 0;
 
-       #ifdef __WIN32__
-               if (p_colon)
-                       *p_colon = L':';
-               if (p_question_mark)
-                       *p_question_mark = L'?';
-       #endif
-               return ret;
+#ifdef __WIN32__
+       /* XXX: This logic really should be somewhere else.  */
+
+       /* We want the path to the file, but lte->file_on_disk might actually
+        * refer to a named data stream.  Temporarily strip the named data
+        * stream from the path.  */
+       wchar_t *p_colon = NULL;
+       wchar_t *p_question_mark = NULL;
+       const wchar_t *p_stream_name;
+
+       p_stream_name = path_stream_name(lte->file_on_disk);
+       if (unlikely(p_stream_name)) {
+               p_colon = (wchar_t *)(p_stream_name - 1);
+               wimlib_assert(*p_colon == L':');
+               *p_colon = L'\0';
        }
-       return 0;
+
+       /* We also should use a fake Win32 path instead of a NT path  */
+       if (!wcsncmp(lte->file_on_disk, L"\\??\\", 4)) {
+               p_question_mark = &lte->file_on_disk[1];
+               *p_question_mark = L'\\';
+       }
+#endif
+
+       ret = done_with_file(lte->file_on_disk, progfunc, progctx);
+
+#ifdef __WIN32__
+       if (p_colon)
+               *p_colon = L':';
+       if (p_question_mark)
+               *p_question_mark = L'?';
+#endif
+       return ret;
+}
+
+/* Handle WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES mode.  */
+static inline int
+done_with_stream(struct wim_lookup_table_entry *lte,
+                struct write_streams_ctx *ctx)
+{
+       if (likely(!(ctx->write_resource_flags &
+                    WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE)))
+               return 0;
+       return do_done_with_stream(lte, ctx->progress_data.progfunc,
+                                  ctx->progress_data.progctx);
 }
 
 /* Begin processing a stream for writing.  */
@@ -723,11 +733,6 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx)
        ctx->cur_read_stream_offset = 0;
        ctx->cur_read_stream_size = lte->size;
 
-       if (lte->unhashed)
-               ctx->stream_inode = lte->back_inode;
-       else
-               ctx->stream_inode = NULL;
-
        /* As an optimization, we allow some streams to be "unhashed", meaning
         * their SHA1 message digests are unknown.  This is the case with
         * streams that are added by scanning a directry tree with
@@ -739,7 +744,6 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx)
         * still provide the data again to write_stream_process_chunk().  This
         * is okay because an unhashed stream cannot be in a WIM resource, which
         * might be costly to decompress.  */
-       ctx->stream_was_duplicate = false;
        if (ctx->lookup_table != NULL && lte->unhashed && !lte->unique_size) {
 
                struct wim_lookup_table_entry *lte_new;
@@ -762,7 +766,7 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx)
                                DEBUG("Discarding duplicate stream of "
                                      "length %"PRIu64, lte->size);
                                ret = do_write_streams_progress(&ctx->progress_data,
-                                                               lte, lte->size,
+                                                               lte->size,
                                                                1, true);
                                list_del(&lte->write_streams_list);
                                list_del(&lte->lookup_table_list);
@@ -770,14 +774,8 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx)
                                        lte_new->out_refcnt += lte->out_refcnt;
                                if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS)
                                        ctx->cur_write_res_size -= lte->size;
-                               if (!ret && unlikely(ctx->write_resource_flags &
-                                                    WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE))
-                               {
-                                       ret = done_with_stream(lte,
-                                                              ctx->progress_data.progfunc,
-                                                              ctx->progress_data.progctx,
-                                                              ctx->stream_inode);
-                               }
+                               if (!ret)
+                                       ret = done_with_stream(lte, ctx);
                                free_lookup_table_entry(lte);
                                if (ret)
                                        return ret;
@@ -795,9 +793,10 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx)
                                             &lte_new->write_streams_list);
                                list_replace(&lte->lookup_table_list,
                                             &lte_new->lookup_table_list);
+                               lte->will_be_in_output_wim = 0;
                                lte_new->out_refcnt = lte->out_refcnt;
                                lte_new->will_be_in_output_wim = 1;
-                               ctx->stream_was_duplicate = true;
+                               lte_new->may_send_done_with_file = 0;
                                lte = lte_new;
                        }
                }
@@ -854,6 +853,43 @@ write_stream_uncompressed(struct wim_lookup_table_entry *lte,
        return 0;
 }
 
+/* Returns true if the specified stream should be truncated from the WIM file
+ * and re-written as uncompressed.  lte->out_reshdr must be filled in from the
+ * initial write of the stream.  */
+static bool
+should_rewrite_stream_uncompressed(const struct write_streams_ctx *ctx,
+                                  const struct wim_lookup_table_entry *lte)
+{
+       /* If the compressed data is smaller than the uncompressed data, prefer
+        * the compressed data.  */
+       if (lte->out_reshdr.size_in_wim < lte->out_reshdr.uncompressed_size)
+               return false;
+
+       /* If we're not actually writing compressed data, then there's no need
+        * for re-writing.  */
+       if (!ctx->compressor)
+               return false;
+
+       /* If writing a pipable WIM, everything we write to the output is final
+        * (it might actually be a pipe!).  */
+       if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE)
+               return false;
+
+       /* If the stream that would need to be re-read is located in a solid
+        * block in another WIM file, then re-reading it would be costly.  So
+        * don't do it.
+        *
+        * Exception: if the compressed size happens to be *exactly* the same as
+        * the uncompressed size, then the stream *must* be written uncompressed
+        * in order to remain compatible with the Windows Overlay Filesystem
+        * Filter Driver (WOF).  */
+       if ((lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) &&
+           (lte->out_reshdr.size_in_wim != lte->out_reshdr.uncompressed_size))
+               return false;
+
+       return true;
+}
+
 /* Write the next chunk of (typically compressed) data to the output WIM,
  * handling the writing of the chunk table.  */
 static int
@@ -907,14 +943,14 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk,
                        ret = full_write(ctx->out_fd, &chunk_hdr,
                                         sizeof(chunk_hdr));
                        if (ret)
-                               goto error;
+                               goto write_error;
                }
        }
 
        /* Write the chunk data.  */
        ret = full_write(ctx->out_fd, cchunk, csize);
        if (ret)
-               goto error;
+               goto write_error;
 
        ctx->cur_write_stream_offset += usize;
 
@@ -923,36 +959,32 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk,
        if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) {
                /* Wrote chunk in packed mode.  It may have finished multiple
                 * streams.  */
-               while (ctx->cur_write_stream_offset > lte->size) {
-                       struct wim_lookup_table_entry *next;
+               struct wim_lookup_table_entry *next_lte;
+
+               while (lte && ctx->cur_write_stream_offset >= lte->size) {
 
                        ctx->cur_write_stream_offset -= lte->size;
 
-                       wimlib_assert(!list_is_singular(&ctx->pending_streams) &&
-                                     !list_empty(&ctx->pending_streams));
-                       next = list_entry(lte->write_streams_list.next,
-                                         struct wim_lookup_table_entry,
-                                         write_streams_list);
-                       list_move_tail(&lte->write_streams_list,
-                                      &ctx->pack_streams);
-                       lte = next;
-                       completed_stream_count++;
-               }
-               if (ctx->cur_write_stream_offset == lte->size) {
-                       ctx->cur_write_stream_offset = 0;
-                       list_move_tail(&lte->write_streams_list,
-                                      &ctx->pack_streams);
+                       if (ctx->cur_write_stream_offset)
+                               next_lte = list_entry(lte->write_streams_list.next,
+                                                     struct wim_lookup_table_entry,
+                                                     write_streams_list);
+                       else
+                               next_lte = NULL;
+
+                       ret = done_with_stream(lte, ctx);
+                       if (ret)
+                               return ret;
+                       list_move_tail(&lte->write_streams_list, &ctx->pack_streams);
                        completed_stream_count++;
+
+                       lte = next_lte;
                }
        } else {
                /* Wrote chunk in non-packed mode.  It may have finished a
                 * stream.  */
                if (ctx->cur_write_stream_offset == lte->size) {
 
-                       completed_stream_count++;
-
-                       list_del(&lte->write_streams_list);
-
                        wimlib_assert(ctx->cur_write_stream_offset ==
                                      ctx->cur_write_res_size);
 
@@ -964,19 +996,7 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk,
                        if (ctx->compressor != NULL)
                                lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED;
 
-                       if (ctx->compressor != NULL &&
-                           lte->out_reshdr.size_in_wim >= lte->out_reshdr.uncompressed_size &&
-                           !(ctx->write_resource_flags & (WRITE_RESOURCE_FLAG_PIPABLE |
-                                                          WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE)) &&
-                           !(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS))
-                       {
-                               /* Stream did not compress to less than its original
-                                * size.  If we're not writing a pipable WIM (which
-                                * could mean the output file descriptor is
-                                * non-seekable), and the stream isn't located in a
-                                * resource pack (which would make reading it again
-                                * costly), truncate the file to the start of the stream
-                                * and write it uncompressed instead.  */
+                       if (should_rewrite_stream_uncompressed(ctx, lte)) {
                                DEBUG("Stream of size %"PRIu64" did not compress to "
                                      "less than original size; writing uncompressed.",
                                      lte->size);
@@ -987,14 +1007,20 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk,
                        wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size);
 
                        ctx->cur_write_stream_offset = 0;
+
+                       ret = done_with_stream(lte, ctx);
+                       if (ret)
+                               return ret;
+                       list_del(&lte->write_streams_list);
+                       completed_stream_count++;
                }
        }
 
-       return do_write_streams_progress(&ctx->progress_data, lte,
+       return do_write_streams_progress(&ctx->progress_data,
                                         completed_size, completed_stream_count,
                                         false);
 
-error:
+write_error:
        ERROR_WITH_ERRNO("Write error");
        return ret;
 }
@@ -1110,31 +1136,31 @@ static int
 write_stream_end_read(struct wim_lookup_table_entry *lte, int status, void *_ctx)
 {
        struct write_streams_ctx *ctx = _ctx;
-       if (status)
-               goto out;
-
-       wimlib_assert(ctx->cur_read_stream_offset == ctx->cur_read_stream_size);
 
-       if (unlikely(ctx->write_resource_flags &
-                    WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE) &&
-           ctx->stream_inode != NULL)
-       {
-               status = done_with_stream(lte,
-                                         ctx->progress_data.progfunc,
-                                         ctx->progress_data.progctx,
-                                         ctx->stream_inode);
-       }
+       wimlib_assert(ctx->cur_read_stream_offset == ctx->cur_read_stream_size || status);
 
-       if (!ctx->stream_was_duplicate && lte->unhashed &&
-           ctx->lookup_table != NULL)
-       {
+       if (!lte->will_be_in_output_wim) {
+               /* The 'lte' stream was a duplicate.  Now that its data has
+                * finished being read, it is being discarded in favor of the
+                * duplicate entry.  It therefore is no longer needed, and we
+                * can fire the DONE_WITH_FILE callback because the file will
+                * not be read again.
+                *
+                * Note: we can't yet fire DONE_WITH_FILE for non-duplicate
+                * streams, since it needs to be possible to re-read the file if
+                * it does not compress to less than its original size.  */
+               if (!status)
+                       status = done_with_stream(lte, ctx);
+               free_lookup_table_entry(lte);
+       } else if (!status && lte->unhashed && ctx->lookup_table != NULL) {
+               /* The 'lte' stream was not a duplicate and was previously
+                * unhashed.  Since we passed COMPUTE_MISSING_STREAM_HASHES to
+                * read_stream_list(), lte->hash is now computed and valid.  So
+                * turn this stream into a "hashed" stream.  */
                list_del(&lte->unhashed_list);
                lookup_table_insert(ctx->lookup_table, lte);
                lte->unhashed = 0;
        }
-out:
-       if (ctx->stream_was_duplicate)
-               free_lookup_table_entry(lte);
        return status;
 }
 
@@ -1297,7 +1323,7 @@ write_raw_copy_resources(struct list_head *raw_copy_streams,
                                return ret;
                        lte->rspec->raw_copy_ok = 0;
                }
-               ret = do_write_streams_progress(progress_data, lte, lte->size,
+               ret = do_write_streams_progress(progress_data, lte->size,
                                                1, false);
                if (ret)
                        return ret;
@@ -1349,6 +1375,25 @@ remove_zero_length_streams(struct list_head *stream_list)
        }
 }
 
+static void
+init_done_with_file_info(struct list_head *stream_list)
+{
+       struct wim_lookup_table_entry *lte;
+
+       list_for_each_entry(lte, stream_list, write_streams_list) {
+               if (is_file_stream(lte)) {
+                       lte->file_inode->num_remaining_streams = 0;
+                       lte->may_send_done_with_file = 1;
+               } else {
+                       lte->may_send_done_with_file = 0;
+               }
+       }
+
+       list_for_each_entry(lte, stream_list, write_streams_list)
+               if (lte->may_send_done_with_file)
+                       lte->file_inode->num_remaining_streams++;
+}
+
 /*
  * Write a list of streams to the output WIM file.
  *
@@ -1484,6 +1529,11 @@ write_stream_list(struct list_head *stream_list,
                return 0;
        }
 
+       /* If needed, set auxiliary information so that we can detect when the
+        * library has finished using each external file.  */
+       if (unlikely(write_resource_flags & WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE))
+               init_done_with_file_info(stream_list);
+
        memset(&ctx, 0, sizeof(ctx));
 
        /* Pre-sorting the streams is required for compute_stream_list_stats().
@@ -2170,17 +2220,6 @@ write_wim_streams(WIMStruct *wim, int image, int write_flags,
                }
        }
 
-       /* If needed, set auxiliary information so that we can detect when
-        * wimlib has finished using each external file.  */
-       if (unlikely(write_flags & WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES)) {
-               list_for_each_entry(lte, stream_list, write_streams_list)
-                       if (lte->unhashed)
-                               lte->back_inode->num_unread_streams = 0;
-               list_for_each_entry(lte, stream_list, write_streams_list)
-                       if (lte->unhashed)
-                               lte->back_inode->num_unread_streams++;
-       }
-
        return wim_write_stream_list(wim,
                                     stream_list,
                                     write_flags,