From: Eric Biggers Date: Sun, 28 Sep 2014 03:55:57 +0000 (-0500) Subject: Workaround for WOF incompatibility X-Git-Tag: v1.7.2~1 X-Git-Url: https://wimlib.net/git/?p=wimlib;a=commitdiff_plain;h=1ba2a3422b48935899b8cd406d6976a5997e81ea Workaround for WOF incompatibility If a stream has compressed_size == uncompressed_size, WOF assumes it's uncompressed. As a workaround, we need to re-write the stream as uncompressed if this ever happens. This requires adjusting the DONE_WITH_FILE messages to be fired after writing each stream, not after reading each stream. We also need to loosen the check for whether the input stream is in a solid block or not. Finally, this commit also makes the DONE_WITH_FILE messages work in more cases --- when appending to a WIM, and when the file hashes are already known. --- diff --git a/NEWS b/NEWS index 7ce85371..2b9bb34b 100644 --- a/NEWS +++ b/NEWS @@ -13,6 +13,10 @@ Version 1.7.2-BETA: Mandatory integrity labels are now backed up and restored. + Added a workaround for an issue where in rare cases, wimlib could create + a compressed data stream that could not be read correctly by Windows + after an extraction in "WIMBoot" mode. + Library changes: Added file count progress data for WIMLIB_PROGRESS_MSG_EXTRACT_FILE_STRUCTURE and diff --git a/include/wimlib/inode.h b/include/wimlib/inode.h index 03c9acd8..879375cc 100644 --- a/include/wimlib/inode.h +++ b/include/wimlib/inode.h @@ -181,7 +181,7 @@ struct wim_inode { * WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES: the number * of data streams this inode has that have not yet been fully * read. */ - u32 num_unread_streams; + u32 num_remaining_streams; #ifdef WITH_FUSE struct { diff --git a/include/wimlib/lookup_table.h b/include/wimlib/lookup_table.h index 4252f3a5..8857fd83 100644 --- a/include/wimlib/lookup_table.h +++ b/include/wimlib/lookup_table.h @@ -106,6 +106,8 @@ struct wim_lookup_table_entry { * need this.) */ u32 dont_check_metadata_hash : 1; + u32 may_send_done_with_file : 1; + union { /* (On-disk field) SHA1 message digest of the stream referenced * by this lookup table entry. */ @@ -166,7 +168,10 @@ struct wim_lookup_table_entry { struct wim_resource_spec *rspec; u64 offset_in_res; }; - tchar *file_on_disk; + struct { + tchar *file_on_disk; + struct wim_inode *file_inode; + }; void *attached_buffer; #ifdef WITH_FUSE struct { diff --git a/src/unix_capture.c b/src/unix_capture.c index 753da47f..884c9fd4 100644 --- a/src/unix_capture.c +++ b/src/unix_capture.c @@ -121,6 +121,7 @@ unix_scan_regular_file(const char *path, u64 size, struct wim_inode *inode, return WIMLIB_ERR_NOMEM; } lte->file_on_disk = file_on_disk; + lte->file_inode = inode; lte->resource_location = RESOURCE_IN_FILE_ON_DISK; lte->size = size; add_unhashed_stream(lte, inode, 0, unhashed_streams); diff --git a/src/win32_capture.c b/src/win32_capture.c index 26f54c22..7d6d4583 100644 --- a/src/win32_capture.c +++ b/src/win32_capture.c @@ -960,6 +960,7 @@ winnt_scan_stream(const wchar_t *path, size_t path_nchars, stream_id = 0; inode->i_lte = lte; } + lte->file_inode = inode; add_unhashed_stream(lte, inode, stream_id, unhashed_streams); return 0; } diff --git a/src/write.c b/src/write.c index 32bad1f5..0e15da71 100644 --- a/src/write.c +++ b/src/write.c @@ -284,7 +284,6 @@ struct write_streams_progress_data { static int do_write_streams_progress(struct write_streams_progress_data *progress_data, - struct wim_lookup_table_entry *cur_stream, u64 complete_size, u32 complete_count, bool discarded) @@ -389,14 +388,6 @@ struct write_streams_ctx { * @pending_streams only when writing a packed resource. */ struct list_head pack_streams; - /* Set to true if the stream currently being read was a duplicate, and - * therefore the corresponding stream entry needs to be freed once the - * read finishes. (In this case we add the duplicate entry to - * pending_streams rather than the entry being read.) */ - bool stream_was_duplicate; - - struct wim_inode *stream_inode; - /* Current uncompressed offset in the stream being read. */ u64 cur_read_stream_offset; @@ -566,7 +557,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) { ret = full_write(ctx->out_fd, ctx->chunk_csizes, chunk_table_size); if (ret) - goto error; + goto write_error; res_end_offset = ctx->out_fd->offset; res_start_offset = ctx->chunks_start_offset; } else { @@ -590,7 +581,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, ret = full_pwrite(ctx->out_fd, &hdr, sizeof(hdr), chunk_table_offset - sizeof(hdr)); if (ret) - goto error; + goto write_error; res_start_offset = chunk_table_offset - sizeof(hdr); } else { res_start_offset = chunk_table_offset; @@ -599,7 +590,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, ret = full_pwrite(ctx->out_fd, ctx->chunk_csizes, chunk_table_size, chunk_table_offset); if (ret) - goto error; + goto write_error; } *res_start_offset_ret = res_start_offset; @@ -607,7 +598,7 @@ end_chunk_table(struct write_streams_ctx *ctx, u64 res_actual_size, return 0; -error: +write_error: ERROR_WITH_ERRNO("Write error"); return ret; } @@ -655,60 +646,79 @@ done_with_file(const tchar *path, wimlib_progress_func_t progfunc, void *progctx &info, progctx); } -/* Handle WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES mode. */ -static int -done_with_stream(struct wim_lookup_table_entry *stream, - wimlib_progress_func_t progfunc, void *progctx, - struct wim_inode *inode) +static inline bool +is_file_stream(const struct wim_lookup_table_entry *lte) { - if (stream->resource_location == RESOURCE_IN_FILE_ON_DISK + return lte->resource_location == RESOURCE_IN_FILE_ON_DISK #ifdef __WIN32__ - || stream->resource_location == RESOURCE_IN_WINNT_FILE_ON_DISK - || stream->resource_location == RESOURCE_WIN32_ENCRYPTED + || lte->resource_location == RESOURCE_IN_WINNT_FILE_ON_DISK + || lte->resource_location == RESOURCE_WIN32_ENCRYPTED #endif - ) - { - int ret; + ; +} - wimlib_assert(inode->num_unread_streams > 0); - if (--inode->num_unread_streams > 0) - return 0; +static int +do_done_with_stream(struct wim_lookup_table_entry *lte, + wimlib_progress_func_t progfunc, void *progctx) +{ + int ret; + struct wim_inode *inode; - #ifdef __WIN32__ - /* XXX: This logic really should be somewhere else. */ - - /* We want the path to the file, but stream->file_on_disk might - * actually refer to a named data stream. Temporarily strip the - * named data stream from the path. */ - wchar_t *p_colon = NULL; - wchar_t *p_question_mark = NULL; - const wchar_t *p_stream_name; - - p_stream_name = path_stream_name(stream->file_on_disk); - if (unlikely(p_stream_name)) { - p_colon = (wchar_t *)(p_stream_name - 1); - wimlib_assert(*p_colon == L':'); - *p_colon = L'\0'; - } + if (!lte->may_send_done_with_file) + return 0; - /* We also should use a fake Win32 path instead of a NT path */ - if (!wcsncmp(stream->file_on_disk, L"\\??\\", 4)) { - p_question_mark = &stream->file_on_disk[1]; - *p_question_mark = L'\\'; - } - #endif + inode = lte->file_inode; - ret = done_with_file(stream->file_on_disk, progfunc, progctx); + wimlib_assert(inode != NULL); + wimlib_assert(inode->num_remaining_streams > 0); + if (--inode->num_remaining_streams > 0) + return 0; - #ifdef __WIN32__ - if (p_colon) - *p_colon = L':'; - if (p_question_mark) - *p_question_mark = L'?'; - #endif - return ret; +#ifdef __WIN32__ + /* XXX: This logic really should be somewhere else. */ + + /* We want the path to the file, but lte->file_on_disk might actually + * refer to a named data stream. Temporarily strip the named data + * stream from the path. */ + wchar_t *p_colon = NULL; + wchar_t *p_question_mark = NULL; + const wchar_t *p_stream_name; + + p_stream_name = path_stream_name(lte->file_on_disk); + if (unlikely(p_stream_name)) { + p_colon = (wchar_t *)(p_stream_name - 1); + wimlib_assert(*p_colon == L':'); + *p_colon = L'\0'; } - return 0; + + /* We also should use a fake Win32 path instead of a NT path */ + if (!wcsncmp(lte->file_on_disk, L"\\??\\", 4)) { + p_question_mark = <e->file_on_disk[1]; + *p_question_mark = L'\\'; + } +#endif + + ret = done_with_file(lte->file_on_disk, progfunc, progctx); + +#ifdef __WIN32__ + if (p_colon) + *p_colon = L':'; + if (p_question_mark) + *p_question_mark = L'?'; +#endif + return ret; +} + +/* Handle WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES mode. */ +static inline int +done_with_stream(struct wim_lookup_table_entry *lte, + struct write_streams_ctx *ctx) +{ + if (likely(!(ctx->write_resource_flags & + WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE))) + return 0; + return do_done_with_stream(lte, ctx->progress_data.progfunc, + ctx->progress_data.progctx); } /* Begin processing a stream for writing. */ @@ -723,11 +733,6 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx) ctx->cur_read_stream_offset = 0; ctx->cur_read_stream_size = lte->size; - if (lte->unhashed) - ctx->stream_inode = lte->back_inode; - else - ctx->stream_inode = NULL; - /* As an optimization, we allow some streams to be "unhashed", meaning * their SHA1 message digests are unknown. This is the case with * streams that are added by scanning a directry tree with @@ -739,7 +744,6 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx) * still provide the data again to write_stream_process_chunk(). This * is okay because an unhashed stream cannot be in a WIM resource, which * might be costly to decompress. */ - ctx->stream_was_duplicate = false; if (ctx->lookup_table != NULL && lte->unhashed && !lte->unique_size) { struct wim_lookup_table_entry *lte_new; @@ -762,7 +766,7 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx) DEBUG("Discarding duplicate stream of " "length %"PRIu64, lte->size); ret = do_write_streams_progress(&ctx->progress_data, - lte, lte->size, + lte->size, 1, true); list_del(<e->write_streams_list); list_del(<e->lookup_table_list); @@ -770,14 +774,8 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx) lte_new->out_refcnt += lte->out_refcnt; if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) ctx->cur_write_res_size -= lte->size; - if (!ret && unlikely(ctx->write_resource_flags & - WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE)) - { - ret = done_with_stream(lte, - ctx->progress_data.progfunc, - ctx->progress_data.progctx, - ctx->stream_inode); - } + if (!ret) + ret = done_with_stream(lte, ctx); free_lookup_table_entry(lte); if (ret) return ret; @@ -795,9 +793,10 @@ write_stream_begin_read(struct wim_lookup_table_entry *lte, void *_ctx) <e_new->write_streams_list); list_replace(<e->lookup_table_list, <e_new->lookup_table_list); + lte->will_be_in_output_wim = 0; lte_new->out_refcnt = lte->out_refcnt; lte_new->will_be_in_output_wim = 1; - ctx->stream_was_duplicate = true; + lte_new->may_send_done_with_file = 0; lte = lte_new; } } @@ -854,6 +853,43 @@ write_stream_uncompressed(struct wim_lookup_table_entry *lte, return 0; } +/* Returns true if the specified stream should be truncated from the WIM file + * and re-written as uncompressed. lte->out_reshdr must be filled in from the + * initial write of the stream. */ +static bool +should_rewrite_stream_uncompressed(const struct write_streams_ctx *ctx, + const struct wim_lookup_table_entry *lte) +{ + /* If the compressed data is smaller than the uncompressed data, prefer + * the compressed data. */ + if (lte->out_reshdr.size_in_wim < lte->out_reshdr.uncompressed_size) + return false; + + /* If we're not actually writing compressed data, then there's no need + * for re-writing. */ + if (!ctx->compressor) + return false; + + /* If writing a pipable WIM, everything we write to the output is final + * (it might actually be a pipe!). */ + if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PIPABLE) + return false; + + /* If the stream that would need to be re-read is located in a solid + * block in another WIM file, then re-reading it would be costly. So + * don't do it. + * + * Exception: if the compressed size happens to be *exactly* the same as + * the uncompressed size, then the stream *must* be written uncompressed + * in order to remain compatible with the Windows Overlay Filesystem + * Filter Driver (WOF). */ + if ((lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS) && + (lte->out_reshdr.size_in_wim != lte->out_reshdr.uncompressed_size)) + return false; + + return true; +} + /* Write the next chunk of (typically compressed) data to the output WIM, * handling the writing of the chunk table. */ static int @@ -907,14 +943,14 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, ret = full_write(ctx->out_fd, &chunk_hdr, sizeof(chunk_hdr)); if (ret) - goto error; + goto write_error; } } /* Write the chunk data. */ ret = full_write(ctx->out_fd, cchunk, csize); if (ret) - goto error; + goto write_error; ctx->cur_write_stream_offset += usize; @@ -923,36 +959,32 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, if (ctx->write_resource_flags & WRITE_RESOURCE_FLAG_PACK_STREAMS) { /* Wrote chunk in packed mode. It may have finished multiple * streams. */ - while (ctx->cur_write_stream_offset > lte->size) { - struct wim_lookup_table_entry *next; + struct wim_lookup_table_entry *next_lte; + + while (lte && ctx->cur_write_stream_offset >= lte->size) { ctx->cur_write_stream_offset -= lte->size; - wimlib_assert(!list_is_singular(&ctx->pending_streams) && - !list_empty(&ctx->pending_streams)); - next = list_entry(lte->write_streams_list.next, - struct wim_lookup_table_entry, - write_streams_list); - list_move_tail(<e->write_streams_list, - &ctx->pack_streams); - lte = next; - completed_stream_count++; - } - if (ctx->cur_write_stream_offset == lte->size) { - ctx->cur_write_stream_offset = 0; - list_move_tail(<e->write_streams_list, - &ctx->pack_streams); + if (ctx->cur_write_stream_offset) + next_lte = list_entry(lte->write_streams_list.next, + struct wim_lookup_table_entry, + write_streams_list); + else + next_lte = NULL; + + ret = done_with_stream(lte, ctx); + if (ret) + return ret; + list_move_tail(<e->write_streams_list, &ctx->pack_streams); completed_stream_count++; + + lte = next_lte; } } else { /* Wrote chunk in non-packed mode. It may have finished a * stream. */ if (ctx->cur_write_stream_offset == lte->size) { - completed_stream_count++; - - list_del(<e->write_streams_list); - wimlib_assert(ctx->cur_write_stream_offset == ctx->cur_write_res_size); @@ -964,19 +996,7 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, if (ctx->compressor != NULL) lte->out_reshdr.flags |= WIM_RESHDR_FLAG_COMPRESSED; - if (ctx->compressor != NULL && - lte->out_reshdr.size_in_wim >= lte->out_reshdr.uncompressed_size && - !(ctx->write_resource_flags & (WRITE_RESOURCE_FLAG_PIPABLE | - WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE)) && - !(lte->flags & WIM_RESHDR_FLAG_PACKED_STREAMS)) - { - /* Stream did not compress to less than its original - * size. If we're not writing a pipable WIM (which - * could mean the output file descriptor is - * non-seekable), and the stream isn't located in a - * resource pack (which would make reading it again - * costly), truncate the file to the start of the stream - * and write it uncompressed instead. */ + if (should_rewrite_stream_uncompressed(ctx, lte)) { DEBUG("Stream of size %"PRIu64" did not compress to " "less than original size; writing uncompressed.", lte->size); @@ -987,14 +1007,20 @@ write_chunk(struct write_streams_ctx *ctx, const void *cchunk, wimlib_assert(lte->out_reshdr.uncompressed_size == lte->size); ctx->cur_write_stream_offset = 0; + + ret = done_with_stream(lte, ctx); + if (ret) + return ret; + list_del(<e->write_streams_list); + completed_stream_count++; } } - return do_write_streams_progress(&ctx->progress_data, lte, + return do_write_streams_progress(&ctx->progress_data, completed_size, completed_stream_count, false); -error: +write_error: ERROR_WITH_ERRNO("Write error"); return ret; } @@ -1110,31 +1136,31 @@ static int write_stream_end_read(struct wim_lookup_table_entry *lte, int status, void *_ctx) { struct write_streams_ctx *ctx = _ctx; - if (status) - goto out; - - wimlib_assert(ctx->cur_read_stream_offset == ctx->cur_read_stream_size); - if (unlikely(ctx->write_resource_flags & - WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE) && - ctx->stream_inode != NULL) - { - status = done_with_stream(lte, - ctx->progress_data.progfunc, - ctx->progress_data.progctx, - ctx->stream_inode); - } + wimlib_assert(ctx->cur_read_stream_offset == ctx->cur_read_stream_size || status); - if (!ctx->stream_was_duplicate && lte->unhashed && - ctx->lookup_table != NULL) - { + if (!lte->will_be_in_output_wim) { + /* The 'lte' stream was a duplicate. Now that its data has + * finished being read, it is being discarded in favor of the + * duplicate entry. It therefore is no longer needed, and we + * can fire the DONE_WITH_FILE callback because the file will + * not be read again. + * + * Note: we can't yet fire DONE_WITH_FILE for non-duplicate + * streams, since it needs to be possible to re-read the file if + * it does not compress to less than its original size. */ + if (!status) + status = done_with_stream(lte, ctx); + free_lookup_table_entry(lte); + } else if (!status && lte->unhashed && ctx->lookup_table != NULL) { + /* The 'lte' stream was not a duplicate and was previously + * unhashed. Since we passed COMPUTE_MISSING_STREAM_HASHES to + * read_stream_list(), lte->hash is now computed and valid. So + * turn this stream into a "hashed" stream. */ list_del(<e->unhashed_list); lookup_table_insert(ctx->lookup_table, lte); lte->unhashed = 0; } -out: - if (ctx->stream_was_duplicate) - free_lookup_table_entry(lte); return status; } @@ -1297,7 +1323,7 @@ write_raw_copy_resources(struct list_head *raw_copy_streams, return ret; lte->rspec->raw_copy_ok = 0; } - ret = do_write_streams_progress(progress_data, lte, lte->size, + ret = do_write_streams_progress(progress_data, lte->size, 1, false); if (ret) return ret; @@ -1349,6 +1375,25 @@ remove_zero_length_streams(struct list_head *stream_list) } } +static void +init_done_with_file_info(struct list_head *stream_list) +{ + struct wim_lookup_table_entry *lte; + + list_for_each_entry(lte, stream_list, write_streams_list) { + if (is_file_stream(lte)) { + lte->file_inode->num_remaining_streams = 0; + lte->may_send_done_with_file = 1; + } else { + lte->may_send_done_with_file = 0; + } + } + + list_for_each_entry(lte, stream_list, write_streams_list) + if (lte->may_send_done_with_file) + lte->file_inode->num_remaining_streams++; +} + /* * Write a list of streams to the output WIM file. * @@ -1484,6 +1529,11 @@ write_stream_list(struct list_head *stream_list, return 0; } + /* If needed, set auxiliary information so that we can detect when the + * library has finished using each external file. */ + if (unlikely(write_resource_flags & WRITE_RESOURCE_FLAG_SEND_DONE_WITH_FILE)) + init_done_with_file_info(stream_list); + memset(&ctx, 0, sizeof(ctx)); /* Pre-sorting the streams is required for compute_stream_list_stats(). @@ -2170,17 +2220,6 @@ write_wim_streams(WIMStruct *wim, int image, int write_flags, } } - /* If needed, set auxiliary information so that we can detect when - * wimlib has finished using each external file. */ - if (unlikely(write_flags & WIMLIB_WRITE_FLAG_SEND_DONE_WITH_FILE_MESSAGES)) { - list_for_each_entry(lte, stream_list, write_streams_list) - if (lte->unhashed) - lte->back_inode->num_unread_streams = 0; - list_for_each_entry(lte, stream_list, write_streams_list) - if (lte->unhashed) - lte->back_inode->num_unread_streams++; - } - return wim_write_stream_list(wim, stream_list, write_flags,